Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/Kconfig             |   2
-rw-r--r-- | drivers/md/bcache/super.c      |   3
-rw-r--r-- | drivers/md/dm-bufio.c          |   4
-rw-r--r-- | drivers/md/dm-crypt.c          | 101
-rw-r--r-- | drivers/md/dm-flakey.c         |   5
-rw-r--r-- | drivers/md/dm-init.c           |   2
-rw-r--r-- | drivers/md/dm-integrity.c      |   7
-rw-r--r-- | drivers/md/dm-kcopyd.c         |  34
-rw-r--r-- | drivers/md/dm-linear.c         |   5
-rw-r--r-- | drivers/md/dm-log-writes.c     |   4
-rw-r--r-- | drivers/md/dm-raid.c           |   2
-rw-r--r-- | drivers/md/dm-rq.c             |   2
-rw-r--r-- | drivers/md/dm-snap.c           | 196
-rw-r--r-- | drivers/md/dm-table.c          |  24
-rw-r--r-- | drivers/md/dm-thin-metadata.c  |   7
-rw-r--r-- | drivers/md/dm-zoned-metadata.c |  40
-rw-r--r-- | drivers/md/dm-zoned.h          |  28
-rw-r--r-- | drivers/md/dm.c                |  11
-rw-r--r-- | drivers/md/dm.h                |   5
-rw-r--r-- | drivers/md/md.c                |   3
20 files changed, 381 insertions, 104 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 5ccac0b77f17..3834332f4963 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -453,7 +453,7 @@ config DM_INIT
 	  Enable "dm-mod.create=" parameter to create mapped devices at init time.
 	  This option is useful to allow mounting rootfs without requiring an
 	  initramfs.
-	  See Documentation/device-mapper/dm-init.rst for dm-mod.create="..."
+	  See Documentation/admin-guide/device-mapper/dm-init.rst for dm-mod.create="..."
 	  format.
 
 	  If unsure, say N.
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 26e374fbf57c..20ed838e9413 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -931,6 +931,9 @@ int bch_cached_dev_run(struct cached_dev *dc)
 	if (dc->io_disable) {
 		pr_err("I/O disabled on cached dev %s",
 		       dc->backing_dev_name);
+		kfree(env[1]);
+		kfree(env[2]);
+		kfree(buf);
 		return -EIO;
 	}
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 2a48ea3f1b30..b6b5acc92ca2 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1599,9 +1599,7 @@ dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 	unsigned long freed;
 
 	c = container_of(shrink, struct dm_bufio_client, shrinker);
-	if (sc->gfp_mask & __GFP_FS)
-		dm_bufio_lock(c);
-	else if (!dm_bufio_trylock(c))
+	if (!dm_bufio_trylock(c))
 		return SHRINK_STOP;
 
 	freed = __scan(c, sc->nr_to_scan, sc->gfp_mask);
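The dm-bufio change above makes the shrinker strictly non-blocking: rather than taking the client lock outright when __GFP_FS allows, dm_bufio_shrink_scan() now only trylocks and returns SHRINK_STOP on contention, so memory reclaim can never sleep on a lock held by an allocating path. A minimal userspace model of that pattern follows; the pthread locking and all names are illustrative, not the kernel shrinker API.

#include <pthread.h>

#define SHRINK_STOP (~0UL)	/* kernel convention: "nothing scanned, back off" */

struct cache_client {
	pthread_mutex_t lock;
	unsigned long nr_cached;
};

/* Reclaim callback: must never block, so it only ever trylocks. */
static unsigned long shrink_scan(struct cache_client *c, unsigned long nr_to_scan)
{
	unsigned long freed;

	if (pthread_mutex_trylock(&c->lock) != 0)
		return SHRINK_STOP;	/* lock busy: bail out instead of risking deadlock */

	freed = nr_to_scan < c->nr_cached ? nr_to_scan : c->nr_cached;
	c->nr_cached -= freed;		/* stand-in for actually evicting buffers */

	pthread_mutex_unlock(&c->lock);
	return freed;
}

int main(void)
{
	struct cache_client c = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.nr_cached = 64,
	};

	return shrink_scan(&c, 16) == 16 ? 0 : 1;	/* uncontended: frees 16 */
}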
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 1b16d34bb785..d5216bcc4649 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -120,6 +120,10 @@ struct iv_tcw_private {
 	u8 *whitening;
 };
 
+struct iv_eboiv_private {
+	struct crypto_cipher *tfm;
+};
+
 /*
  * Crypt: maps a linear range of a block device
  * and encrypts / decrypts at the same time.
@@ -159,6 +163,7 @@ struct crypt_config {
 		struct iv_benbi_private benbi;
 		struct iv_lmk_private lmk;
 		struct iv_tcw_private tcw;
+		struct iv_eboiv_private eboiv;
 	} iv_gen_private;
 	u64 iv_offset;
 	unsigned int iv_size;
@@ -291,8 +296,9 @@ static struct crypto_aead *any_tfm_aead(struct crypt_config *cc)
  * Note that this encryption scheme is vulnerable to watermarking attacks
  * and should be used for old compatible containers access only.
  *
- * plumb: unimplemented, see:
- * http://article.gmane.org/gmane.linux.kernel.device-mapper.dm-crypt/454
+ * eboiv: Encrypted byte-offset IV (used in Bitlocker in CBC mode)
+ *        The IV is encrypted little-endian byte-offset (with the same key
+ *        and cipher as the volume).
  */
 
 static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv,
@@ -841,6 +847,67 @@ static int crypt_iv_random_gen(struct crypt_config *cc, u8 *iv,
 	return 0;
 }
 
+static void crypt_iv_eboiv_dtr(struct crypt_config *cc)
+{
+	struct iv_eboiv_private *eboiv = &cc->iv_gen_private.eboiv;
+
+	crypto_free_cipher(eboiv->tfm);
+	eboiv->tfm = NULL;
+}
+
+static int crypt_iv_eboiv_ctr(struct crypt_config *cc, struct dm_target *ti,
+			      const char *opts)
+{
+	struct iv_eboiv_private *eboiv = &cc->iv_gen_private.eboiv;
+	struct crypto_cipher *tfm;
+
+	tfm = crypto_alloc_cipher(cc->cipher, 0, 0);
+	if (IS_ERR(tfm)) {
+		ti->error = "Error allocating crypto tfm for EBOIV";
+		return PTR_ERR(tfm);
+	}
+
+	if (crypto_cipher_blocksize(tfm) != cc->iv_size) {
+		ti->error = "Block size of EBOIV cipher does "
+			    "not match IV size of block cipher";
+		crypto_free_cipher(tfm);
+		return -EINVAL;
+	}
+
+	eboiv->tfm = tfm;
+	return 0;
+}
+
+static int crypt_iv_eboiv_init(struct crypt_config *cc)
+{
+	struct iv_eboiv_private *eboiv = &cc->iv_gen_private.eboiv;
+	int err;
+
+	err = crypto_cipher_setkey(eboiv->tfm, cc->key, cc->key_size);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int crypt_iv_eboiv_wipe(struct crypt_config *cc)
+{
+	/* Called after cc->key is set to random key in crypt_wipe() */
+	return crypt_iv_eboiv_init(cc);
+}
+
+static int crypt_iv_eboiv_gen(struct crypt_config *cc, u8 *iv,
+			      struct dm_crypt_request *dmreq)
+{
+	struct iv_eboiv_private *eboiv = &cc->iv_gen_private.eboiv;
+
+	memset(iv, 0, cc->iv_size);
+	*(__le64 *)iv = cpu_to_le64(dmreq->iv_sector * cc->sector_size);
+	crypto_cipher_encrypt_one(eboiv->tfm, iv, iv);
+
+	return 0;
+}
+
 static const struct crypt_iv_operations crypt_iv_plain_ops = {
 	.generator = crypt_iv_plain_gen
 };
@@ -893,6 +960,14 @@ static struct crypt_iv_operations crypt_iv_random_ops = {
 	.generator = crypt_iv_random_gen
 };
 
+static struct crypt_iv_operations crypt_iv_eboiv_ops = {
+	.ctr  = crypt_iv_eboiv_ctr,
+	.dtr  = crypt_iv_eboiv_dtr,
+	.init = crypt_iv_eboiv_init,
+	.wipe = crypt_iv_eboiv_wipe,
+	.generator = crypt_iv_eboiv_gen
+};
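crypt_iv_eboiv_gen() above is the whole of the new scheme: the IV for a sector is that sector's byte offset, stored little-endian in a zeroed IV-sized buffer and encrypted once with the volume key. A rough userspace equivalent, sketched with OpenSSL's single-block AES API under the assumption of AES-128 and a 16-byte IV (the function name eboiv_compute is hypothetical):

#include <openssl/aes.h>
#include <stdint.h>

static void eboiv_compute(const uint8_t key[16], uint64_t sector,
			  unsigned int sector_size, uint8_t iv[16])
{
	AES_KEY enc;
	uint8_t buf[16] = { 0 };
	uint64_t off = sector * sector_size;	/* byte offset of the sector */

	/* little-endian encoding, as cpu_to_le64() does in crypt_iv_eboiv_gen() */
	for (unsigned int i = 0; i < 8; i++)
		buf[i] = (off >> (8 * i)) & 0xff;

	AES_set_encrypt_key(key, 128, &enc);
	AES_encrypt(buf, iv, &enc);	/* IV = E_key(le64(byte offset)) */
}

int main(void)
{
	static const uint8_t key[16];	/* illustrative all-zero AES-128 key */
	uint8_t iv[16];

	eboiv_compute(key, 7, 512, iv);	/* IV for sector 7 of a 512-byte-sector volume */
	return 0;
}

In a crypt target table the mode is selected through the usual cipher specification, e.g. aes-cbc-eboiv, matching the "eboiv" string handled in crypt_ctr_ivmode() below.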
+
 /*
  * Integrity extensions
  */
@@ -2158,6 +2233,14 @@ static int crypt_wipe_key(struct crypt_config *cc)
 	clear_bit(DM_CRYPT_KEY_VALID, &cc->flags);
 	get_random_bytes(&cc->key, cc->key_size);
+
+	/* Wipe IV private keys */
+	if (cc->iv_gen_ops && cc->iv_gen_ops->wipe) {
+		r = cc->iv_gen_ops->wipe(cc);
+		if (r)
+			return r;
+	}
+
 	kzfree(cc->key_string);
 	cc->key_string = NULL;
 	r = crypt_setkey(cc);
@@ -2288,6 +2371,8 @@ static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode)
 		cc->iv_gen_ops = &crypt_iv_benbi_ops;
 	else if (strcmp(ivmode, "null") == 0)
 		cc->iv_gen_ops = &crypt_iv_null_ops;
+	else if (strcmp(ivmode, "eboiv") == 0)
+		cc->iv_gen_ops = &crypt_iv_eboiv_ops;
 	else if (strcmp(ivmode, "lmk") == 0) {
 		cc->iv_gen_ops = &crypt_iv_lmk_ops;
 		/*
@@ -2699,7 +2784,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		return -EINVAL;
 	}
 
-	cc = kzalloc(sizeof(*cc) + key_size * sizeof(u8), GFP_KERNEL);
+	cc = kzalloc(struct_size(cc, key, key_size), GFP_KERNEL);
 	if (!cc) {
 		ti->error = "Cannot allocate encryption context";
 		return -ENOMEM;
@@ -3050,14 +3135,8 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv,
 			memset(cc->key, 0, cc->key_size * sizeof(u8));
 			return ret;
 		}
-		if (argc == 2 && !strcasecmp(argv[1], "wipe")) {
-			if (cc->iv_gen_ops && cc->iv_gen_ops->wipe) {
-				ret = cc->iv_gen_ops->wipe(cc);
-				if (ret)
-					return ret;
-			}
+		if (argc == 2 && !strcasecmp(argv[1], "wipe"))
 			return crypt_wipe_key(cc);
-		}
 	}
 
 error:
@@ -3094,7 +3173,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type crypt_target = {
 	.name   = "crypt",
-	.version = {1, 18, 1},
+	.version = {1, 19, 0},
 	.module = THIS_MODULE,
 	.ctr    = crypt_ctr,
 	.dtr    = crypt_dtr,
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index a9bc518156f2..2900fbde89b3 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -461,15 +461,14 @@ static int flakey_prepare_ioctl(struct dm_target *ti, struct block_device **bdev
 
 #ifdef CONFIG_BLK_DEV_ZONED
 static int flakey_report_zones(struct dm_target *ti, sector_t sector,
-			       struct blk_zone *zones, unsigned int *nr_zones,
-			       gfp_t gfp_mask)
+			       struct blk_zone *zones, unsigned int *nr_zones)
 {
 	struct flakey_c *fc = ti->private;
 	int ret;
 
 	/* Do report and remap it */
 	ret = blkdev_report_zones(fc->dev->bdev, flakey_map_sector(ti, sector),
-				  zones, nr_zones, gfp_mask);
+				  zones, nr_zones);
 	if (ret != 0)
 		return ret;
diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c
index b65faef2c4b5..b869316d3722 100644
--- a/drivers/md/dm-init.c
+++ b/drivers/md/dm-init.c
@@ -25,7 +25,7 @@ static char *create;
 * Format: dm-mod.create=<name>,<uuid>,<minor>,<flags>,<table>[,<table>+][;<name>,<uuid>,<minor>,<flags>,<table>[,<table>+]+]
 * Table format: <start_sector> <num_sectors> <target_type> <target_args>
 *
- * See Documentation/device-mapper/dm-init.rst for dm-mod.create="..." format
+ * See Documentation/admin-guide/device-mapper/dm-init.rst for dm-mod.create="..." format
 * details.
 */
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 44e76cda087a..b1b0de402dfc 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -476,6 +476,9 @@ static int sync_rw_sb(struct dm_integrity_c *ic, int op, int op_flags)
 	io_loc.sector = ic->start;
 	io_loc.count = SB_SECTORS;
 
+	if (op == REQ_OP_WRITE)
+		sb_set_version(ic);
+
 	return dm_io(&io_req, 1, &io_loc, NULL);
 }
 
@@ -2317,7 +2320,6 @@ static void recalc_write_super(struct dm_integrity_c *ic)
 	if (dm_integrity_failed(ic))
 		return;
 
-	sb_set_version(ic);
 	r = sync_rw_sb(ic, REQ_OP_WRITE, 0);
 	if (unlikely(r))
 		dm_integrity_io_error(ic, "writing superblock", r);
@@ -3358,7 +3360,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
 		goto bad;
 	}
 
-	crypt_iv = kmalloc(ivsize, GFP_KERNEL);
+	crypt_iv = kzalloc(ivsize, GFP_KERNEL);
 	if (!crypt_iv) {
 		*error = "Could not allocate iv";
 		r = -ENOMEM;
@@ -3387,7 +3389,6 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
 		sg_set_buf(&sg[i], va, PAGE_SIZE);
 	}
 	sg_set_buf(&sg[i], &ic->commit_ids, sizeof ic->commit_ids);
-	memset(crypt_iv, 0x00, ivsize);
 
 	skcipher_request_set_crypt(req, sg, sg,
 				   PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, crypt_iv);
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 671c24332802..df2011de7be2 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -28,10 +28,27 @@
 
 #include "dm-core.h"
 
-#define SUB_JOB_SIZE	128
 #define SPLIT_COUNT	8
 #define MIN_JOBS	8
-#define RESERVE_PAGES	(DIV_ROUND_UP(SUB_JOB_SIZE << SECTOR_SHIFT, PAGE_SIZE))
+
+#define DEFAULT_SUB_JOB_SIZE_KB 512
+#define MAX_SUB_JOB_SIZE_KB     1024
+
+static unsigned kcopyd_subjob_size_kb = DEFAULT_SUB_JOB_SIZE_KB;
+
+module_param(kcopyd_subjob_size_kb, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(kcopyd_subjob_size_kb, "Sub-job size for dm-kcopyd clients");
+
+static unsigned dm_get_kcopyd_subjob_size(void)
+{
+	unsigned sub_job_size_kb;
+
+	sub_job_size_kb = __dm_get_module_param(&kcopyd_subjob_size_kb,
+						DEFAULT_SUB_JOB_SIZE_KB,
+						MAX_SUB_JOB_SIZE_KB);
+
+	return sub_job_size_kb << 1;
+}
 
 /*-----------------------------------------------------------------
 * Each kcopyd client has its own little pool of preallocated
@@ -41,6 +58,7 @@ struct dm_kcopyd_client {
 	struct page_list *pages;
 	unsigned nr_reserved_pages;
 	unsigned nr_free_pages;
+	unsigned sub_job_size;
 
 	struct dm_io_client *io_client;
 
@@ -693,8 +711,8 @@ static void segment_complete(int read_err, unsigned long write_err,
 	progress = job->progress;
 	count = job->source.count - progress;
 	if (count) {
-		if (count > SUB_JOB_SIZE)
-			count = SUB_JOB_SIZE;
+		if (count > kc->sub_job_size)
+			count = kc->sub_job_size;
 
 		job->progress += count;
 	}
@@ -821,7 +839,7 @@ void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
 	job->master_job = job;
 	job->write_offset = 0;
 
-	if (job->source.count <= SUB_JOB_SIZE)
+	if (job->source.count <= kc->sub_job_size)
 		dispatch_job(job);
 	else {
 		job->progress = 0;
@@ -888,6 +906,7 @@ int kcopyd_cancel(struct kcopyd_job *job, int block)
 struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle)
 {
 	int r;
+	unsigned reserve_pages;
 	struct dm_kcopyd_client *kc;
 
 	kc = kzalloc(sizeof(*kc), GFP_KERNEL);
@@ -912,9 +931,12 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *thro
 		goto bad_workqueue;
 	}
 
+	kc->sub_job_size = dm_get_kcopyd_subjob_size();
+	reserve_pages = DIV_ROUND_UP(kc->sub_job_size << SECTOR_SHIFT, PAGE_SIZE);
+
 	kc->pages = NULL;
 	kc->nr_reserved_pages = kc->nr_free_pages = 0;
-	r = client_reserve_pages(kc, RESERVE_PAGES);
+	r = client_reserve_pages(kc, reserve_pages);
 	if (r)
 		goto bad_client_pages;
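The new parameter is expressed in kilobytes while kcopyd counts in 512-byte sectors, hence the << 1 in dm_get_kcopyd_subjob_size(); the per-client page reservation then follows from the chosen sub-job size. With the 512 KB default and 4 KB pages this works out to 1024 sectors and 128 reserved pages per client. A small standalone check of that arithmetic (the constants mirror the patch; this is a model, not kernel code):

#include <assert.h>

#define SECTOR_SHIFT 9
#define PAGE_SIZE 4096UL
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned sub_job_size_kb = 512;			/* DEFAULT_SUB_JOB_SIZE_KB */
	unsigned sub_job_size = sub_job_size_kb << 1;	/* KB -> 512-byte sectors */
	unsigned long reserve_pages =
		DIV_ROUND_UP((unsigned long)sub_job_size << SECTOR_SHIFT, PAGE_SIZE);

	assert(sub_job_size == 1024);	/* 512 KB == 1024 sectors */
	assert(reserve_pages == 128);	/* 512 KB of pages at 4 KB each */
	return 0;
}

Since kcopyd is built into dm-mod, the knob should be reachable as dm_mod.kcopyd_subjob_size_kb on the kernel command line or under the module's parameters in sysfs, though the exact path depends on how device-mapper is built.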
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index ad980a38fb1e..ecefe6703736 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -137,15 +137,14 @@ static int linear_prepare_ioctl(struct dm_target *ti, struct block_device **bdev
 
 #ifdef CONFIG_BLK_DEV_ZONED
 static int linear_report_zones(struct dm_target *ti, sector_t sector,
-			       struct blk_zone *zones, unsigned int *nr_zones,
-			       gfp_t gfp_mask)
+			       struct blk_zone *zones, unsigned int *nr_zones)
 {
 	struct linear_c *lc = (struct linear_c *) ti->private;
 	int ret;
 
 	/* Do report and remap it */
 	ret = blkdev_report_zones(lc->dev->bdev, linear_map_sector(ti, sector),
-				  zones, nr_zones, gfp_mask);
+				  zones, nr_zones);
 	if (ret != 0)
 		return ret;
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index e549392e0ea5..99721c76225d 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -40,7 +40,7 @@
 *
 * Would result in the log looking like this:
 *
- * c,a,flush,fuad,b,<other writes>,<next flush>
+ * c,a,b,flush,fuad,<other writes>,<next flush>
 *
 * This is meant to help expose problems where file systems do not properly wait
 * on data being written before invoking a FLUSH.  FUA bypasses cache so once it
@@ -699,7 +699,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
 	if (discard_bio)
 		alloc_size = sizeof(struct pending_block);
 	else
-		alloc_size = sizeof(struct pending_block) + sizeof(struct bio_vec) * bio_segments(bio);
+		alloc_size = struct_size(block, vecs, bio_segments(bio));
 
 	block = kzalloc(alloc_size, GFP_NOIO);
 	if (!block) {
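This hunk, like the dm-crypt allocation earlier, replaces an open-coded sizeof(header) + n * sizeof(element) with struct_size(), which sizes a structure ending in a flexible array member and, in the kernel, saturates rather than wraps on overflow. A userspace model of what the macro computes; MODEL_STRUCT_SIZE and the struct here are hypothetical stand-ins without the overflow saturation:

#include <assert.h>
#include <stddef.h>

struct pending_block_model {
	int nr_vecs;
	struct { void *addr; unsigned long len; } vecs[];	/* flexible array member */
};

/* Size of the header plus n trailing elements; the real struct_size()
 * additionally clamps to SIZE_MAX if the multiply or add would overflow. */
#define MODEL_STRUCT_SIZE(p, member, n) \
	(sizeof(*(p)) + (size_t)(n) * sizeof((p)->member[0]))

int main(void)
{
	struct pending_block_model *b = 0;	/* only ever used inside sizeof */
	size_t sz = MODEL_STRUCT_SIZE(b, vecs, 4);

	assert(sz == sizeof(*b) + 4 * sizeof(b->vecs[0]));
	return 0;
}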
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 7a87a640f8ba..8a60a4a070ac 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3558,7 +3558,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
 		 * v1.5.0+:
 		 *
 		 * Sync action:
-		 *   See Documentation/device-mapper/dm-raid.rst for
+		 *   See Documentation/admin-guide/device-mapper/dm-raid.rst for
 		 *   information on each of these states.
 		 */
 		DMEMIT(" %s", sync_action);
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 5f7063f05ae0..c9e44ac1f9a6 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -115,7 +115,7 @@ static void end_clone_bio(struct bio *clone)
 
 	/*
 	 * Update the original request.
-	 * Do not use blk_end_request() here, because it may complete
+	 * Do not use blk_mq_end_request() here, because it may complete
 	 * the original request before the clone, and break the ordering.
 	 */
 	if (is_last)
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 3107f2b1988b..f150f5c5492b 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1,6 +1,4 @@
 /*
- * dm-snapshot.c
- *
 * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
 *
 * This file is released under the GPL.
@@ -134,7 +132,10 @@ struct dm_snapshot {
 	 *   - I/O error while merging
 	 *	=> stop merging; set merge_failed; process I/O normally.
 	 */
-	int merge_failed;
+	bool merge_failed:1;
+
+	bool discard_zeroes_cow:1;
+	bool discard_passdown_origin:1;
 
 	/*
 	 * Incoming bios that overlap with chunks being merged must wait
@@ -1173,12 +1174,64 @@ static void stop_merge(struct dm_snapshot *s)
 	clear_bit(SHUTDOWN_MERGE, &s->state_bits);
 }
 
+static int parse_snapshot_features(struct dm_arg_set *as, struct dm_snapshot *s,
+				   struct dm_target *ti)
+{
+	int r;
+	unsigned argc;
+	const char *arg_name;
+
+	static const struct dm_arg _args[] = {
+		{0, 2, "Invalid number of feature arguments"},
+	};
+
+	/*
+	 * No feature arguments supplied.
+	 */
+	if (!as->argc)
+		return 0;
+
+	r = dm_read_arg_group(_args, as, &argc, &ti->error);
+	if (r)
+		return -EINVAL;
+
+	while (argc && !r) {
+		arg_name = dm_shift_arg(as);
+		argc--;
+
+		if (!strcasecmp(arg_name, "discard_zeroes_cow"))
+			s->discard_zeroes_cow = true;
+
+		else if (!strcasecmp(arg_name, "discard_passdown_origin"))
+			s->discard_passdown_origin = true;
+
+		else {
+			ti->error = "Unrecognised feature requested";
+			r = -EINVAL;
+			break;
+		}
+	}
+
+	if (!s->discard_zeroes_cow && s->discard_passdown_origin) {
+		/*
+		 * TODO: really these are disjoint.. but ti->num_discard_bios
+		 * and dm_bio_get_target_bio_nr() require rigid constraints.
+		 */
+		ti->error = "discard_passdown_origin feature depends on discard_zeroes_cow";
+		r = -EINVAL;
+	}
+
+	return r;
+}
+
 /*
- * Construct a snapshot mapping: <origin_dev> <COW-dev> <p|po|n> <chunk-size>
+ * Construct a snapshot mapping:
+ * <origin_dev> <COW-dev> <p|po|n> <chunk-size> [<# feature args> [<arg>]*]
 */
 static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 {
 	struct dm_snapshot *s;
+	struct dm_arg_set as;
 	int i;
 	int r = -EINVAL;
 	char *origin_path, *cow_path;
@@ -1186,8 +1239,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	unsigned args_used, num_flush_bios = 1;
 	fmode_t origin_mode = FMODE_READ;
 
-	if (argc != 4) {
-		ti->error = "requires exactly 4 arguments";
+	if (argc < 4) {
+		ti->error = "requires 4 or more arguments";
 		r = -EINVAL;
 		goto bad;
 	}
@@ -1204,6 +1257,13 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad;
 	}
 
+	as.argc = argc;
+	as.argv = argv;
+	dm_consume_args(&as, 4);
+	r = parse_snapshot_features(&as, s, ti);
+	if (r)
+		goto bad_features;
+
 	origin_path = argv[0];
 	argv++;
 	argc--;
@@ -1289,6 +1349,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 	ti->private = s;
 	ti->num_flush_bios = num_flush_bios;
+	if (s->discard_zeroes_cow)
+		ti->num_discard_bios = (s->discard_passdown_origin ? 2 : 1);
 	ti->per_io_data_size = sizeof(struct dm_snap_tracked_chunk);
 
 	/* Add snapshot to the list of snapshots for this origin */
@@ -1336,29 +1398,22 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 bad_read_metadata:
 	unregister_snapshot(s);
-
 bad_load_and_register:
 	mempool_exit(&s->pending_pool);
-
 bad_pending_pool:
 	dm_kcopyd_client_destroy(s->kcopyd_client);
-
 bad_kcopyd:
 	dm_exception_table_exit(&s->pending, pending_cache);
 	dm_exception_table_exit(&s->complete, exception_cache);
-
 bad_hash_tables:
 	dm_exception_store_destroy(s->store);
-
 bad_store:
 	dm_put_device(ti, s->cow);
-
 bad_cow:
 	dm_put_device(ti, s->origin);
-
 bad_origin:
+bad_features:
 	kfree(s);
-
 bad:
 	return r;
 }
@@ -1806,6 +1861,37 @@ static void remap_exception(struct dm_snapshot *s, struct dm_exception *e,
 		    (bio->bi_iter.bi_sector & s->store->chunk_mask);
 }
 
+static void zero_callback(int read_err, unsigned long write_err, void *context)
+{
+	struct bio *bio = context;
+	struct dm_snapshot *s = bio->bi_private;
+
+	up(&s->cow_count);
+	bio->bi_status = write_err ? BLK_STS_IOERR : 0;
+	bio_endio(bio);
+}
+static void zero_exception(struct dm_snapshot *s, struct dm_exception *e,
+			   struct bio *bio, chunk_t chunk)
+{
+	struct dm_io_region dest;
+
+	dest.bdev = s->cow->bdev;
+	dest.sector = bio->bi_iter.bi_sector;
+	dest.count = s->store->chunk_size;
+
+	down(&s->cow_count);
+	WARN_ON_ONCE(bio->bi_private);
+	bio->bi_private = s;
+	dm_kcopyd_zero(s->kcopyd_client, 1, &dest, 0, zero_callback, bio);
+}
+
+static bool io_overlaps_chunk(struct dm_snapshot *s, struct bio *bio)
+{
+	return bio->bi_iter.bi_size ==
+		(s->store->chunk_size << SECTOR_SHIFT);
+}
+
 static int snapshot_map(struct dm_target *ti, struct bio *bio)
 {
 	struct dm_exception *e;
@@ -1839,10 +1925,43 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
 		goto out_unlock;
 	}
 
+	if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
+		if (s->discard_passdown_origin && dm_bio_get_target_bio_nr(bio)) {
+			/*
+			 * passdown discard to origin (without triggering
+			 * snapshot exceptions via do_origin; doing so would
+			 * defeat the goal of freeing space in origin that is
+			 * implied by the "discard_passdown_origin" feature)
+			 */
+			bio_set_dev(bio, s->origin->bdev);
+			track_chunk(s, bio, chunk);
+			goto out_unlock;
+		}
+		/* discard to snapshot (target_bio_nr == 0) zeroes exceptions */
+	}
+
 	/* If the block is already remapped - use that, else remap it */
 	e = dm_lookup_exception(&s->complete, chunk);
 	if (e) {
 		remap_exception(s, e, bio, chunk);
+		if (unlikely(bio_op(bio) == REQ_OP_DISCARD) &&
+		    io_overlaps_chunk(s, bio)) {
+			dm_exception_table_unlock(&lock);
+			up_read(&s->lock);
+			zero_exception(s, e, bio, chunk);
+			r = DM_MAPIO_SUBMITTED; /* discard is not issued */
+			goto out;
+		}
+		goto out_unlock;
+	}
+
+	if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
+		/*
+		 * If no exception exists, complete discard immediately;
+		 * otherwise it'll trigger copy-out.
+		 */
+		bio_endio(bio);
+		r = DM_MAPIO_SUBMITTED;
 		goto out_unlock;
 	}
 
@@ -1890,9 +2009,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
 
 	r = DM_MAPIO_SUBMITTED;
 
-	if (!pe->started &&
-	    bio->bi_iter.bi_size ==
-	    (s->store->chunk_size << SECTOR_SHIFT)) {
+	if (!pe->started && io_overlaps_chunk(s, bio)) {
 		pe->started = 1;
 
 		dm_exception_table_unlock(&lock);
@@ -1955,6 +2072,12 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
 		return DM_MAPIO_REMAPPED;
 	}
 
+	if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
+		/* Once merging, discards no longer effect change */
+		bio_endio(bio);
+		return DM_MAPIO_SUBMITTED;
+	}
+
 	chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
 
 	down_write(&s->lock);
@@ -2138,6 +2261,7 @@ static void snapshot_status(struct dm_target *ti, status_type_t type,
 {
 	unsigned sz = 0;
 	struct dm_snapshot *snap = ti->private;
+	unsigned num_features;
 
 	switch (type) {
 	case STATUSTYPE_INFO:
@@ -2178,8 +2302,16 @@ static void snapshot_status(struct dm_target *ti, status_type_t type,
 		 * make sense.
 		 */
 		DMEMIT("%s %s", snap->origin->name, snap->cow->name);
-		snap->store->type->status(snap->store, type, result + sz,
-					  maxlen - sz);
+		sz += snap->store->type->status(snap->store, type, result + sz,
+						maxlen - sz);
+		num_features = snap->discard_zeroes_cow + snap->discard_passdown_origin;
+		if (num_features) {
+			DMEMIT(" %u", num_features);
+			if (snap->discard_zeroes_cow)
+				DMEMIT(" discard_zeroes_cow");
+			if (snap->discard_passdown_origin)
+				DMEMIT(" discard_passdown_origin");
+		}
 		break;
 	}
 }
@@ -2198,6 +2330,26 @@ static int snapshot_iterate_devices(struct dm_target *ti,
 	return r;
 }
 
+static void snapshot_io_hints(struct dm_target *ti, struct queue_limits *limits)
+{
+	struct dm_snapshot *snap = ti->private;
+
+	if (snap->discard_zeroes_cow) {
+		struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
+
+		down_read(&_origins_lock);
+
+		(void) __find_snapshots_sharing_cow(snap, &snap_src, &snap_dest, NULL);
+		if (snap_src && snap_dest)
+			snap = snap_src;
+
+		/* All discards are split on chunk_size boundary */
+		limits->discard_granularity = snap->store->chunk_size;
+		limits->max_discard_sectors = snap->store->chunk_size;
+
+		up_read(&_origins_lock);
+	}
+}
 
 /*-----------------------------------------------------------------
 * Origin methods
@@ -2522,7 +2674,7 @@ static struct target_type origin_target = {
 
 static struct target_type snapshot_target = {
 	.name    = "snapshot",
-	.version = {1, 15, 0},
+	.version = {1, 16, 0},
 	.module  = THIS_MODULE,
 	.ctr     = snapshot_ctr,
 	.dtr     = snapshot_dtr,
@@ -2532,11 +2684,12 @@ static struct target_type snapshot_target = {
 	.resume  = snapshot_resume,
 	.status  = snapshot_status,
 	.iterate_devices = snapshot_iterate_devices,
+	.io_hints = snapshot_io_hints,
 };
 
 static struct target_type merge_target = {
 	.name    = dm_snapshot_merge_target_name,
-	.version = {1, 4, 0},
+	.version = {1, 5, 0},
 	.module  = THIS_MODULE,
 	.ctr     = snapshot_ctr,
 	.dtr     = snapshot_dtr,
@@ -2547,6 +2700,7 @@ static struct target_type merge_target = {
 	.resume  = snapshot_merge_resume,
 	.status  = snapshot_status,
 	.iterate_devices = snapshot_iterate_devices,
+	.io_hints = snapshot_io_hints,
 };
 
 static int __init dm_snapshot_init(void)
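With the feature arguments parsed in snapshot_ctr(), reported by snapshot_status() and enforced by snapshot_io_hints() above, a snapshot table may now carry an optional feature-count suffix. A hypothetical dmsetup table line (device names and sizes are purely illustrative):

0 2097152 snapshot /dev/vg0/origin /dev/vg0/cow P 8 2 discard_zeroes_cow discard_passdown_origin

Target bio number 0 zeroes the corresponding COW exception, and when discard_passdown_origin is also given, a second discard bio is sent straight to the origin device, matching the two branches added to snapshot_map().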
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index ec8b27e20de3..caaee8032afe 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -881,7 +881,7 @@ void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type)
 EXPORT_SYMBOL_GPL(dm_table_set_type);
 
 /* validate the dax capability of the target device span */
-static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
+int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
 			sector_t start, sector_t len, void *data)
 {
 	int blocksize = *(int *) data;
@@ -890,7 +890,15 @@ static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
 				       start, len);
 }
 
-bool dm_table_supports_dax(struct dm_table *t, int blocksize)
+/* Check devices support synchronous DAX */
+static int device_synchronous(struct dm_target *ti, struct dm_dev *dev,
+			      sector_t start, sector_t len, void *data)
+{
+	return dax_synchronous(dev->dax_dev);
+}
+
+bool dm_table_supports_dax(struct dm_table *t,
+			   iterate_devices_callout_fn iterate_fn, int *blocksize)
 {
 	struct dm_target *ti;
 	unsigned i;
@@ -903,8 +911,7 @@ bool dm_table_supports_dax(struct dm_table *t, int blocksize)
 			return false;
 
 		if (!ti->type->iterate_devices ||
-		    !ti->type->iterate_devices(ti, device_supports_dax,
-					       &blocksize))
+		    !ti->type->iterate_devices(ti, iterate_fn, blocksize))
 			return false;
 	}
 
@@ -940,6 +947,7 @@ static int dm_table_determine_type(struct dm_table *t)
 	struct dm_target *tgt;
 	struct list_head *devices = dm_table_get_devices(t);
 	enum dm_queue_mode live_md_type = dm_get_md_type(t->md);
+	int page_size = PAGE_SIZE;
 
 	if (t->type != DM_TYPE_NONE) {
 		/* target already set the table's type */
@@ -984,7 +992,7 @@ static int dm_table_determine_type(struct dm_table *t)
 verify_bio_based:
 		/* We must use this table as bio-based */
 		t->type = DM_TYPE_BIO_BASED;
-		if (dm_table_supports_dax(t, PAGE_SIZE) ||
+		if (dm_table_supports_dax(t, device_supports_dax, &page_size) ||
 		    (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) {
 			t->type = DM_TYPE_DAX_BIO_BASED;
 		} else {
@@ -1883,6 +1891,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 			       struct queue_limits *limits)
 {
 	bool wc = false, fua = false;
+	int page_size = PAGE_SIZE;
 
 	/*
 	 * Copy table's limits to the DM device's request_queue
@@ -1910,8 +1919,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	}
 	blk_queue_write_cache(q, wc, fua);
 
-	if (dm_table_supports_dax(t, PAGE_SIZE))
+	if (dm_table_supports_dax(t, device_supports_dax, &page_size)) {
 		blk_queue_flag_set(QUEUE_FLAG_DAX, q);
+		if (dm_table_supports_dax(t, device_synchronous, NULL))
+			set_dax_synchronous(t->md->dax_dev);
+	}
 	else
 		blk_queue_flag_clear(QUEUE_FLAG_DAX, q);
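The dm-table refactor turns dm_table_supports_dax() into a generic walk: the per-device predicate (device_supports_dax or device_synchronous) is passed in together with opaque data, so one traversal serves both checks. The shape of that pattern, reduced to standalone C with illustrative types and names rather than the dm API:

#include <stdbool.h>
#include <stddef.h>

struct dev { int blocksize; bool synchronous; };

/* Per-device predicate with opaque data, mirroring iterate_devices_callout_fn. */
typedef bool (*dev_pred_fn)(const struct dev *dev, void *data);

static bool dev_supports_blocksize(const struct dev *dev, void *data)
{
	return dev->blocksize == *(int *)data;
}

static bool dev_is_synchronous(const struct dev *dev, void *data)
{
	return dev->synchronous;	/* data unused, as in device_synchronous() */
}

/* One walk serves every predicate: a single failing device vetoes the table. */
static bool all_devs(const struct dev *devs, size_t n, dev_pred_fn fn, void *data)
{
	for (size_t i = 0; i < n; i++)
		if (!fn(&devs[i], data))
			return false;
	return true;
}

int main(void)
{
	struct dev devs[2] = { { 4096, true }, { 4096, false } };
	int bs = 4096;
	bool bs_ok   = all_devs(devs, 2, dev_supports_blocksize, &bs);	/* true */
	bool sync_ok = all_devs(devs, 2, dev_is_synchronous, NULL);	/* false */

	return bs_ok && !sync_ok ? 0 : 1;
}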
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 7f0840601737..4c68a7b93d5e 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -2046,16 +2046,19 @@ int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
 
 int dm_pool_metadata_set_needs_check(struct dm_pool_metadata *pmd)
 {
-	int r;
+	int r = -EINVAL;
 	struct dm_block *sblock;
 	struct thin_disk_superblock *disk_super;
 
 	pmd_write_lock(pmd);
+	if (pmd->fail_io)
+		goto out;
+
 	pmd->flags |= THIN_METADATA_NEEDS_CHECK_FLAG;
 
 	r = superblock_lock(pmd, &sblock);
 	if (r) {
-		DMERR("couldn't read superblock");
+		DMERR("couldn't lock superblock");
 		goto out;
 	}
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index d8334cd45d7c..8545dcee9fd0 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -8,6 +8,7 @@
 
 #include <linux/module.h>
 #include <linux/crc32.h>
+#include <linux/sched/mm.h>
 
 #define	DM_MSG_PREFIX		"zoned metadata"
 
@@ -1162,8 +1163,7 @@ static int dmz_init_zones(struct dmz_metadata *zmd)
 	while (sector < dev->capacity) {
 		/* Get zone information */
 		nr_blkz = DMZ_REPORT_NR_ZONES;
-		ret = blkdev_report_zones(dev->bdev, sector, blkz,
-					  &nr_blkz, GFP_KERNEL);
+		ret = blkdev_report_zones(dev->bdev, sector, blkz, &nr_blkz);
 		if (ret) {
 			dmz_dev_err(dev, "Report zones failed %d", ret);
 			goto out;
@@ -1201,12 +1201,20 @@ out:
 static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
 {
 	unsigned int nr_blkz = 1;
+	unsigned int noio_flag;
 	struct blk_zone blkz;
 	int ret;
 
-	/* Get zone information from disk */
+	/*
+	 * Get zone information from disk. Since blkdev_report_zones() uses
+	 * GFP_KERNEL by default for memory allocations, set the per-task
+	 * PF_MEMALLOC_NOIO flag so that all allocations are done as if
+	 * GFP_NOIO was specified.
+	 */
+	noio_flag = memalloc_noio_save();
 	ret = blkdev_report_zones(zmd->dev->bdev, dmz_start_sect(zmd, zone),
-				  &blkz, &nr_blkz, GFP_NOIO);
+				  &blkz, &nr_blkz);
+	memalloc_noio_restore(noio_flag);
 	if (!nr_blkz)
 		ret = -EIO;
 	if (ret) {
@@ -1594,30 +1602,6 @@ struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd)
 }
 
 /*
- * Activate a zone (increment its reference count).
- */
-void dmz_activate_zone(struct dm_zone *zone)
-{
-	set_bit(DMZ_ACTIVE, &zone->flags);
-	atomic_inc(&zone->refcount);
-}
-
-/*
- * Deactivate a zone. This decrement the zone reference counter
- * and clears the active state of the zone once the count reaches 0,
- * indicating that all BIOs to the zone have completed. Returns
- * true if the zone was deactivated.
- */
-void dmz_deactivate_zone(struct dm_zone *zone)
-{
-	if (atomic_dec_and_test(&zone->refcount)) {
-		WARN_ON(!test_bit(DMZ_ACTIVE, &zone->flags));
-		clear_bit_unlock(DMZ_ACTIVE, &zone->flags);
-		smp_mb__after_atomic();
-	}
-}
-
-/*
 * Get the zone mapping a chunk, if the chunk is mapped already.
 * If no mapping exist and the operation is WRITE, a zone is
 * allocated and used to map the chunk.
diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h
index 12419f0bfe78..ed8de49c9a08 100644
--- a/drivers/md/dm-zoned.h
+++ b/drivers/md/dm-zoned.h
@@ -115,7 +115,6 @@ enum {
 	DMZ_BUF,
 
 	/* Zone internal state */
-	DMZ_ACTIVE,
 	DMZ_RECLAIM,
 	DMZ_SEQ_WRITE_ERR,
 };
@@ -128,7 +127,6 @@
 #define dmz_is_empty(z)		((z)->wp_block == 0)
 #define dmz_is_offline(z)	test_bit(DMZ_OFFLINE, &(z)->flags)
 #define dmz_is_readonly(z)	test_bit(DMZ_READ_ONLY, &(z)->flags)
-#define dmz_is_active(z)	test_bit(DMZ_ACTIVE, &(z)->flags)
 #define dmz_in_reclaim(z)	test_bit(DMZ_RECLAIM, &(z)->flags)
 #define dmz_seq_write_err(z)	test_bit(DMZ_SEQ_WRITE_ERR, &(z)->flags)
 
@@ -188,8 +186,30 @@ void dmz_unmap_zone(struct dmz_metadata *zmd, struct dm_zone *zone);
 unsigned int dmz_nr_rnd_zones(struct dmz_metadata *zmd);
 unsigned int dmz_nr_unmap_rnd_zones(struct dmz_metadata *zmd);
 
-void dmz_activate_zone(struct dm_zone *zone);
-void dmz_deactivate_zone(struct dm_zone *zone);
+/*
+ * Activate a zone (increment its reference count).
+ */
+static inline void dmz_activate_zone(struct dm_zone *zone)
+{
+	atomic_inc(&zone->refcount);
+}
+
+/*
+ * Deactivate a zone. This decrements the zone reference count,
+ * indicating that all BIOs to the zone have completed when the count
+ * reaches 0.
+ */
+static inline void dmz_deactivate_zone(struct dm_zone *zone)
+{
+	atomic_dec(&zone->refcount);
+}
+
+/*
+ * Test if a zone is active, that is, has a refcount > 0.
+ */
+static inline bool dmz_is_active(struct dm_zone *zone)
+{
+	return atomic_read(&zone->refcount);
+}
 
 int dmz_lock_zone_reclaim(struct dm_zone *zone);
 void dmz_unlock_zone_reclaim(struct dm_zone *zone);
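The dm-zoned cleanup drops the separate DMZ_ACTIVE flag: the bit was only ever set while the refcount was non-zero, so "active" now simply means refcount > 0 and the helpers shrink to the inlines above. The invariant, modeled with C11 atomics in a userspace sketch (not the kernel atomic_t API):

#include <stdatomic.h>
#include <stdbool.h>

struct zone { atomic_int refcount; };

static inline void zone_activate(struct zone *z)
{
	atomic_fetch_add(&z->refcount, 1);	/* one reference per in-flight BIO */
}

static inline void zone_deactivate(struct zone *z)
{
	atomic_fetch_sub(&z->refcount, 1);	/* last put makes the zone inactive */
}

static inline bool zone_is_active(struct zone *z)
{
	return atomic_load(&z->refcount) > 0;	/* no separate "active" bit needed */
}

int main(void)
{
	struct zone z = { .refcount = 0 };
	bool held;

	zone_activate(&z);
	held = zone_is_active(&z);	/* true while a reference is held */
	zone_deactivate(&z);

	return held && !zone_is_active(&z) ? 0 : 1;
}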
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 5475081dcbd6..d0beef033e2f 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -441,8 +441,7 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 }
 
 static int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
-			       struct blk_zone *zones, unsigned int *nr_zones,
-			       gfp_t gfp_mask)
+			       struct blk_zone *zones, unsigned int *nr_zones)
 {
 #ifdef CONFIG_BLK_DEV_ZONED
 	struct mapped_device *md = disk->private_data;
@@ -480,8 +479,7 @@ static int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
 	 * So there is no need to loop here trying to fill the entire array
 	 * of zones.
 	 */
-	ret = tgt->type->report_zones(tgt, sector, zones,
-				      nr_zones, gfp_mask);
+	ret = tgt->type->report_zones(tgt, sector, zones, nr_zones);
 
 out:
 	dm_put_live_table(md, srcu_idx);
@@ -1119,7 +1117,7 @@ static bool dm_dax_supported(struct dax_device *dax_dev, struct block_device *bd
 	if (!map)
 		return false;
 
-	ret = dm_table_supports_dax(map, blocksize);
+	ret = dm_table_supports_dax(map, device_supports_dax, &blocksize);
 
 	dm_put_live_table(md, srcu_idx);
 
@@ -1991,7 +1989,8 @@ static struct mapped_device *alloc_dev(int minor)
 	sprintf(md->disk->disk_name, "dm-%d", minor);
 
 	if (IS_ENABLED(CONFIG_DAX_DRIVER)) {
-		md->dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
+		md->dax_dev = alloc_dax(md, md->disk->disk_name,
+					&dm_dax_ops, 0);
 		if (!md->dax_dev)
 			goto bad;
 	}
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 17e3db54404c..0475673337f3 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -72,7 +72,10 @@ bool dm_table_bio_based(struct dm_table *t);
 bool dm_table_request_based(struct dm_table *t);
 void dm_table_free_md_mempools(struct dm_table *t);
 struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
-bool dm_table_supports_dax(struct dm_table *t, int blocksize);
+bool dm_table_supports_dax(struct dm_table *t, iterate_devices_callout_fn fn,
+			   int *blocksize);
+int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
+			sector_t start, sector_t len, void *data);
 
 void dm_lock_md_type(struct mapped_device *md);
 void dm_unlock_md_type(struct mapped_device *md);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index a114b05e3db4..24638ccedce4 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5316,7 +5316,8 @@ int mddev_init_writes_pending(struct mddev *mddev)
 {
 	if (mddev->writes_pending.percpu_count_ptr)
 		return 0;
-	if (percpu_ref_init(&mddev->writes_pending, no_op, 0, GFP_KERNEL) < 0)
+	if (percpu_ref_init(&mddev->writes_pending, no_op,
+			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL) < 0)
 		return -ENOMEM;
 	/* We want to start with the refcount at zero */
 	percpu_ref_put(&mddev->writes_pending);