summaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/Kconfig2
-rw-r--r--drivers/md/bcache/super.c3
-rw-r--r--drivers/md/dm-bufio.c4
-rw-r--r--drivers/md/dm-crypt.c101
-rw-r--r--drivers/md/dm-flakey.c5
-rw-r--r--drivers/md/dm-init.c2
-rw-r--r--drivers/md/dm-integrity.c7
-rw-r--r--drivers/md/dm-kcopyd.c34
-rw-r--r--drivers/md/dm-linear.c5
-rw-r--r--drivers/md/dm-log-writes.c4
-rw-r--r--drivers/md/dm-raid.c2
-rw-r--r--drivers/md/dm-rq.c2
-rw-r--r--drivers/md/dm-snap.c196
-rw-r--r--drivers/md/dm-table.c24
-rw-r--r--drivers/md/dm-thin-metadata.c7
-rw-r--r--drivers/md/dm-zoned-metadata.c40
-rw-r--r--drivers/md/dm-zoned.h28
-rw-r--r--drivers/md/dm.c11
-rw-r--r--drivers/md/dm.h5
-rw-r--r--drivers/md/md.c3
20 files changed, 381 insertions, 104 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 5ccac0b77f17..3834332f4963 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -453,7 +453,7 @@ config DM_INIT
Enable "dm-mod.create=" parameter to create mapped devices at init time.
This option is useful to allow mounting rootfs without requiring an
initramfs.
- See Documentation/device-mapper/dm-init.rst for dm-mod.create="..."
+ See Documentation/admin-guide/device-mapper/dm-init.rst for dm-mod.create="..."
format.
If unsure, say N.
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 26e374fbf57c..20ed838e9413 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -931,6 +931,9 @@ int bch_cached_dev_run(struct cached_dev *dc)
if (dc->io_disable) {
pr_err("I/O disabled on cached dev %s",
dc->backing_dev_name);
+ kfree(env[1]);
+ kfree(env[2]);
+ kfree(buf);
return -EIO;
}
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 2a48ea3f1b30..b6b5acc92ca2 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1599,9 +1599,7 @@ dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
unsigned long freed;
c = container_of(shrink, struct dm_bufio_client, shrinker);
- if (sc->gfp_mask & __GFP_FS)
- dm_bufio_lock(c);
- else if (!dm_bufio_trylock(c))
+ if (!dm_bufio_trylock(c))
return SHRINK_STOP;
freed = __scan(c, sc->nr_to_scan, sc->gfp_mask);
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 1b16d34bb785..d5216bcc4649 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -120,6 +120,10 @@ struct iv_tcw_private {
u8 *whitening;
};
+struct iv_eboiv_private {
+ struct crypto_cipher *tfm;
+};
+
/*
* Crypt: maps a linear range of a block device
* and encrypts / decrypts at the same time.
@@ -159,6 +163,7 @@ struct crypt_config {
struct iv_benbi_private benbi;
struct iv_lmk_private lmk;
struct iv_tcw_private tcw;
+ struct iv_eboiv_private eboiv;
} iv_gen_private;
u64 iv_offset;
unsigned int iv_size;
@@ -291,8 +296,9 @@ static struct crypto_aead *any_tfm_aead(struct crypt_config *cc)
* Note that this encryption scheme is vulnerable to watermarking attacks
* and should be used for old compatible containers access only.
*
- * plumb: unimplemented, see:
- * http://article.gmane.org/gmane.linux.kernel.device-mapper.dm-crypt/454
+ * eboiv: Encrypted byte-offset IV (used in Bitlocker in CBC mode)
+ * The IV is encrypted little-endian byte-offset (with the same key
+ * and cipher as the volume).
*/
static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv,
@@ -841,6 +847,67 @@ static int crypt_iv_random_gen(struct crypt_config *cc, u8 *iv,
return 0;
}
+static void crypt_iv_eboiv_dtr(struct crypt_config *cc)
+{
+ struct iv_eboiv_private *eboiv = &cc->iv_gen_private.eboiv;
+
+ crypto_free_cipher(eboiv->tfm);
+ eboiv->tfm = NULL;
+}
+
+static int crypt_iv_eboiv_ctr(struct crypt_config *cc, struct dm_target *ti,
+ const char *opts)
+{
+ struct iv_eboiv_private *eboiv = &cc->iv_gen_private.eboiv;
+ struct crypto_cipher *tfm;
+
+ tfm = crypto_alloc_cipher(cc->cipher, 0, 0);
+ if (IS_ERR(tfm)) {
+ ti->error = "Error allocating crypto tfm for EBOIV";
+ return PTR_ERR(tfm);
+ }
+
+ if (crypto_cipher_blocksize(tfm) != cc->iv_size) {
+ ti->error = "Block size of EBOIV cipher does "
+ "not match IV size of block cipher";
+ crypto_free_cipher(tfm);
+ return -EINVAL;
+ }
+
+ eboiv->tfm = tfm;
+ return 0;
+}
+
+static int crypt_iv_eboiv_init(struct crypt_config *cc)
+{
+ struct iv_eboiv_private *eboiv = &cc->iv_gen_private.eboiv;
+ int err;
+
+ err = crypto_cipher_setkey(eboiv->tfm, cc->key, cc->key_size);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int crypt_iv_eboiv_wipe(struct crypt_config *cc)
+{
+ /* Called after cc->key is set to random key in crypt_wipe() */
+ return crypt_iv_eboiv_init(cc);
+}
+
+static int crypt_iv_eboiv_gen(struct crypt_config *cc, u8 *iv,
+ struct dm_crypt_request *dmreq)
+{
+ struct iv_eboiv_private *eboiv = &cc->iv_gen_private.eboiv;
+
+ memset(iv, 0, cc->iv_size);
+ *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector * cc->sector_size);
+ crypto_cipher_encrypt_one(eboiv->tfm, iv, iv);
+
+ return 0;
+}
+
static const struct crypt_iv_operations crypt_iv_plain_ops = {
.generator = crypt_iv_plain_gen
};
@@ -893,6 +960,14 @@ static struct crypt_iv_operations crypt_iv_random_ops = {
.generator = crypt_iv_random_gen
};
+static struct crypt_iv_operations crypt_iv_eboiv_ops = {
+ .ctr = crypt_iv_eboiv_ctr,
+ .dtr = crypt_iv_eboiv_dtr,
+ .init = crypt_iv_eboiv_init,
+ .wipe = crypt_iv_eboiv_wipe,
+ .generator = crypt_iv_eboiv_gen
+};
+
/*
* Integrity extensions
*/
@@ -2158,6 +2233,14 @@ static int crypt_wipe_key(struct crypt_config *cc)
clear_bit(DM_CRYPT_KEY_VALID, &cc->flags);
get_random_bytes(&cc->key, cc->key_size);
+
+ /* Wipe IV private keys */
+ if (cc->iv_gen_ops && cc->iv_gen_ops->wipe) {
+ r = cc->iv_gen_ops->wipe(cc);
+ if (r)
+ return r;
+ }
+
kzfree(cc->key_string);
cc->key_string = NULL;
r = crypt_setkey(cc);
@@ -2288,6 +2371,8 @@ static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode)
cc->iv_gen_ops = &crypt_iv_benbi_ops;
else if (strcmp(ivmode, "null") == 0)
cc->iv_gen_ops = &crypt_iv_null_ops;
+ else if (strcmp(ivmode, "eboiv") == 0)
+ cc->iv_gen_ops = &crypt_iv_eboiv_ops;
else if (strcmp(ivmode, "lmk") == 0) {
cc->iv_gen_ops = &crypt_iv_lmk_ops;
/*
@@ -2699,7 +2784,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
return -EINVAL;
}
- cc = kzalloc(sizeof(*cc) + key_size * sizeof(u8), GFP_KERNEL);
+ cc = kzalloc(struct_size(cc, key, key_size), GFP_KERNEL);
if (!cc) {
ti->error = "Cannot allocate encryption context";
return -ENOMEM;
@@ -3050,14 +3135,8 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv,
memset(cc->key, 0, cc->key_size * sizeof(u8));
return ret;
}
- if (argc == 2 && !strcasecmp(argv[1], "wipe")) {
- if (cc->iv_gen_ops && cc->iv_gen_ops->wipe) {
- ret = cc->iv_gen_ops->wipe(cc);
- if (ret)
- return ret;
- }
+ if (argc == 2 && !strcasecmp(argv[1], "wipe"))
return crypt_wipe_key(cc);
- }
}
error:
@@ -3094,7 +3173,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type crypt_target = {
.name = "crypt",
- .version = {1, 18, 1},
+ .version = {1, 19, 0},
.module = THIS_MODULE,
.ctr = crypt_ctr,
.dtr = crypt_dtr,
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index a9bc518156f2..2900fbde89b3 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -461,15 +461,14 @@ static int flakey_prepare_ioctl(struct dm_target *ti, struct block_device **bdev
#ifdef CONFIG_BLK_DEV_ZONED
static int flakey_report_zones(struct dm_target *ti, sector_t sector,
- struct blk_zone *zones, unsigned int *nr_zones,
- gfp_t gfp_mask)
+ struct blk_zone *zones, unsigned int *nr_zones)
{
struct flakey_c *fc = ti->private;
int ret;
/* Do report and remap it */
ret = blkdev_report_zones(fc->dev->bdev, flakey_map_sector(ti, sector),
- zones, nr_zones, gfp_mask);
+ zones, nr_zones);
if (ret != 0)
return ret;
diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c
index b65faef2c4b5..b869316d3722 100644
--- a/drivers/md/dm-init.c
+++ b/drivers/md/dm-init.c
@@ -25,7 +25,7 @@ static char *create;
* Format: dm-mod.create=<name>,<uuid>,<minor>,<flags>,<table>[,<table>+][;<name>,<uuid>,<minor>,<flags>,<table>[,<table>+]+]
* Table format: <start_sector> <num_sectors> <target_type> <target_args>
*
- * See Documentation/device-mapper/dm-init.rst for dm-mod.create="..." format
+ * See Documentation/admin-guide/device-mapper/dm-init.rst for dm-mod.create="..." format
* details.
*/
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 44e76cda087a..b1b0de402dfc 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -476,6 +476,9 @@ static int sync_rw_sb(struct dm_integrity_c *ic, int op, int op_flags)
io_loc.sector = ic->start;
io_loc.count = SB_SECTORS;
+ if (op == REQ_OP_WRITE)
+ sb_set_version(ic);
+
return dm_io(&io_req, 1, &io_loc, NULL);
}
@@ -2317,7 +2320,6 @@ static void recalc_write_super(struct dm_integrity_c *ic)
if (dm_integrity_failed(ic))
return;
- sb_set_version(ic);
r = sync_rw_sb(ic, REQ_OP_WRITE, 0);
if (unlikely(r))
dm_integrity_io_error(ic, "writing superblock", r);
@@ -3358,7 +3360,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
goto bad;
}
- crypt_iv = kmalloc(ivsize, GFP_KERNEL);
+ crypt_iv = kzalloc(ivsize, GFP_KERNEL);
if (!crypt_iv) {
*error = "Could not allocate iv";
r = -ENOMEM;
@@ -3387,7 +3389,6 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
sg_set_buf(&sg[i], va, PAGE_SIZE);
}
sg_set_buf(&sg[i], &ic->commit_ids, sizeof ic->commit_ids);
- memset(crypt_iv, 0x00, ivsize);
skcipher_request_set_crypt(req, sg, sg,
PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, crypt_iv);
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 671c24332802..df2011de7be2 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -28,10 +28,27 @@
#include "dm-core.h"
-#define SUB_JOB_SIZE 128
#define SPLIT_COUNT 8
#define MIN_JOBS 8
-#define RESERVE_PAGES (DIV_ROUND_UP(SUB_JOB_SIZE << SECTOR_SHIFT, PAGE_SIZE))
+
+#define DEFAULT_SUB_JOB_SIZE_KB 512
+#define MAX_SUB_JOB_SIZE_KB 1024
+
+static unsigned kcopyd_subjob_size_kb = DEFAULT_SUB_JOB_SIZE_KB;
+
+module_param(kcopyd_subjob_size_kb, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(kcopyd_subjob_size_kb, "Sub-job size for dm-kcopyd clients");
+
+static unsigned dm_get_kcopyd_subjob_size(void)
+{
+ unsigned sub_job_size_kb;
+
+ sub_job_size_kb = __dm_get_module_param(&kcopyd_subjob_size_kb,
+ DEFAULT_SUB_JOB_SIZE_KB,
+ MAX_SUB_JOB_SIZE_KB);
+
+ return sub_job_size_kb << 1;
+}
/*-----------------------------------------------------------------
* Each kcopyd client has its own little pool of preallocated
@@ -41,6 +58,7 @@ struct dm_kcopyd_client {
struct page_list *pages;
unsigned nr_reserved_pages;
unsigned nr_free_pages;
+ unsigned sub_job_size;
struct dm_io_client *io_client;
@@ -693,8 +711,8 @@ static void segment_complete(int read_err, unsigned long write_err,
progress = job->progress;
count = job->source.count - progress;
if (count) {
- if (count > SUB_JOB_SIZE)
- count = SUB_JOB_SIZE;
+ if (count > kc->sub_job_size)
+ count = kc->sub_job_size;
job->progress += count;
}
@@ -821,7 +839,7 @@ void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
job->master_job = job;
job->write_offset = 0;
- if (job->source.count <= SUB_JOB_SIZE)
+ if (job->source.count <= kc->sub_job_size)
dispatch_job(job);
else {
job->progress = 0;
@@ -888,6 +906,7 @@ int kcopyd_cancel(struct kcopyd_job *job, int block)
struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle)
{
int r;
+ unsigned reserve_pages;
struct dm_kcopyd_client *kc;
kc = kzalloc(sizeof(*kc), GFP_KERNEL);
@@ -912,9 +931,12 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *thro
goto bad_workqueue;
}
+ kc->sub_job_size = dm_get_kcopyd_subjob_size();
+ reserve_pages = DIV_ROUND_UP(kc->sub_job_size << SECTOR_SHIFT, PAGE_SIZE);
+
kc->pages = NULL;
kc->nr_reserved_pages = kc->nr_free_pages = 0;
- r = client_reserve_pages(kc, RESERVE_PAGES);
+ r = client_reserve_pages(kc, reserve_pages);
if (r)
goto bad_client_pages;
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index ad980a38fb1e..ecefe6703736 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -137,15 +137,14 @@ static int linear_prepare_ioctl(struct dm_target *ti, struct block_device **bdev
#ifdef CONFIG_BLK_DEV_ZONED
static int linear_report_zones(struct dm_target *ti, sector_t sector,
- struct blk_zone *zones, unsigned int *nr_zones,
- gfp_t gfp_mask)
+ struct blk_zone *zones, unsigned int *nr_zones)
{
struct linear_c *lc = (struct linear_c *) ti->private;
int ret;
/* Do report and remap it */
ret = blkdev_report_zones(lc->dev->bdev, linear_map_sector(ti, sector),
- zones, nr_zones, gfp_mask);
+ zones, nr_zones);
if (ret != 0)
return ret;
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index e549392e0ea5..99721c76225d 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -40,7 +40,7 @@
*
* Would result in the log looking like this:
*
- * c,a,flush,fuad,b,<other writes>,<next flush>
+ * c,a,b,flush,fuad,<other writes>,<next flush>
*
* This is meant to help expose problems where file systems do not properly wait
* on data being written before invoking a FLUSH. FUA bypasses cache so once it
@@ -699,7 +699,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
if (discard_bio)
alloc_size = sizeof(struct pending_block);
else
- alloc_size = sizeof(struct pending_block) + sizeof(struct bio_vec) * bio_segments(bio);
+ alloc_size = struct_size(block, vecs, bio_segments(bio));
block = kzalloc(alloc_size, GFP_NOIO);
if (!block) {
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 7a87a640f8ba..8a60a4a070ac 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3558,7 +3558,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
* v1.5.0+:
*
* Sync action:
- * See Documentation/device-mapper/dm-raid.rst for
+ * See Documentation/admin-guide/device-mapper/dm-raid.rst for
* information on each of these states.
*/
DMEMIT(" %s", sync_action);
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 5f7063f05ae0..c9e44ac1f9a6 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -115,7 +115,7 @@ static void end_clone_bio(struct bio *clone)
/*
* Update the original request.
- * Do not use blk_end_request() here, because it may complete
+ * Do not use blk_mq_end_request() here, because it may complete
* the original request before the clone, and break the ordering.
*/
if (is_last)
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 3107f2b1988b..f150f5c5492b 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1,6 +1,4 @@
/*
- * dm-snapshot.c
- *
* Copyright (C) 2001-2002 Sistina Software (UK) Limited.
*
* This file is released under the GPL.
@@ -134,7 +132,10 @@ struct dm_snapshot {
* - I/O error while merging
* => stop merging; set merge_failed; process I/O normally.
*/
- int merge_failed;
+ bool merge_failed:1;
+
+ bool discard_zeroes_cow:1;
+ bool discard_passdown_origin:1;
/*
* Incoming bios that overlap with chunks being merged must wait
@@ -1173,12 +1174,64 @@ static void stop_merge(struct dm_snapshot *s)
clear_bit(SHUTDOWN_MERGE, &s->state_bits);
}
+static int parse_snapshot_features(struct dm_arg_set *as, struct dm_snapshot *s,
+ struct dm_target *ti)
+{
+ int r;
+ unsigned argc;
+ const char *arg_name;
+
+ static const struct dm_arg _args[] = {
+ {0, 2, "Invalid number of feature arguments"},
+ };
+
+ /*
+ * No feature arguments supplied.
+ */
+ if (!as->argc)
+ return 0;
+
+ r = dm_read_arg_group(_args, as, &argc, &ti->error);
+ if (r)
+ return -EINVAL;
+
+ while (argc && !r) {
+ arg_name = dm_shift_arg(as);
+ argc--;
+
+ if (!strcasecmp(arg_name, "discard_zeroes_cow"))
+ s->discard_zeroes_cow = true;
+
+ else if (!strcasecmp(arg_name, "discard_passdown_origin"))
+ s->discard_passdown_origin = true;
+
+ else {
+ ti->error = "Unrecognised feature requested";
+ r = -EINVAL;
+ break;
+ }
+ }
+
+ if (!s->discard_zeroes_cow && s->discard_passdown_origin) {
+ /*
+ * TODO: really these are disjoint.. but ti->num_discard_bios
+ * and dm_bio_get_target_bio_nr() require rigid constraints.
+ */
+ ti->error = "discard_passdown_origin feature depends on discard_zeroes_cow";
+ r = -EINVAL;
+ }
+
+ return r;
+}
+
/*
- * Construct a snapshot mapping: <origin_dev> <COW-dev> <p|po|n> <chunk-size>
+ * Construct a snapshot mapping:
+ * <origin_dev> <COW-dev> <p|po|n> <chunk-size> [<# feature args> [<arg>]*]
*/
static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
struct dm_snapshot *s;
+ struct dm_arg_set as;
int i;
int r = -EINVAL;
char *origin_path, *cow_path;
@@ -1186,8 +1239,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
unsigned args_used, num_flush_bios = 1;
fmode_t origin_mode = FMODE_READ;
- if (argc != 4) {
- ti->error = "requires exactly 4 arguments";
+ if (argc < 4) {
+ ti->error = "requires 4 or more arguments";
r = -EINVAL;
goto bad;
}
@@ -1204,6 +1257,13 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
+ as.argc = argc;
+ as.argv = argv;
+ dm_consume_args(&as, 4);
+ r = parse_snapshot_features(&as, s, ti);
+ if (r)
+ goto bad_features;
+
origin_path = argv[0];
argv++;
argc--;
@@ -1289,6 +1349,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->private = s;
ti->num_flush_bios = num_flush_bios;
+ if (s->discard_zeroes_cow)
+ ti->num_discard_bios = (s->discard_passdown_origin ? 2 : 1);
ti->per_io_data_size = sizeof(struct dm_snap_tracked_chunk);
/* Add snapshot to the list of snapshots for this origin */
@@ -1336,29 +1398,22 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
bad_read_metadata:
unregister_snapshot(s);
-
bad_load_and_register:
mempool_exit(&s->pending_pool);
-
bad_pending_pool:
dm_kcopyd_client_destroy(s->kcopyd_client);
-
bad_kcopyd:
dm_exception_table_exit(&s->pending, pending_cache);
dm_exception_table_exit(&s->complete, exception_cache);
-
bad_hash_tables:
dm_exception_store_destroy(s->store);
-
bad_store:
dm_put_device(ti, s->cow);
-
bad_cow:
dm_put_device(ti, s->origin);
-
bad_origin:
+bad_features:
kfree(s);
-
bad:
return r;
}
@@ -1806,6 +1861,37 @@ static void remap_exception(struct dm_snapshot *s, struct dm_exception *e,
(bio->bi_iter.bi_sector & s->store->chunk_mask);
}
+static void zero_callback(int read_err, unsigned long write_err, void *context)
+{
+ struct bio *bio = context;
+ struct dm_snapshot *s = bio->bi_private;
+
+ up(&s->cow_count);
+ bio->bi_status = write_err ? BLK_STS_IOERR : 0;
+ bio_endio(bio);
+}
+
+static void zero_exception(struct dm_snapshot *s, struct dm_exception *e,
+ struct bio *bio, chunk_t chunk)
+{
+ struct dm_io_region dest;
+
+ dest.bdev = s->cow->bdev;
+ dest.sector = bio->bi_iter.bi_sector;
+ dest.count = s->store->chunk_size;
+
+ down(&s->cow_count);
+ WARN_ON_ONCE(bio->bi_private);
+ bio->bi_private = s;
+ dm_kcopyd_zero(s->kcopyd_client, 1, &dest, 0, zero_callback, bio);
+}
+
+static bool io_overlaps_chunk(struct dm_snapshot *s, struct bio *bio)
+{
+ return bio->bi_iter.bi_size ==
+ (s->store->chunk_size << SECTOR_SHIFT);
+}
+
static int snapshot_map(struct dm_target *ti, struct bio *bio)
{
struct dm_exception *e;
@@ -1839,10 +1925,43 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
goto out_unlock;
}
+ if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
+ if (s->discard_passdown_origin && dm_bio_get_target_bio_nr(bio)) {
+ /*
+ * passdown discard to origin (without triggering
+ * snapshot exceptions via do_origin; doing so would
+ * defeat the goal of freeing space in origin that is
+ * implied by the "discard_passdown_origin" feature)
+ */
+ bio_set_dev(bio, s->origin->bdev);
+ track_chunk(s, bio, chunk);
+ goto out_unlock;
+ }
+ /* discard to snapshot (target_bio_nr == 0) zeroes exceptions */
+ }
+
/* If the block is already remapped - use that, else remap it */
e = dm_lookup_exception(&s->complete, chunk);
if (e) {
remap_exception(s, e, bio, chunk);
+ if (unlikely(bio_op(bio) == REQ_OP_DISCARD) &&
+ io_overlaps_chunk(s, bio)) {
+ dm_exception_table_unlock(&lock);
+ up_read(&s->lock);
+ zero_exception(s, e, bio, chunk);
+ r = DM_MAPIO_SUBMITTED; /* discard is not issued */
+ goto out;
+ }
+ goto out_unlock;
+ }
+
+ if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
+ /*
+ * If no exception exists, complete discard immediately
+ * otherwise it'll trigger copy-out.
+ */
+ bio_endio(bio);
+ r = DM_MAPIO_SUBMITTED;
goto out_unlock;
}
@@ -1890,9 +2009,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
r = DM_MAPIO_SUBMITTED;
- if (!pe->started &&
- bio->bi_iter.bi_size ==
- (s->store->chunk_size << SECTOR_SHIFT)) {
+ if (!pe->started && io_overlaps_chunk(s, bio)) {
pe->started = 1;
dm_exception_table_unlock(&lock);
@@ -1955,6 +2072,12 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_REMAPPED;
}
+ if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
+ /* Once merging, discards no longer effect change */
+ bio_endio(bio);
+ return DM_MAPIO_SUBMITTED;
+ }
+
chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
down_write(&s->lock);
@@ -2138,6 +2261,7 @@ static void snapshot_status(struct dm_target *ti, status_type_t type,
{
unsigned sz = 0;
struct dm_snapshot *snap = ti->private;
+ unsigned num_features;
switch (type) {
case STATUSTYPE_INFO:
@@ -2178,8 +2302,16 @@ static void snapshot_status(struct dm_target *ti, status_type_t type,
* make sense.
*/
DMEMIT("%s %s", snap->origin->name, snap->cow->name);
- snap->store->type->status(snap->store, type, result + sz,
- maxlen - sz);
+ sz += snap->store->type->status(snap->store, type, result + sz,
+ maxlen - sz);
+ num_features = snap->discard_zeroes_cow + snap->discard_passdown_origin;
+ if (num_features) {
+ DMEMIT(" %u", num_features);
+ if (snap->discard_zeroes_cow)
+ DMEMIT(" discard_zeroes_cow");
+ if (snap->discard_passdown_origin)
+ DMEMIT(" discard_passdown_origin");
+ }
break;
}
}
@@ -2198,6 +2330,26 @@ static int snapshot_iterate_devices(struct dm_target *ti,
return r;
}
+static void snapshot_io_hints(struct dm_target *ti, struct queue_limits *limits)
+{
+ struct dm_snapshot *snap = ti->private;
+
+ if (snap->discard_zeroes_cow) {
+ struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
+
+ down_read(&_origins_lock);
+
+ (void) __find_snapshots_sharing_cow(snap, &snap_src, &snap_dest, NULL);
+ if (snap_src && snap_dest)
+ snap = snap_src;
+
+ /* All discards are split on chunk_size boundary */
+ limits->discard_granularity = snap->store->chunk_size;
+ limits->max_discard_sectors = snap->store->chunk_size;
+
+ up_read(&_origins_lock);
+ }
+}
/*-----------------------------------------------------------------
* Origin methods
@@ -2522,7 +2674,7 @@ static struct target_type origin_target = {
static struct target_type snapshot_target = {
.name = "snapshot",
- .version = {1, 15, 0},
+ .version = {1, 16, 0},
.module = THIS_MODULE,
.ctr = snapshot_ctr,
.dtr = snapshot_dtr,
@@ -2532,11 +2684,12 @@ static struct target_type snapshot_target = {
.resume = snapshot_resume,
.status = snapshot_status,
.iterate_devices = snapshot_iterate_devices,
+ .io_hints = snapshot_io_hints,
};
static struct target_type merge_target = {
.name = dm_snapshot_merge_target_name,
- .version = {1, 4, 0},
+ .version = {1, 5, 0},
.module = THIS_MODULE,
.ctr = snapshot_ctr,
.dtr = snapshot_dtr,
@@ -2547,6 +2700,7 @@ static struct target_type merge_target = {
.resume = snapshot_merge_resume,
.status = snapshot_status,
.iterate_devices = snapshot_iterate_devices,
+ .io_hints = snapshot_io_hints,
};
static int __init dm_snapshot_init(void)
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index ec8b27e20de3..caaee8032afe 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -881,7 +881,7 @@ void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type)
EXPORT_SYMBOL_GPL(dm_table_set_type);
/* validate the dax capability of the target device span */
-static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
+int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
{
int blocksize = *(int *) data;
@@ -890,7 +890,15 @@ static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
start, len);
}
-bool dm_table_supports_dax(struct dm_table *t, int blocksize)
+/* Check devices support synchronous DAX */
+static int device_synchronous(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+{
+ return dax_synchronous(dev->dax_dev);
+}
+
+bool dm_table_supports_dax(struct dm_table *t,
+ iterate_devices_callout_fn iterate_fn, int *blocksize)
{
struct dm_target *ti;
unsigned i;
@@ -903,8 +911,7 @@ bool dm_table_supports_dax(struct dm_table *t, int blocksize)
return false;
if (!ti->type->iterate_devices ||
- !ti->type->iterate_devices(ti, device_supports_dax,
- &blocksize))
+ !ti->type->iterate_devices(ti, iterate_fn, blocksize))
return false;
}
@@ -940,6 +947,7 @@ static int dm_table_determine_type(struct dm_table *t)
struct dm_target *tgt;
struct list_head *devices = dm_table_get_devices(t);
enum dm_queue_mode live_md_type = dm_get_md_type(t->md);
+ int page_size = PAGE_SIZE;
if (t->type != DM_TYPE_NONE) {
/* target already set the table's type */
@@ -984,7 +992,7 @@ static int dm_table_determine_type(struct dm_table *t)
verify_bio_based:
/* We must use this table as bio-based */
t->type = DM_TYPE_BIO_BASED;
- if (dm_table_supports_dax(t, PAGE_SIZE) ||
+ if (dm_table_supports_dax(t, device_supports_dax, &page_size) ||
(list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) {
t->type = DM_TYPE_DAX_BIO_BASED;
} else {
@@ -1883,6 +1891,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *limits)
{
bool wc = false, fua = false;
+ int page_size = PAGE_SIZE;
/*
* Copy table's limits to the DM device's request_queue
@@ -1910,8 +1919,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
}
blk_queue_write_cache(q, wc, fua);
- if (dm_table_supports_dax(t, PAGE_SIZE))
+ if (dm_table_supports_dax(t, device_supports_dax, &page_size)) {
blk_queue_flag_set(QUEUE_FLAG_DAX, q);
+ if (dm_table_supports_dax(t, device_synchronous, NULL))
+ set_dax_synchronous(t->md->dax_dev);
+ }
else
blk_queue_flag_clear(QUEUE_FLAG_DAX, q);
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 7f0840601737..4c68a7b93d5e 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -2046,16 +2046,19 @@ int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
int dm_pool_metadata_set_needs_check(struct dm_pool_metadata *pmd)
{
- int r;
+ int r = -EINVAL;
struct dm_block *sblock;
struct thin_disk_superblock *disk_super;
pmd_write_lock(pmd);
+ if (pmd->fail_io)
+ goto out;
+
pmd->flags |= THIN_METADATA_NEEDS_CHECK_FLAG;
r = superblock_lock(pmd, &sblock);
if (r) {
- DMERR("couldn't read superblock");
+ DMERR("couldn't lock superblock");
goto out;
}
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index d8334cd45d7c..8545dcee9fd0 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -8,6 +8,7 @@
#include <linux/module.h>
#include <linux/crc32.h>
+#include <linux/sched/mm.h>
#define DM_MSG_PREFIX "zoned metadata"
@@ -1162,8 +1163,7 @@ static int dmz_init_zones(struct dmz_metadata *zmd)
while (sector < dev->capacity) {
/* Get zone information */
nr_blkz = DMZ_REPORT_NR_ZONES;
- ret = blkdev_report_zones(dev->bdev, sector, blkz,
- &nr_blkz, GFP_KERNEL);
+ ret = blkdev_report_zones(dev->bdev, sector, blkz, &nr_blkz);
if (ret) {
dmz_dev_err(dev, "Report zones failed %d", ret);
goto out;
@@ -1201,12 +1201,20 @@ out:
static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
{
unsigned int nr_blkz = 1;
+ unsigned int noio_flag;
struct blk_zone blkz;
int ret;
- /* Get zone information from disk */
+ /*
+ * Get zone information from disk. Since blkdev_report_zones() uses
+ * GFP_KERNEL by default for memory allocations, set the per-task
+ * PF_MEMALLOC_NOIO flag so that all allocations are done as if
+ * GFP_NOIO was specified.
+ */
+ noio_flag = memalloc_noio_save();
ret = blkdev_report_zones(zmd->dev->bdev, dmz_start_sect(zmd, zone),
- &blkz, &nr_blkz, GFP_NOIO);
+ &blkz, &nr_blkz);
+ memalloc_noio_restore(noio_flag);
if (!nr_blkz)
ret = -EIO;
if (ret) {
@@ -1594,30 +1602,6 @@ struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd)
}
/*
- * Activate a zone (increment its reference count).
- */
-void dmz_activate_zone(struct dm_zone *zone)
-{
- set_bit(DMZ_ACTIVE, &zone->flags);
- atomic_inc(&zone->refcount);
-}
-
-/*
- * Deactivate a zone. This decrement the zone reference counter
- * and clears the active state of the zone once the count reaches 0,
- * indicating that all BIOs to the zone have completed. Returns
- * true if the zone was deactivated.
- */
-void dmz_deactivate_zone(struct dm_zone *zone)
-{
- if (atomic_dec_and_test(&zone->refcount)) {
- WARN_ON(!test_bit(DMZ_ACTIVE, &zone->flags));
- clear_bit_unlock(DMZ_ACTIVE, &zone->flags);
- smp_mb__after_atomic();
- }
-}
-
-/*
* Get the zone mapping a chunk, if the chunk is mapped already.
* If no mapping exist and the operation is WRITE, a zone is
* allocated and used to map the chunk.
diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h
index 12419f0bfe78..ed8de49c9a08 100644
--- a/drivers/md/dm-zoned.h
+++ b/drivers/md/dm-zoned.h
@@ -115,7 +115,6 @@ enum {
DMZ_BUF,
/* Zone internal state */
- DMZ_ACTIVE,
DMZ_RECLAIM,
DMZ_SEQ_WRITE_ERR,
};
@@ -128,7 +127,6 @@ enum {
#define dmz_is_empty(z) ((z)->wp_block == 0)
#define dmz_is_offline(z) test_bit(DMZ_OFFLINE, &(z)->flags)
#define dmz_is_readonly(z) test_bit(DMZ_READ_ONLY, &(z)->flags)
-#define dmz_is_active(z) test_bit(DMZ_ACTIVE, &(z)->flags)
#define dmz_in_reclaim(z) test_bit(DMZ_RECLAIM, &(z)->flags)
#define dmz_seq_write_err(z) test_bit(DMZ_SEQ_WRITE_ERR, &(z)->flags)
@@ -188,8 +186,30 @@ void dmz_unmap_zone(struct dmz_metadata *zmd, struct dm_zone *zone);
unsigned int dmz_nr_rnd_zones(struct dmz_metadata *zmd);
unsigned int dmz_nr_unmap_rnd_zones(struct dmz_metadata *zmd);
-void dmz_activate_zone(struct dm_zone *zone);
-void dmz_deactivate_zone(struct dm_zone *zone);
+/*
+ * Activate a zone (increment its reference count).
+ */
+static inline void dmz_activate_zone(struct dm_zone *zone)
+{
+ atomic_inc(&zone->refcount);
+}
+
+/*
+ * Deactivate a zone. This decrement the zone reference counter
+ * indicating that all BIOs to the zone have completed when the count is 0.
+ */
+static inline void dmz_deactivate_zone(struct dm_zone *zone)
+{
+ atomic_dec(&zone->refcount);
+}
+
+/*
+ * Test if a zone is active, that is, has a refcount > 0.
+ */
+static inline bool dmz_is_active(struct dm_zone *zone)
+{
+ return atomic_read(&zone->refcount);
+}
int dmz_lock_zone_reclaim(struct dm_zone *zone);
void dmz_unlock_zone_reclaim(struct dm_zone *zone);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 5475081dcbd6..d0beef033e2f 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -441,8 +441,7 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
}
static int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
- struct blk_zone *zones, unsigned int *nr_zones,
- gfp_t gfp_mask)
+ struct blk_zone *zones, unsigned int *nr_zones)
{
#ifdef CONFIG_BLK_DEV_ZONED
struct mapped_device *md = disk->private_data;
@@ -480,8 +479,7 @@ static int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
* So there is no need to loop here trying to fill the entire array
* of zones.
*/
- ret = tgt->type->report_zones(tgt, sector, zones,
- nr_zones, gfp_mask);
+ ret = tgt->type->report_zones(tgt, sector, zones, nr_zones);
out:
dm_put_live_table(md, srcu_idx);
@@ -1119,7 +1117,7 @@ static bool dm_dax_supported(struct dax_device *dax_dev, struct block_device *bd
if (!map)
return false;
- ret = dm_table_supports_dax(map, blocksize);
+ ret = dm_table_supports_dax(map, device_supports_dax, &blocksize);
dm_put_live_table(md, srcu_idx);
@@ -1991,7 +1989,8 @@ static struct mapped_device *alloc_dev(int minor)
sprintf(md->disk->disk_name, "dm-%d", minor);
if (IS_ENABLED(CONFIG_DAX_DRIVER)) {
- md->dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
+ md->dax_dev = alloc_dax(md, md->disk->disk_name,
+ &dm_dax_ops, 0);
if (!md->dax_dev)
goto bad;
}
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 17e3db54404c..0475673337f3 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -72,7 +72,10 @@ bool dm_table_bio_based(struct dm_table *t);
bool dm_table_request_based(struct dm_table *t);
void dm_table_free_md_mempools(struct dm_table *t);
struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
-bool dm_table_supports_dax(struct dm_table *t, int blocksize);
+bool dm_table_supports_dax(struct dm_table *t, iterate_devices_callout_fn fn,
+ int *blocksize);
+int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data);
void dm_lock_md_type(struct mapped_device *md);
void dm_unlock_md_type(struct mapped_device *md);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index a114b05e3db4..24638ccedce4 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5316,7 +5316,8 @@ int mddev_init_writes_pending(struct mddev *mddev)
{
if (mddev->writes_pending.percpu_count_ptr)
return 0;
- if (percpu_ref_init(&mddev->writes_pending, no_op, 0, GFP_KERNEL) < 0)
+ if (percpu_ref_init(&mddev->writes_pending, no_op,
+ PERCPU_REF_ALLOW_REINIT, GFP_KERNEL) < 0)
return -ENOMEM;
/* We want to start with the refcount at zero */
percpu_ref_put(&mddev->writes_pending);
OpenPOWER on IntegriCloud