summaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/Kconfig2
-rw-r--r--drivers/md/bcache/alloc.c3
-rw-r--r--drivers/md/bcache/bcache.h57
-rw-r--r--drivers/md/bcache/bset.c4
-rw-r--r--drivers/md/bcache/bset.h5
-rw-r--r--drivers/md/bcache/btree.c26
-rw-r--r--drivers/md/bcache/closure.c17
-rw-r--r--drivers/md/bcache/closure.h5
-rw-r--r--drivers/md/bcache/debug.c14
-rw-r--r--drivers/md/bcache/extents.c2
-rw-r--r--drivers/md/bcache/io.c16
-rw-r--r--drivers/md/bcache/journal.c8
-rw-r--r--drivers/md/bcache/request.c186
-rw-r--r--drivers/md/bcache/super.c160
-rw-r--r--drivers/md/bcache/sysfs.c55
-rw-r--r--drivers/md/bcache/util.c25
-rw-r--r--drivers/md/bcache/util.h6
-rw-r--r--drivers/md/bcache/writeback.c92
-rw-r--r--drivers/md/bcache/writeback.h4
-rw-r--r--drivers/md/dm-bufio.c279
-rw-r--r--drivers/md/dm-bufio.h148
-rw-r--r--drivers/md/dm-cache-target.c3
-rw-r--r--drivers/md/dm-crypt.c69
-rw-r--r--drivers/md/dm-era-target.c3
-rw-r--r--drivers/md/dm-flakey.c3
-rw-r--r--drivers/md/dm-integrity.c5
-rw-r--r--drivers/md/dm-ioctl.c2
-rw-r--r--drivers/md/dm-linear.c10
-rw-r--r--drivers/md/dm-log-writes.c112
-rw-r--r--drivers/md/dm-mpath.c8
-rw-r--r--drivers/md/dm-raid.c33
-rw-r--r--drivers/md/dm-snap-persistent.c2
-rw-r--r--drivers/md/dm-stripe.c18
-rw-r--r--drivers/md/dm-switch.c7
-rw-r--r--drivers/md/dm-table.c47
-rw-r--r--drivers/md/dm-target.c2
-rw-r--r--drivers/md/dm-thin.c3
-rw-r--r--drivers/md/dm-unstripe.c37
-rw-r--r--drivers/md/dm-verity-target.c71
-rw-r--r--drivers/md/dm-verity.h3
-rw-r--r--drivers/md/dm-zoned-target.c3
-rw-r--r--drivers/md/dm.c157
-rw-r--r--drivers/md/md-linear.c4
-rw-r--r--drivers/md/md.c10
-rw-r--r--drivers/md/persistent-data/dm-block-manager.c2
-rw-r--r--drivers/md/raid0.c4
-rw-r--r--drivers/md/raid1.c6
-rw-r--r--drivers/md/raid10.c6
-rw-r--r--drivers/md/raid5.c4
49 files changed, 1092 insertions, 656 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 2c8ac3688815..edff083f7c4e 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -201,7 +201,7 @@ config BLK_DEV_DM_BUILTIN
config BLK_DEV_DM
tristate "Device mapper support"
select BLK_DEV_DM_BUILTIN
- select DAX
+ depends on DAX || DAX=n
---help---
Device-mapper is a low level volume manager. It works by allowing
people to specify mappings for ranges of logical sectors. Various
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 458e1d38577d..004cc3cc6123 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -287,7 +287,8 @@ do { \
break; \
\
mutex_unlock(&(ca)->set->bucket_lock); \
- if (kthread_should_stop()) { \
+ if (kthread_should_stop() || \
+ test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)) { \
set_current_state(TASK_RUNNING); \
return 0; \
} \
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 12e5197f186c..d338b7086013 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -188,6 +188,7 @@
#include <linux/refcount.h>
#include <linux/types.h>
#include <linux/workqueue.h>
+#include <linux/kthread.h>
#include "bset.h"
#include "util.h"
@@ -258,10 +259,11 @@ struct bcache_device {
struct gendisk *disk;
unsigned long flags;
-#define BCACHE_DEV_CLOSING 0
-#define BCACHE_DEV_DETACHING 1
-#define BCACHE_DEV_UNLINK_DONE 2
-
+#define BCACHE_DEV_CLOSING 0
+#define BCACHE_DEV_DETACHING 1
+#define BCACHE_DEV_UNLINK_DONE 2
+#define BCACHE_DEV_WB_RUNNING 3
+#define BCACHE_DEV_RATE_DW_RUNNING 4
unsigned nr_stripes;
unsigned stripe_size;
atomic_t *stripe_sectors_dirty;
@@ -286,6 +288,12 @@ struct io {
sector_t last;
};
+enum stop_on_failure {
+ BCH_CACHED_DEV_STOP_AUTO = 0,
+ BCH_CACHED_DEV_STOP_ALWAYS,
+ BCH_CACHED_DEV_STOP_MODE_MAX,
+};
+
struct cached_dev {
struct list_head list;
struct bcache_device disk;
@@ -359,6 +367,7 @@ struct cached_dev {
unsigned sequential_cutoff;
unsigned readahead;
+ unsigned io_disable:1;
unsigned verify:1;
unsigned bypass_torture_test:1;
@@ -378,6 +387,11 @@ struct cached_dev {
unsigned writeback_rate_i_term_inverse;
unsigned writeback_rate_p_term_inverse;
unsigned writeback_rate_minimum;
+
+ enum stop_on_failure stop_when_cache_set_failed;
+#define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
+ atomic_t io_errors;
+ unsigned error_limit;
};
enum alloc_reserve {
@@ -474,10 +488,15 @@ struct gc_stat {
*
* CACHE_SET_RUNNING means all cache devices have been registered and journal
* replay is complete.
+ *
+ * CACHE_SET_IO_DISABLE is set when bcache is stopping the whold cache set, all
+ * external and internal I/O should be denied when this flag is set.
+ *
*/
#define CACHE_SET_UNREGISTERING 0
#define CACHE_SET_STOPPING 1
#define CACHE_SET_RUNNING 2
+#define CACHE_SET_IO_DISABLE 3
struct cache_set {
struct closure cl;
@@ -867,8 +886,36 @@ static inline void wake_up_allocators(struct cache_set *c)
wake_up_process(ca->alloc_thread);
}
+static inline void closure_bio_submit(struct cache_set *c,
+ struct bio *bio,
+ struct closure *cl)
+{
+ closure_get(cl);
+ if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) {
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ return;
+ }
+ generic_make_request(bio);
+}
+
+/*
+ * Prevent the kthread exits directly, and make sure when kthread_stop()
+ * is called to stop a kthread, it is still alive. If a kthread might be
+ * stopped by CACHE_SET_IO_DISABLE bit set, wait_for_kthread_stop() is
+ * necessary before the kthread returns.
+ */
+static inline void wait_for_kthread_stop(void)
+{
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ }
+}
+
/* Forward declarations */
+void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio);
void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
blk_status_t, const char *);
@@ -896,6 +943,7 @@ int bch_bucket_alloc_set(struct cache_set *, unsigned,
struct bkey *, int, bool);
bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned,
unsigned, unsigned, bool);
+bool bch_cached_dev_error(struct cached_dev *dc);
__printf(2, 3)
bool bch_cache_set_error(struct cache_set *, const char *, ...);
@@ -905,6 +953,7 @@ void bch_write_bdev_super(struct cached_dev *, struct closure *);
extern struct workqueue_struct *bcache_wq;
extern const char * const bch_cache_modes[];
+extern const char * const bch_stop_on_failure_modes[];
extern struct mutex bch_register_lock;
extern struct list_head bch_cache_sets;
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index e56d3ecdbfcb..579c696a5fe0 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -1072,7 +1072,7 @@ EXPORT_SYMBOL(bch_btree_iter_init);
static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
btree_iter_cmp_fn *cmp)
{
- struct btree_iter_set unused;
+ struct btree_iter_set b __maybe_unused;
struct bkey *ret = NULL;
if (!btree_iter_end(iter)) {
@@ -1087,7 +1087,7 @@ static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
}
if (iter->data->k == iter->data->end)
- heap_pop(iter, unused, cmp);
+ heap_pop(iter, b, cmp);
else
heap_sift(iter, 0, cmp);
}
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index fa506c1aa524..0c24280f3b98 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -531,14 +531,15 @@ int __bch_keylist_realloc(struct keylist *, unsigned);
#ifdef CONFIG_BCACHE_DEBUG
int __bch_count_data(struct btree_keys *);
-void __bch_check_keys(struct btree_keys *, const char *, ...);
+void __printf(2, 3) __bch_check_keys(struct btree_keys *, const char *, ...);
void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
void bch_dump_bucket(struct btree_keys *);
#else
static inline int __bch_count_data(struct btree_keys *b) { return -1; }
-static inline void __bch_check_keys(struct btree_keys *b, const char *fmt, ...) {}
+static inline void __printf(2, 3)
+ __bch_check_keys(struct btree_keys *b, const char *fmt, ...) {}
static inline void bch_dump_bucket(struct btree_keys *b) {}
void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index fad9fe8817eb..17936b2dc7d6 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -665,6 +665,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
struct btree *b, *t;
unsigned long i, nr = sc->nr_to_scan;
unsigned long freed = 0;
+ unsigned int btree_cache_used;
if (c->shrinker_disabled)
return SHRINK_STOP;
@@ -689,9 +690,10 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
nr = min_t(unsigned long, nr, mca_can_free(c));
i = 0;
+ btree_cache_used = c->btree_cache_used;
list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) {
- if (freed >= nr)
- break;
+ if (nr <= 0)
+ goto out;
if (++i > 3 &&
!mca_reap(b, 0, false)) {
@@ -699,9 +701,10 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
rw_unlock(true, b);
freed++;
}
+ nr--;
}
- for (i = 0; (nr--) && i < c->btree_cache_used; i++) {
+ for (; (nr--) && i < btree_cache_used; i++) {
if (list_empty(&c->btree_cache))
goto out;
@@ -719,7 +722,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
}
out:
mutex_unlock(&c->bucket_lock);
- return freed;
+ return freed * c->btree_pages;
}
static unsigned long bch_mca_count(struct shrinker *shrink,
@@ -959,7 +962,7 @@ err:
return b;
}
-/**
+/*
* bch_btree_node_get - find a btree node in the cache and lock it, reading it
* in from disk if necessary.
*
@@ -1744,6 +1747,7 @@ static void bch_btree_gc(struct cache_set *c)
btree_gc_start(c);
+ /* if CACHE_SET_IO_DISABLE set, gc thread should stop too */
do {
ret = btree_root(gc_root, c, &op, &writes, &stats);
closure_sync(&writes);
@@ -1751,7 +1755,7 @@ static void bch_btree_gc(struct cache_set *c)
if (ret && ret != -EAGAIN)
pr_warn("gc failed!");
- } while (ret);
+ } while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags));
bch_btree_gc_finish(c);
wake_up_allocators(c);
@@ -1789,15 +1793,19 @@ static int bch_gc_thread(void *arg)
while (1) {
wait_event_interruptible(c->gc_wait,
- kthread_should_stop() || gc_should_run(c));
+ kthread_should_stop() ||
+ test_bit(CACHE_SET_IO_DISABLE, &c->flags) ||
+ gc_should_run(c));
- if (kthread_should_stop())
+ if (kthread_should_stop() ||
+ test_bit(CACHE_SET_IO_DISABLE, &c->flags))
break;
set_gc_sectors(c);
bch_btree_gc(c);
}
+ wait_for_kthread_stop();
return 0;
}
@@ -2170,7 +2178,7 @@ int bch_btree_insert_check_key(struct btree *b, struct btree_op *op,
if (b->key.ptr[0] != btree_ptr ||
b->seq != seq + 1) {
- op->lock = b->level;
+ op->lock = b->level;
goto out;
}
}
diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
index 7f12920c14f7..0e14969182c6 100644
--- a/drivers/md/bcache/closure.c
+++ b/drivers/md/bcache/closure.c
@@ -46,7 +46,7 @@ void closure_sub(struct closure *cl, int v)
}
EXPORT_SYMBOL(closure_sub);
-/**
+/*
* closure_put - decrement a closure's refcount
*/
void closure_put(struct closure *cl)
@@ -55,7 +55,7 @@ void closure_put(struct closure *cl)
}
EXPORT_SYMBOL(closure_put);
-/**
+/*
* closure_wake_up - wake up all closures on a wait list, without memory barrier
*/
void __closure_wake_up(struct closure_waitlist *wait_list)
@@ -79,9 +79,9 @@ EXPORT_SYMBOL(__closure_wake_up);
/**
* closure_wait - add a closure to a waitlist
- *
- * @waitlist will own a ref on @cl, which will be released when
+ * @waitlist: will own a ref on @cl, which will be released when
* closure_wake_up() is called on @waitlist.
+ * @cl: closure pointer.
*
*/
bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl)
@@ -157,7 +157,7 @@ void closure_debug_destroy(struct closure *cl)
}
EXPORT_SYMBOL(closure_debug_destroy);
-static struct dentry *debug;
+static struct dentry *closure_debug;
static int debug_seq_show(struct seq_file *f, void *data)
{
@@ -199,11 +199,12 @@ static const struct file_operations debug_ops = {
.release = single_release
};
-void __init closure_debug_init(void)
+int __init closure_debug_init(void)
{
- debug = debugfs_create_file("closures", 0400, NULL, NULL, &debug_ops);
+ closure_debug = debugfs_create_file("closures",
+ 0400, bcache_debug, NULL, &debug_ops);
+ return IS_ERR_OR_NULL(closure_debug);
}
-
#endif
MODULE_AUTHOR("Kent Overstreet <koverstreet@google.com>");
diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h
index 3b9dfc9962ad..71427eb5fdae 100644
--- a/drivers/md/bcache/closure.h
+++ b/drivers/md/bcache/closure.h
@@ -105,6 +105,7 @@
struct closure;
struct closure_syncer;
typedef void (closure_fn) (struct closure *);
+extern struct dentry *bcache_debug;
struct closure_waitlist {
struct llist_head list;
@@ -185,13 +186,13 @@ static inline void closure_sync(struct closure *cl)
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
-void closure_debug_init(void);
+int closure_debug_init(void);
void closure_debug_create(struct closure *cl);
void closure_debug_destroy(struct closure *cl);
#else
-static inline void closure_debug_init(void) {}
+static inline int closure_debug_init(void) { return 0; }
static inline void closure_debug_create(struct closure *cl) {}
static inline void closure_debug_destroy(struct closure *cl) {}
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index af89408befe8..028f7b386e01 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -17,7 +17,7 @@
#include <linux/random.h>
#include <linux/seq_file.h>
-static struct dentry *debug;
+struct dentry *bcache_debug;
#ifdef CONFIG_BCACHE_DEBUG
@@ -232,11 +232,11 @@ static const struct file_operations cache_set_debug_ops = {
void bch_debug_init_cache_set(struct cache_set *c)
{
- if (!IS_ERR_OR_NULL(debug)) {
+ if (!IS_ERR_OR_NULL(bcache_debug)) {
char name[50];
snprintf(name, 50, "bcache-%pU", c->sb.set_uuid);
- c->debug = debugfs_create_file(name, 0400, debug, c,
+ c->debug = debugfs_create_file(name, 0400, bcache_debug, c,
&cache_set_debug_ops);
}
}
@@ -245,13 +245,13 @@ void bch_debug_init_cache_set(struct cache_set *c)
void bch_debug_exit(void)
{
- if (!IS_ERR_OR_NULL(debug))
- debugfs_remove_recursive(debug);
+ if (!IS_ERR_OR_NULL(bcache_debug))
+ debugfs_remove_recursive(bcache_debug);
}
int __init bch_debug_init(struct kobject *kobj)
{
- debug = debugfs_create_dir("bcache", NULL);
+ bcache_debug = debugfs_create_dir("bcache", NULL);
- return IS_ERR_OR_NULL(debug);
+ return IS_ERR_OR_NULL(bcache_debug);
}
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index f9d391711595..c334e6666461 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -534,7 +534,6 @@ err:
static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
{
struct btree *b = container_of(bk, struct btree, keys);
- struct bucket *g;
unsigned i, stale;
if (!KEY_PTRS(k) ||
@@ -549,7 +548,6 @@ static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
return false;
for (i = 0; i < KEY_PTRS(k); i++) {
- g = PTR_BUCKET(b->c, k, i);
stale = ptr_stale(b->c, k, i);
btree_bug_on(stale > 96, b,
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index a783c5a41ff1..7fac97ae036e 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -38,7 +38,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev);
b->submit_time_us = local_clock_us();
- closure_bio_submit(bio, bio->bi_private);
+ closure_bio_submit(c, bio, bio->bi_private);
}
void bch_submit_bbio(struct bio *bio, struct cache_set *c,
@@ -50,6 +50,20 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
}
/* IO errors */
+void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
+{
+ char buf[BDEVNAME_SIZE];
+ unsigned errors;
+
+ WARN_ONCE(!dc, "NULL pointer of struct cached_dev");
+
+ errors = atomic_add_return(1, &dc->io_errors);
+ if (errors < dc->error_limit)
+ pr_err("%s: IO error on backing device, unrecoverable",
+ bio_devname(bio, buf));
+ else
+ bch_cached_dev_error(dc);
+}
void bch_count_io_errors(struct cache *ca,
blk_status_t error,
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 1b736b860739..18f1b5239620 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -62,7 +62,7 @@ reread: left = ca->sb.bucket_size - offset;
bio_set_op_attrs(bio, REQ_OP_READ, 0);
bch_bio_map(bio, data);
- closure_bio_submit(bio, &cl);
+ closure_bio_submit(ca->set, bio, &cl);
closure_sync(&cl);
/* This function could be simpler now since we no longer write
@@ -493,7 +493,7 @@ static void journal_reclaim(struct cache_set *c)
struct cache *ca;
uint64_t last_seq;
unsigned iter, n = 0;
- atomic_t p;
+ atomic_t p __maybe_unused;
atomic_long_inc(&c->reclaim);
@@ -594,6 +594,7 @@ static void journal_write_done(struct closure *cl)
}
static void journal_write_unlock(struct closure *cl)
+ __releases(&c->journal.lock)
{
struct cache_set *c = container_of(cl, struct cache_set, journal.io);
@@ -674,7 +675,7 @@ static void journal_write_unlocked(struct closure *cl)
spin_unlock(&c->journal.lock);
while ((bio = bio_list_pop(&list)))
- closure_bio_submit(bio, cl);
+ closure_bio_submit(c, bio, cl);
continue_at(cl, journal_write_done, NULL);
}
@@ -705,6 +706,7 @@ static void journal_try_write(struct cache_set *c)
static struct journal_write *journal_wait_for_write(struct cache_set *c,
unsigned nkeys)
+ __acquires(&c->journal.lock)
{
size_t sectors;
struct closure cl;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 6422846b546e..a65e3365eeb9 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -139,6 +139,7 @@ static void bch_data_invalidate(struct closure *cl)
}
op->insert_data_done = true;
+ /* get in bch_data_insert() */
bio_put(bio);
out:
continue_at(cl, bch_data_insert_keys, op->wq);
@@ -295,6 +296,7 @@ err:
/**
* bch_data_insert - stick some data in the cache
+ * @cl: closure pointer.
*
* This is the starting point for any data to end up in a cache device; it could
* be from a normal write, or a writeback write, or a write to a flash only
@@ -630,6 +632,41 @@ static void request_endio(struct bio *bio)
closure_put(cl);
}
+static void backing_request_endio(struct bio *bio)
+{
+ struct closure *cl = bio->bi_private;
+
+ if (bio->bi_status) {
+ struct search *s = container_of(cl, struct search, cl);
+ struct cached_dev *dc = container_of(s->d,
+ struct cached_dev, disk);
+ /*
+ * If a bio has REQ_PREFLUSH for writeback mode, it is
+ * speically assembled in cached_dev_write() for a non-zero
+ * write request which has REQ_PREFLUSH. we don't set
+ * s->iop.status by this failure, the status will be decided
+ * by result of bch_data_insert() operation.
+ */
+ if (unlikely(s->iop.writeback &&
+ bio->bi_opf & REQ_PREFLUSH)) {
+ char buf[BDEVNAME_SIZE];
+
+ bio_devname(bio, buf);
+ pr_err("Can't flush %s: returned bi_status %i",
+ buf, bio->bi_status);
+ } else {
+ /* set to orig_bio->bi_status in bio_complete() */
+ s->iop.status = bio->bi_status;
+ }
+ s->recoverable = false;
+ /* should count I/O error for backing device here */
+ bch_count_backing_io_errors(dc, bio);
+ }
+
+ bio_put(bio);
+ closure_put(cl);
+}
+
static void bio_complete(struct search *s)
{
if (s->orig_bio) {
@@ -644,13 +681,21 @@ static void bio_complete(struct search *s)
}
}
-static void do_bio_hook(struct search *s, struct bio *orig_bio)
+static void do_bio_hook(struct search *s,
+ struct bio *orig_bio,
+ bio_end_io_t *end_io_fn)
{
struct bio *bio = &s->bio.bio;
bio_init(bio, NULL, 0);
__bio_clone_fast(bio, orig_bio);
- bio->bi_end_io = request_endio;
+ /*
+ * bi_end_io can be set separately somewhere else, e.g. the
+ * variants in,
+ * - cache_bio->bi_end_io from cached_dev_cache_miss()
+ * - n->bi_end_io from cache_lookup_fn()
+ */
+ bio->bi_end_io = end_io_fn;
bio->bi_private = &s->cl;
bio_cnt_set(bio, 3);
@@ -676,7 +721,7 @@ static inline struct search *search_alloc(struct bio *bio,
s = mempool_alloc(d->c->search, GFP_NOIO);
closure_init(&s->cl, NULL);
- do_bio_hook(s, bio);
+ do_bio_hook(s, bio, request_endio);
s->orig_bio = bio;
s->cache_miss = NULL;
@@ -743,11 +788,12 @@ static void cached_dev_read_error(struct closure *cl)
trace_bcache_read_retry(s->orig_bio);
s->iop.status = 0;
- do_bio_hook(s, s->orig_bio);
+ do_bio_hook(s, s->orig_bio, backing_request_endio);
/* XXX: invalidate cache */
- closure_bio_submit(bio, cl);
+ /* I/O request sent to backing device */
+ closure_bio_submit(s->iop.c, bio, cl);
}
continue_at(cl, cached_dev_cache_miss_done, NULL);
@@ -859,7 +905,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
bio_copy_dev(cache_bio, miss);
cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
- cache_bio->bi_end_io = request_endio;
+ cache_bio->bi_end_io = backing_request_endio;
cache_bio->bi_private = &s->cl;
bch_bio_map(cache_bio, NULL);
@@ -872,15 +918,17 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
s->cache_miss = miss;
s->iop.bio = cache_bio;
bio_get(cache_bio);
- closure_bio_submit(cache_bio, &s->cl);
+ /* I/O request sent to backing device */
+ closure_bio_submit(s->iop.c, cache_bio, &s->cl);
return ret;
out_put:
bio_put(cache_bio);
out_submit:
- miss->bi_end_io = request_endio;
+ miss->bi_end_io = backing_request_endio;
miss->bi_private = &s->cl;
- closure_bio_submit(miss, &s->cl);
+ /* I/O request sent to backing device */
+ closure_bio_submit(s->iop.c, miss, &s->cl);
return ret;
}
@@ -943,31 +991,46 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
s->iop.bio = s->orig_bio;
bio_get(s->iop.bio);
- if ((bio_op(bio) != REQ_OP_DISCARD) ||
- blk_queue_discard(bdev_get_queue(dc->bdev)))
- closure_bio_submit(bio, cl);
+ if (bio_op(bio) == REQ_OP_DISCARD &&
+ !blk_queue_discard(bdev_get_queue(dc->bdev)))
+ goto insert_data;
+
+ /* I/O request sent to backing device */
+ bio->bi_end_io = backing_request_endio;
+ closure_bio_submit(s->iop.c, bio, cl);
+
} else if (s->iop.writeback) {
bch_writeback_add(dc);
s->iop.bio = bio;
if (bio->bi_opf & REQ_PREFLUSH) {
- /* Also need to send a flush to the backing device */
- struct bio *flush = bio_alloc_bioset(GFP_NOIO, 0,
- dc->disk.bio_split);
-
+ /*
+ * Also need to send a flush to the backing
+ * device.
+ */
+ struct bio *flush;
+
+ flush = bio_alloc_bioset(GFP_NOIO, 0,
+ dc->disk.bio_split);
+ if (!flush) {
+ s->iop.status = BLK_STS_RESOURCE;
+ goto insert_data;
+ }
bio_copy_dev(flush, bio);
- flush->bi_end_io = request_endio;
+ flush->bi_end_io = backing_request_endio;
flush->bi_private = cl;
flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
-
- closure_bio_submit(flush, cl);
+ /* I/O request sent to backing device */
+ closure_bio_submit(s->iop.c, flush, cl);
}
} else {
s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
-
- closure_bio_submit(bio, cl);
+ /* I/O request sent to backing device */
+ bio->bi_end_io = backing_request_endio;
+ closure_bio_submit(s->iop.c, bio, cl);
}
+insert_data:
closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
continue_at(cl, cached_dev_write_complete, NULL);
}
@@ -981,11 +1044,67 @@ static void cached_dev_nodata(struct closure *cl)
bch_journal_meta(s->iop.c, cl);
/* If it's a flush, we send the flush to the backing device too */
- closure_bio_submit(bio, cl);
+ bio->bi_end_io = backing_request_endio;
+ closure_bio_submit(s->iop.c, bio, cl);
continue_at(cl, cached_dev_bio_complete, NULL);
}
+struct detached_dev_io_private {
+ struct bcache_device *d;
+ unsigned long start_time;
+ bio_end_io_t *bi_end_io;
+ void *bi_private;
+};
+
+static void detached_dev_end_io(struct bio *bio)
+{
+ struct detached_dev_io_private *ddip;
+
+ ddip = bio->bi_private;
+ bio->bi_end_io = ddip->bi_end_io;
+ bio->bi_private = ddip->bi_private;
+
+ generic_end_io_acct(ddip->d->disk->queue,
+ bio_data_dir(bio),
+ &ddip->d->disk->part0, ddip->start_time);
+
+ if (bio->bi_status) {
+ struct cached_dev *dc = container_of(ddip->d,
+ struct cached_dev, disk);
+ /* should count I/O error for backing device here */
+ bch_count_backing_io_errors(dc, bio);
+ }
+
+ kfree(ddip);
+ bio->bi_end_io(bio);
+}
+
+static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
+{
+ struct detached_dev_io_private *ddip;
+ struct cached_dev *dc = container_of(d, struct cached_dev, disk);
+
+ /*
+ * no need to call closure_get(&dc->disk.cl),
+ * because upper layer had already opened bcache device,
+ * which would call closure_get(&dc->disk.cl)
+ */
+ ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
+ ddip->d = d;
+ ddip->start_time = jiffies;
+ ddip->bi_end_io = bio->bi_end_io;
+ ddip->bi_private = bio->bi_private;
+ bio->bi_end_io = detached_dev_end_io;
+ bio->bi_private = ddip;
+
+ if ((bio_op(bio) == REQ_OP_DISCARD) &&
+ !blk_queue_discard(bdev_get_queue(dc->bdev)))
+ bio->bi_end_io(bio);
+ else
+ generic_make_request(bio);
+}
+
/* Cached devices - read & write stuff */
static blk_qc_t cached_dev_make_request(struct request_queue *q,
@@ -996,6 +1115,13 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
int rw = bio_data_dir(bio);
+ if (unlikely((d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags)) ||
+ dc->io_disable)) {
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ return BLK_QC_T_NONE;
+ }
+
atomic_set(&dc->backing_idle, 0);
generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
@@ -1022,13 +1148,9 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
else
cached_dev_read(dc, s);
}
- } else {
- if ((bio_op(bio) == REQ_OP_DISCARD) &&
- !blk_queue_discard(bdev_get_queue(dc->bdev)))
- bio_endio(bio);
- else
- generic_make_request(bio);
- }
+ } else
+ /* I/O request sent to backing device */
+ detached_dev_do_request(d, bio);
return BLK_QC_T_NONE;
}
@@ -1112,6 +1234,12 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
struct bcache_device *d = bio->bi_disk->private_data;
int rw = bio_data_dir(bio);
+ if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ return BLK_QC_T_NONE;
+ }
+
generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
s = search_alloc(bio, d);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index f2273143b3cb..d90d9e59ca00 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -47,6 +47,14 @@ const char * const bch_cache_modes[] = {
NULL
};
+/* Default is -1; we skip past it for stop_when_cache_set_failed */
+const char * const bch_stop_on_failure_modes[] = {
+ "default",
+ "auto",
+ "always",
+ NULL
+};
+
static struct kobject *bcache_kobj;
struct mutex bch_register_lock;
LIST_HEAD(bch_cache_sets);
@@ -265,6 +273,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
bio->bi_private = dc;
closure_get(cl);
+ /* I/O request sent to backing device */
__write_super(&dc->sb, bio);
closure_return_with_destructor(cl, bch_write_bdev_super_unlock);
@@ -521,7 +530,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op,
bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags);
bch_bio_map(bio, ca->disk_buckets);
- closure_bio_submit(bio, &ca->prio);
+ closure_bio_submit(ca->set, bio, &ca->prio);
closure_sync(cl);
}
@@ -769,6 +778,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
sector_t sectors)
{
struct request_queue *q;
+ const size_t max_stripes = min_t(size_t, INT_MAX,
+ SIZE_MAX / sizeof(atomic_t));
size_t n;
int idx;
@@ -777,9 +788,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
d->nr_stripes = DIV_ROUND_UP_ULL(sectors, d->stripe_size);
- if (!d->nr_stripes ||
- d->nr_stripes > INT_MAX ||
- d->nr_stripes > SIZE_MAX / sizeof(atomic_t)) {
+ if (!d->nr_stripes || d->nr_stripes > max_stripes) {
pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)",
(unsigned)d->nr_stripes);
return -ENOMEM;
@@ -833,9 +842,9 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
q->limits.io_min = block_size;
q->limits.logical_block_size = block_size;
q->limits.physical_block_size = block_size;
- set_bit(QUEUE_FLAG_NONROT, &d->disk->queue->queue_flags);
- clear_bit(QUEUE_FLAG_ADD_RANDOM, &d->disk->queue->queue_flags);
- set_bit(QUEUE_FLAG_DISCARD, &d->disk->queue->queue_flags);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue);
+ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, d->disk->queue);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, d->disk->queue);
blk_queue_write_cache(q, true, true);
@@ -899,6 +908,31 @@ void bch_cached_dev_run(struct cached_dev *dc)
pr_debug("error creating sysfs link");
}
+/*
+ * If BCACHE_DEV_RATE_DW_RUNNING is set, it means routine of the delayed
+ * work dc->writeback_rate_update is running. Wait until the routine
+ * quits (BCACHE_DEV_RATE_DW_RUNNING is clear), then continue to
+ * cancel it. If BCACHE_DEV_RATE_DW_RUNNING is not clear after time_out
+ * seconds, give up waiting here and continue to cancel it too.
+ */
+static void cancel_writeback_rate_update_dwork(struct cached_dev *dc)
+{
+ int time_out = WRITEBACK_RATE_UPDATE_SECS_MAX * HZ;
+
+ do {
+ if (!test_bit(BCACHE_DEV_RATE_DW_RUNNING,
+ &dc->disk.flags))
+ break;
+ time_out--;
+ schedule_timeout_interruptible(1);
+ } while (time_out > 0);
+
+ if (time_out == 0)
+ pr_warn("give up waiting for dc->writeback_write_update to quit");
+
+ cancel_delayed_work_sync(&dc->writeback_rate_update);
+}
+
static void cached_dev_detach_finish(struct work_struct *w)
{
struct cached_dev *dc = container_of(w, struct cached_dev, detach);
@@ -911,7 +945,9 @@ static void cached_dev_detach_finish(struct work_struct *w)
mutex_lock(&bch_register_lock);
- cancel_delayed_work_sync(&dc->writeback_rate_update);
+ if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
+ cancel_writeback_rate_update_dwork(dc);
+
if (!IS_ERR_OR_NULL(dc->writeback_thread)) {
kthread_stop(dc->writeback_thread);
dc->writeback_thread = NULL;
@@ -954,6 +990,7 @@ void bch_cached_dev_detach(struct cached_dev *dc)
closure_get(&dc->disk.cl);
bch_writeback_queue(dc);
+
cached_dev_put(dc);
}
@@ -1065,7 +1102,6 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
bch_sectors_dirty_init(&dc->disk);
atomic_set(&dc->has_dirty, 1);
- refcount_inc(&dc->count);
bch_writeback_queue(dc);
}
@@ -1093,14 +1129,16 @@ static void cached_dev_free(struct closure *cl)
{
struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
- cancel_delayed_work_sync(&dc->writeback_rate_update);
+ mutex_lock(&bch_register_lock);
+
+ if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
+ cancel_writeback_rate_update_dwork(dc);
+
if (!IS_ERR_OR_NULL(dc->writeback_thread))
kthread_stop(dc->writeback_thread);
if (dc->writeback_write_wq)
destroy_workqueue(dc->writeback_write_wq);
- mutex_lock(&bch_register_lock);
-
if (atomic_read(&dc->running))
bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
bcache_device_free(&dc->disk);
@@ -1170,6 +1208,12 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
max(dc->disk.disk->queue->backing_dev_info->ra_pages,
q->backing_dev_info->ra_pages);
+ atomic_set(&dc->io_errors, 0);
+ dc->io_disable = false;
+ dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT;
+ /* default to auto */
+ dc->stop_when_cache_set_failed = BCH_CACHED_DEV_STOP_AUTO;
+
bch_cached_dev_request_init(dc);
bch_cached_dev_writeback_init(dc);
return 0;
@@ -1321,6 +1365,24 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
return flash_dev_run(c, u);
}
+bool bch_cached_dev_error(struct cached_dev *dc)
+{
+ char name[BDEVNAME_SIZE];
+
+ if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags))
+ return false;
+
+ dc->io_disable = true;
+ /* make others know io_disable is true earlier */
+ smp_mb();
+
+ pr_err("stop %s: too many IO errors on backing device %s\n",
+ dc->disk.disk->disk_name, bdevname(dc->bdev, name));
+
+ bcache_device_stop(&dc->disk);
+ return true;
+}
+
/* Cache set */
__printf(2, 3)
@@ -1332,6 +1394,9 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
test_bit(CACHE_SET_STOPPING, &c->flags))
return false;
+ if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
+ pr_warn("CACHE_SET_IO_DISABLE already set");
+
/* XXX: we can be called from atomic context
acquire_console_sem();
*/
@@ -1443,25 +1508,72 @@ static void cache_set_flush(struct closure *cl)
closure_return(cl);
}
+/*
+ * This function is only called when CACHE_SET_IO_DISABLE is set, which means
+ * cache set is unregistering due to too many I/O errors. In this condition,
+ * the bcache device might be stopped, it depends on stop_when_cache_set_failed
+ * value and whether the broken cache has dirty data:
+ *
+ * dc->stop_when_cache_set_failed dc->has_dirty stop bcache device
+ * BCH_CACHED_STOP_AUTO 0 NO
+ * BCH_CACHED_STOP_AUTO 1 YES
+ * BCH_CACHED_DEV_STOP_ALWAYS 0 YES
+ * BCH_CACHED_DEV_STOP_ALWAYS 1 YES
+ *
+ * The expected behavior is, if stop_when_cache_set_failed is configured to
+ * "auto" via sysfs interface, the bcache device will not be stopped if the
+ * backing device is clean on the broken cache device.
+ */
+static void conditional_stop_bcache_device(struct cache_set *c,
+ struct bcache_device *d,
+ struct cached_dev *dc)
+{
+ if (dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_ALWAYS) {
+ pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.",
+ d->disk->disk_name, c->sb.set_uuid);
+ bcache_device_stop(d);
+ } else if (atomic_read(&dc->has_dirty)) {
+ /*
+ * dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO
+ * and dc->has_dirty == 1
+ */
+ pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.",
+ d->disk->disk_name);
+ bcache_device_stop(d);
+ } else {
+ /*
+ * dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO
+ * and dc->has_dirty == 0
+ */
+ pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.",
+ d->disk->disk_name);
+ }
+}
+
static void __cache_set_unregister(struct closure *cl)
{
struct cache_set *c = container_of(cl, struct cache_set, caching);
struct cached_dev *dc;
+ struct bcache_device *d;
size_t i;
mutex_lock(&bch_register_lock);
- for (i = 0; i < c->devices_max_used; i++)
- if (c->devices[i]) {
- if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
- test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
- dc = container_of(c->devices[i],
- struct cached_dev, disk);
- bch_cached_dev_detach(dc);
- } else {
- bcache_device_stop(c->devices[i]);
- }
+ for (i = 0; i < c->devices_max_used; i++) {
+ d = c->devices[i];
+ if (!d)
+ continue;
+
+ if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
+ test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
+ dc = container_of(d, struct cached_dev, disk);
+ bch_cached_dev_detach(dc);
+ if (test_bit(CACHE_SET_IO_DISABLE, &c->flags))
+ conditional_stop_bcache_device(c, d, dc);
+ } else {
+ bcache_device_stop(d);
}
+ }
mutex_unlock(&bch_register_lock);
@@ -1567,6 +1679,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
c->congested_read_threshold_us = 2000;
c->congested_write_threshold_us = 20000;
c->error_limit = DEFAULT_IO_ERROR_LIMIT;
+ WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags));
return c;
err:
@@ -2148,7 +2261,6 @@ static int __init bcache_init(void)
mutex_init(&bch_register_lock);
init_waitqueue_head(&unregister_wait);
register_reboot_notifier(&reboot);
- closure_debug_init();
bcache_major = register_blkdev(0, "bcache");
if (bcache_major < 0) {
@@ -2160,7 +2272,7 @@ static int __init bcache_init(void)
if (!(bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0)) ||
!(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) ||
bch_request_init() ||
- bch_debug_init(bcache_kobj) ||
+ bch_debug_init(bcache_kobj) || closure_debug_init() ||
sysfs_create_files(bcache_kobj, files))
goto err;
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 78cd7bd50fdd..dfeef583ee50 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -78,6 +78,7 @@ rw_attribute(congested_write_threshold_us);
rw_attribute(sequential_cutoff);
rw_attribute(data_csum);
rw_attribute(cache_mode);
+rw_attribute(stop_when_cache_set_failed);
rw_attribute(writeback_metadata);
rw_attribute(writeback_running);
rw_attribute(writeback_percent);
@@ -95,6 +96,7 @@ read_attribute(partial_stripes_expensive);
rw_attribute(synchronous);
rw_attribute(journal_delay_ms);
+rw_attribute(io_disable);
rw_attribute(discard);
rw_attribute(running);
rw_attribute(label);
@@ -125,6 +127,12 @@ SHOW(__bch_cached_dev)
bch_cache_modes + 1,
BDEV_CACHE_MODE(&dc->sb));
+ if (attr == &sysfs_stop_when_cache_set_failed)
+ return bch_snprint_string_list(buf, PAGE_SIZE,
+ bch_stop_on_failure_modes + 1,
+ dc->stop_when_cache_set_failed);
+
+
sysfs_printf(data_csum, "%i", dc->disk.data_csum);
var_printf(verify, "%i");
var_printf(bypass_torture_test, "%i");
@@ -133,7 +141,9 @@ SHOW(__bch_cached_dev)
var_print(writeback_delay);
var_print(writeback_percent);
sysfs_hprint(writeback_rate, dc->writeback_rate.rate << 9);
-
+ sysfs_hprint(io_errors, atomic_read(&dc->io_errors));
+ sysfs_printf(io_error_limit, "%i", dc->error_limit);
+ sysfs_printf(io_disable, "%i", dc->io_disable);
var_print(writeback_rate_update_seconds);
var_print(writeback_rate_i_term_inverse);
var_print(writeback_rate_p_term_inverse);
@@ -173,7 +183,7 @@ SHOW(__bch_cached_dev)
sysfs_hprint(dirty_data,
bcache_dev_sectors_dirty(&dc->disk) << 9);
- sysfs_hprint(stripe_size, dc->disk.stripe_size << 9);
+ sysfs_hprint(stripe_size, ((uint64_t)dc->disk.stripe_size) << 9);
var_printf(partial_stripes_expensive, "%u");
var_hprint(sequential_cutoff);
@@ -224,6 +234,14 @@ STORE(__cached_dev)
d_strtoul(writeback_rate_i_term_inverse);
d_strtoul_nonzero(writeback_rate_p_term_inverse);
+ sysfs_strtoul_clamp(io_error_limit, dc->error_limit, 0, INT_MAX);
+
+ if (attr == &sysfs_io_disable) {
+ int v = strtoul_or_return(buf);
+
+ dc->io_disable = v ? 1 : 0;
+ }
+
d_strtoi_h(sequential_cutoff);
d_strtoi_h(readahead);
@@ -246,6 +264,15 @@ STORE(__cached_dev)
}
}
+ if (attr == &sysfs_stop_when_cache_set_failed) {
+ v = bch_read_string_list(buf, bch_stop_on_failure_modes + 1);
+
+ if (v < 0)
+ return v;
+
+ dc->stop_when_cache_set_failed = v;
+ }
+
if (attr == &sysfs_label) {
if (size > SB_LABEL_SIZE)
return -EINVAL;
@@ -309,7 +336,8 @@ STORE(bch_cached_dev)
bch_writeback_queue(dc);
if (attr == &sysfs_writeback_percent)
- schedule_delayed_work(&dc->writeback_rate_update,
+ if (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
+ schedule_delayed_work(&dc->writeback_rate_update,
dc->writeback_rate_update_seconds * HZ);
mutex_unlock(&bch_register_lock);
@@ -324,6 +352,7 @@ static struct attribute *bch_cached_dev_files[] = {
&sysfs_data_csum,
#endif
&sysfs_cache_mode,
+ &sysfs_stop_when_cache_set_failed,
&sysfs_writeback_metadata,
&sysfs_writeback_running,
&sysfs_writeback_delay,
@@ -333,6 +362,9 @@ static struct attribute *bch_cached_dev_files[] = {
&sysfs_writeback_rate_i_term_inverse,
&sysfs_writeback_rate_p_term_inverse,
&sysfs_writeback_rate_debug,
+ &sysfs_errors,
+ &sysfs_io_error_limit,
+ &sysfs_io_disable,
&sysfs_dirty_data,
&sysfs_stripe_size,
&sysfs_partial_stripes_expensive,
@@ -590,6 +622,8 @@ SHOW(__bch_cache_set)
sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite);
sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled);
sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
+ sysfs_printf(io_disable, "%i",
+ test_bit(CACHE_SET_IO_DISABLE, &c->flags));
if (attr == &sysfs_bset_tree_stats)
return bch_bset_print_stats(c, buf);
@@ -679,6 +713,20 @@ STORE(__bch_cache_set)
if (attr == &sysfs_io_error_halflife)
c->error_decay = strtoul_or_return(buf) / 88;
+ if (attr == &sysfs_io_disable) {
+ int v = strtoul_or_return(buf);
+
+ if (v) {
+ if (test_and_set_bit(CACHE_SET_IO_DISABLE,
+ &c->flags))
+ pr_warn("CACHE_SET_IO_DISABLE already set");
+ } else {
+ if (!test_and_clear_bit(CACHE_SET_IO_DISABLE,
+ &c->flags))
+ pr_warn("CACHE_SET_IO_DISABLE already cleared");
+ }
+ }
+
sysfs_strtoul(journal_delay_ms, c->journal_delay_ms);
sysfs_strtoul(verify, c->verify);
sysfs_strtoul(key_merging_disabled, c->key_merging_disabled);
@@ -764,6 +812,7 @@ static struct attribute *bch_cache_set_internal_files[] = {
&sysfs_gc_always_rewrite,
&sysfs_btree_shrinker_disabled,
&sysfs_copy_gc_enabled,
+ &sysfs_io_disable,
NULL
};
KTYPE(bch_cache_set_internal);
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index a23cd6a14b74..74febd5230df 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -32,20 +32,27 @@ int bch_ ## name ## _h(const char *cp, type *res) \
case 'y': \
case 'z': \
u++; \
+ /* fall through */ \
case 'e': \
u++; \
+ /* fall through */ \
case 'p': \
u++; \
+ /* fall through */ \
case 't': \
u++; \
+ /* fall through */ \
case 'g': \
u++; \
+ /* fall through */ \
case 'm': \
u++; \
+ /* fall through */ \
case 'k': \
u++; \
if (e++ == cp) \
return -EINVAL; \
+ /* fall through */ \
case '\n': \
case '\0': \
if (*e == '\n') \
@@ -75,10 +82,9 @@ STRTO_H(strtoll, long long)
STRTO_H(strtoull, unsigned long long)
/**
- * bch_hprint() - formats @v to human readable string for sysfs.
- *
- * @v - signed 64 bit integer
- * @buf - the (at least 8 byte) buffer to format the result into.
+ * bch_hprint - formats @v to human readable string for sysfs.
+ * @buf: the (at least 8 byte) buffer to format the result into.
+ * @v: signed 64 bit integer
*
* Returns the number of bytes used by format.
*/
@@ -218,13 +224,12 @@ void bch_time_stats_update(struct time_stats *stats, uint64_t start_time)
}
/**
- * bch_next_delay() - increment @d by the amount of work done, and return how
- * long to delay until the next time to do some work.
- *
- * @d - the struct bch_ratelimit to update
- * @done - the amount of work done, in arbitrary units
+ * bch_next_delay() - update ratelimiting statistics and calculate next delay
+ * @d: the struct bch_ratelimit to update
+ * @done: the amount of work done, in arbitrary units
*
- * Returns the amount of time to delay by, in jiffies
+ * Increment @d by the amount of work done, and return how long to delay in
+ * jiffies until the next time to do some work.
*/
uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
{
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index a6763db7f061..268024529edd 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -567,12 +567,6 @@ static inline sector_t bdev_sectors(struct block_device *bdev)
return bdev->bd_inode->i_size >> 9;
}
-#define closure_bio_submit(bio, cl) \
-do { \
- closure_get(cl); \
- generic_make_request(bio); \
-} while (0)
-
uint64_t bch_crc64_update(uint64_t, const void *, size_t);
uint64_t bch_crc64(const void *, size_t);
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index f1d2fc15abcc..4a9547cdcdc5 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -114,6 +114,27 @@ static void update_writeback_rate(struct work_struct *work)
struct cached_dev *dc = container_of(to_delayed_work(work),
struct cached_dev,
writeback_rate_update);
+ struct cache_set *c = dc->disk.c;
+
+ /*
+ * should check BCACHE_DEV_RATE_DW_RUNNING before calling
+ * cancel_delayed_work_sync().
+ */
+ set_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
+ /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
+ smp_mb();
+
+ /*
+ * CACHE_SET_IO_DISABLE might be set via sysfs interface,
+ * check it here too.
+ */
+ if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) ||
+ test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
+ clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
+ /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
+ smp_mb();
+ return;
+ }
down_read(&dc->writeback_lock);
@@ -123,8 +144,23 @@ static void update_writeback_rate(struct work_struct *work)
up_read(&dc->writeback_lock);
- schedule_delayed_work(&dc->writeback_rate_update,
+ /*
+ * CACHE_SET_IO_DISABLE might be set via sysfs interface,
+ * check it here too.
+ */
+ if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) &&
+ !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
+ schedule_delayed_work(&dc->writeback_rate_update,
dc->writeback_rate_update_seconds * HZ);
+ }
+
+ /*
+ * should check BCACHE_DEV_RATE_DW_RUNNING before calling
+ * cancel_delayed_work_sync().
+ */
+ clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
+ /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
+ smp_mb();
}
static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
@@ -253,7 +289,8 @@ static void write_dirty(struct closure *cl)
bio_set_dev(&io->bio, io->dc->bdev);
io->bio.bi_end_io = dirty_endio;
- closure_bio_submit(&io->bio, cl);
+ /* I/O request sent to backing device */
+ closure_bio_submit(io->dc->disk.c, &io->bio, cl);
}
atomic_set(&dc->writeback_sequence_next, next_sequence);
@@ -279,7 +316,7 @@ static void read_dirty_submit(struct closure *cl)
{
struct dirty_io *io = container_of(cl, struct dirty_io, cl);
- closure_bio_submit(&io->bio, cl);
+ closure_bio_submit(io->dc->disk.c, &io->bio, cl);
continue_at(cl, write_dirty, io->dc->writeback_write_wq);
}
@@ -305,7 +342,9 @@ static void read_dirty(struct cached_dev *dc)
next = bch_keybuf_next(&dc->writeback_keys);
- while (!kthread_should_stop() && next) {
+ while (!kthread_should_stop() &&
+ !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
+ next) {
size = 0;
nk = 0;
@@ -402,7 +441,9 @@ static void read_dirty(struct cached_dev *dc)
}
}
- while (!kthread_should_stop() && delay) {
+ while (!kthread_should_stop() &&
+ !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
+ delay) {
schedule_timeout_interruptible(delay);
delay = writeback_delay(dc, 0);
}
@@ -558,21 +599,30 @@ static bool refill_dirty(struct cached_dev *dc)
static int bch_writeback_thread(void *arg)
{
struct cached_dev *dc = arg;
+ struct cache_set *c = dc->disk.c;
bool searched_full_index;
bch_ratelimit_reset(&dc->writeback_rate);
- while (!kthread_should_stop()) {
+ while (!kthread_should_stop() &&
+ !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
down_write(&dc->writeback_lock);
set_current_state(TASK_INTERRUPTIBLE);
- if (!atomic_read(&dc->has_dirty) ||
- (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
- !dc->writeback_running)) {
+ /*
+ * If the bache device is detaching, skip here and continue
+ * to perform writeback. Otherwise, if no dirty data on cache,
+ * or there is dirty data on cache but writeback is disabled,
+ * the writeback thread should sleep here and wait for others
+ * to wake up it.
+ */
+ if (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
+ (!atomic_read(&dc->has_dirty) || !dc->writeback_running)) {
up_write(&dc->writeback_lock);
- if (kthread_should_stop()) {
+ if (kthread_should_stop() ||
+ test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
set_current_state(TASK_RUNNING);
- return 0;
+ break;
}
schedule();
@@ -585,9 +635,16 @@ static int bch_writeback_thread(void *arg)
if (searched_full_index &&
RB_EMPTY_ROOT(&dc->writeback_keys.keys)) {
atomic_set(&dc->has_dirty, 0);
- cached_dev_put(dc);
SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
bch_write_bdev_super(dc, NULL);
+ /*
+ * If bcache device is detaching via sysfs interface,
+ * writeback thread should stop after there is no dirty
+ * data on cache. BCACHE_DEV_DETACHING flag is set in
+ * bch_cached_dev_detach().
+ */
+ if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
+ break;
}
up_write(&dc->writeback_lock);
@@ -599,6 +656,7 @@ static int bch_writeback_thread(void *arg)
while (delay &&
!kthread_should_stop() &&
+ !test_bit(CACHE_SET_IO_DISABLE, &c->flags) &&
!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
delay = schedule_timeout_interruptible(delay);
@@ -606,6 +664,9 @@ static int bch_writeback_thread(void *arg)
}
}
+ cached_dev_put(dc);
+ wait_for_kthread_stop();
+
return 0;
}
@@ -659,6 +720,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
dc->writeback_rate_p_term_inverse = 40;
dc->writeback_rate_i_term_inverse = 10000;
+ WARN_ON(test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
}
@@ -669,11 +731,15 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc)
if (!dc->writeback_write_wq)
return -ENOMEM;
+ cached_dev_get(dc);
dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
"bcache_writeback");
- if (IS_ERR(dc->writeback_thread))
+ if (IS_ERR(dc->writeback_thread)) {
+ cached_dev_put(dc);
return PTR_ERR(dc->writeback_thread);
+ }
+ WARN_ON(test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
schedule_delayed_work(&dc->writeback_rate_update,
dc->writeback_rate_update_seconds * HZ);
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index 587b25599856..610fb01de629 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -39,7 +39,7 @@ static inline uint64_t bcache_flash_devs_sectors_dirty(struct cache_set *c)
if (!d || !UUID_FLASH_ONLY(&c->uuids[i]))
continue;
- ret += bcache_dev_sectors_dirty(d);
+ ret += bcache_dev_sectors_dirty(d);
}
mutex_unlock(&bch_register_lock);
@@ -105,8 +105,6 @@ static inline void bch_writeback_add(struct cached_dev *dc)
{
if (!atomic_read(&dc->has_dirty) &&
!atomic_xchg(&dc->has_dirty, 1)) {
- refcount_inc(&dc->count);
-
if (BDEV_STATE(&dc->sb) != BDEV_STATE_DIRTY) {
SET_BDEV_STATE(&dc->sb, BDEV_STATE_DIRTY);
/* XXX: should do this synchronously */
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index aa2032fa80d4..12aa9ca21d8c 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -6,7 +6,7 @@
* This file is released under the GPL.
*/
-#include "dm-bufio.h"
+#include <linux/dm-bufio.h>
#include <linux/device-mapper.h>
#include <linux/dm-io.h>
@@ -51,19 +51,6 @@
#define DM_BUFIO_DEFAULT_RETAIN_BYTES (256 * 1024)
/*
- * The number of bvec entries that are embedded directly in the buffer.
- * If the chunk size is larger, dm-io is used to do the io.
- */
-#define DM_BUFIO_INLINE_VECS 16
-
-/*
- * Don't try to use kmem_cache_alloc for blocks larger than this.
- * For explanation, see alloc_buffer_data below.
- */
-#define DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT (PAGE_SIZE >> 1)
-#define DM_BUFIO_BLOCK_SIZE_GFP_LIMIT (PAGE_SIZE << (MAX_ORDER - 1))
-
-/*
* Align buffer writes to this boundary.
* Tests show that SSDs have the highest IOPS when using 4k writes.
*/
@@ -99,13 +86,12 @@ struct dm_bufio_client {
struct block_device *bdev;
unsigned block_size;
- unsigned char sectors_per_block_bits;
- unsigned char pages_per_block_bits;
- unsigned char blocks_per_page_bits;
- unsigned aux_size;
+ s8 sectors_per_block_bits;
void (*alloc_callback)(struct dm_buffer *);
void (*write_callback)(struct dm_buffer *);
+ struct kmem_cache *slab_buffer;
+ struct kmem_cache *slab_cache;
struct dm_io_client *dm_io;
struct list_head reserved_buffers;
@@ -148,11 +134,11 @@ struct dm_buffer {
struct list_head lru_list;
sector_t block;
void *data;
- enum data_mode data_mode;
+ unsigned char data_mode; /* DATA_MODE_* */
unsigned char list_mode; /* LIST_* */
- unsigned hold_count;
blk_status_t read_error;
blk_status_t write_error;
+ unsigned hold_count;
unsigned long state;
unsigned long last_accessed;
unsigned dirty_start;
@@ -161,8 +147,7 @@ struct dm_buffer {
unsigned write_end;
struct dm_bufio_client *c;
struct list_head write_list;
- struct bio bio;
- struct bio_vec bio_vec[DM_BUFIO_INLINE_VECS];
+ void (*end_io)(struct dm_buffer *, blk_status_t);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
#define MAX_STACK 10
struct stack_trace stack_trace;
@@ -172,21 +157,6 @@ struct dm_buffer {
/*----------------------------------------------------------------*/
-static struct kmem_cache *dm_bufio_caches[PAGE_SHIFT - SECTOR_SHIFT];
-static char *dm_bufio_cache_names[PAGE_SHIFT - SECTOR_SHIFT];
-
-static inline int dm_bufio_cache_index(struct dm_bufio_client *c)
-{
- unsigned ret = c->blocks_per_page_bits - 1;
-
- BUG_ON(ret >= ARRAY_SIZE(dm_bufio_caches));
-
- return ret;
-}
-
-#define DM_BUFIO_CACHE(c) (dm_bufio_caches[dm_bufio_cache_index(c)])
-#define DM_BUFIO_CACHE_NAME(c) (dm_bufio_cache_names[dm_bufio_cache_index(c)])
-
#define dm_bufio_in_request() (!!current->bio_list)
static void dm_bufio_lock(struct dm_bufio_client *c)
@@ -319,7 +289,7 @@ static void __remove(struct dm_bufio_client *c, struct dm_buffer *b)
/*----------------------------------------------------------------*/
-static void adjust_total_allocated(enum data_mode data_mode, long diff)
+static void adjust_total_allocated(unsigned char data_mode, long diff)
{
static unsigned long * const class_ptr[DATA_MODE_LIMIT] = {
&dm_bufio_allocated_kmem_cache,
@@ -384,18 +354,18 @@ static void __cache_size_refresh(void)
* space.
*/
static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
- enum data_mode *data_mode)
+ unsigned char *data_mode)
{
- if (c->block_size <= DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT) {
+ if (unlikely(c->slab_cache != NULL)) {
*data_mode = DATA_MODE_SLAB;
- return kmem_cache_alloc(DM_BUFIO_CACHE(c), gfp_mask);
+ return kmem_cache_alloc(c->slab_cache, gfp_mask);
}
- if (c->block_size <= DM_BUFIO_BLOCK_SIZE_GFP_LIMIT &&
+ if (c->block_size <= KMALLOC_MAX_SIZE &&
gfp_mask & __GFP_NORETRY) {
*data_mode = DATA_MODE_GET_FREE_PAGES;
return (void *)__get_free_pages(gfp_mask,
- c->pages_per_block_bits);
+ c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT));
}
*data_mode = DATA_MODE_VMALLOC;
@@ -424,15 +394,16 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
* Free buffer's data.
*/
static void free_buffer_data(struct dm_bufio_client *c,
- void *data, enum data_mode data_mode)
+ void *data, unsigned char data_mode)
{
switch (data_mode) {
case DATA_MODE_SLAB:
- kmem_cache_free(DM_BUFIO_CACHE(c), data);
+ kmem_cache_free(c->slab_cache, data);
break;
case DATA_MODE_GET_FREE_PAGES:
- free_pages((unsigned long)data, c->pages_per_block_bits);
+ free_pages((unsigned long)data,
+ c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT));
break;
case DATA_MODE_VMALLOC:
@@ -451,8 +422,7 @@ static void free_buffer_data(struct dm_bufio_client *c,
*/
static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask)
{
- struct dm_buffer *b = kmalloc(sizeof(struct dm_buffer) + c->aux_size,
- gfp_mask);
+ struct dm_buffer *b = kmem_cache_alloc(c->slab_buffer, gfp_mask);
if (!b)
return NULL;
@@ -461,7 +431,7 @@ static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask)
b->data = alloc_buffer_data(c, gfp_mask, &b->data_mode);
if (!b->data) {
- kfree(b);
+ kmem_cache_free(c->slab_buffer, b);
return NULL;
}
@@ -483,7 +453,7 @@ static void free_buffer(struct dm_buffer *b)
adjust_total_allocated(b->data_mode, -(long)c->block_size);
free_buffer_data(c, b->data, b->data_mode);
- kfree(b);
+ kmem_cache_free(c->slab_buffer, b);
}
/*
@@ -540,10 +510,6 @@ static void __relink_lru(struct dm_buffer *b, int dirty)
*
* the memory must be direct-mapped, not vmalloced;
*
- * the I/O driver can reject requests spuriously if it thinks that
- * the requests are too big for the device or if they cross a
- * controller-defined memory boundary.
- *
* If the buffer is small enough (up to DM_BUFIO_INLINE_VECS pages) and
* it is not vmalloced, try using the bio interface.
*
@@ -561,12 +527,11 @@ static void dmio_complete(unsigned long error, void *context)
{
struct dm_buffer *b = context;
- b->bio.bi_status = error ? BLK_STS_IOERR : 0;
- b->bio.bi_end_io(&b->bio);
+ b->end_io(b, unlikely(error != 0) ? BLK_STS_IOERR : 0);
}
static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
- unsigned n_sectors, unsigned offset, bio_end_io_t *end_io)
+ unsigned n_sectors, unsigned offset)
{
int r;
struct dm_io_request io_req = {
@@ -590,76 +555,77 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
io_req.mem.ptr.vma = (char *)b->data + offset;
}
- b->bio.bi_end_io = end_io;
-
r = dm_io(&io_req, 1, &region, NULL);
- if (r) {
- b->bio.bi_status = errno_to_blk_status(r);
- end_io(&b->bio);
- }
+ if (unlikely(r))
+ b->end_io(b, errno_to_blk_status(r));
}
-static void inline_endio(struct bio *bio)
+static void bio_complete(struct bio *bio)
{
- bio_end_io_t *end_fn = bio->bi_private;
+ struct dm_buffer *b = bio->bi_private;
blk_status_t status = bio->bi_status;
-
- /*
- * Reset the bio to free any attached resources
- * (e.g. bio integrity profiles).
- */
- bio_reset(bio);
-
- bio->bi_status = status;
- end_fn(bio);
+ bio_put(bio);
+ b->end_io(b, status);
}
-static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector,
- unsigned n_sectors, unsigned offset, bio_end_io_t *end_io)
+static void use_bio(struct dm_buffer *b, int rw, sector_t sector,
+ unsigned n_sectors, unsigned offset)
{
+ struct bio *bio;
char *ptr;
- unsigned len;
+ unsigned vec_size, len;
- bio_init(&b->bio, b->bio_vec, DM_BUFIO_INLINE_VECS);
- b->bio.bi_iter.bi_sector = sector;
- bio_set_dev(&b->bio, b->c->bdev);
- b->bio.bi_end_io = inline_endio;
- /*
- * Use of .bi_private isn't a problem here because
- * the dm_buffer's inline bio is local to bufio.
- */
- b->bio.bi_private = end_io;
- bio_set_op_attrs(&b->bio, rw, 0);
+ vec_size = b->c->block_size >> PAGE_SHIFT;
+ if (unlikely(b->c->sectors_per_block_bits < PAGE_SHIFT - SECTOR_SHIFT))
+ vec_size += 2;
+
+ bio = bio_kmalloc(GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN, vec_size);
+ if (!bio) {
+dmio:
+ use_dmio(b, rw, sector, n_sectors, offset);
+ return;
+ }
+
+ bio->bi_iter.bi_sector = sector;
+ bio_set_dev(bio, b->c->bdev);
+ bio_set_op_attrs(bio, rw, 0);
+ bio->bi_end_io = bio_complete;
+ bio->bi_private = b;
ptr = (char *)b->data + offset;
len = n_sectors << SECTOR_SHIFT;
do {
unsigned this_step = min((unsigned)(PAGE_SIZE - offset_in_page(ptr)), len);
- if (!bio_add_page(&b->bio, virt_to_page(ptr), this_step,
+ if (!bio_add_page(bio, virt_to_page(ptr), this_step,
offset_in_page(ptr))) {
- BUG_ON(b->c->block_size <= PAGE_SIZE);
- use_dmio(b, rw, sector, n_sectors, offset, end_io);
- return;
+ bio_put(bio);
+ goto dmio;
}
len -= this_step;
ptr += this_step;
} while (len > 0);
- submit_bio(&b->bio);
+ submit_bio(bio);
}
-static void submit_io(struct dm_buffer *b, int rw, bio_end_io_t *end_io)
+static void submit_io(struct dm_buffer *b, int rw, void (*end_io)(struct dm_buffer *, blk_status_t))
{
unsigned n_sectors;
sector_t sector;
unsigned offset, end;
- sector = (b->block << b->c->sectors_per_block_bits) + b->c->start;
+ b->end_io = end_io;
+
+ if (likely(b->c->sectors_per_block_bits >= 0))
+ sector = b->block << b->c->sectors_per_block_bits;
+ else
+ sector = b->block * (b->c->block_size >> SECTOR_SHIFT);
+ sector += b->c->start;
if (rw != REQ_OP_WRITE) {
- n_sectors = 1 << b->c->sectors_per_block_bits;
+ n_sectors = b->c->block_size >> SECTOR_SHIFT;
offset = 0;
} else {
if (b->c->write_callback)
@@ -676,11 +642,10 @@ static void submit_io(struct dm_buffer *b, int rw, bio_end_io_t *end_io)
n_sectors = (end - offset) >> SECTOR_SHIFT;
}
- if (n_sectors <= ((DM_BUFIO_INLINE_VECS * PAGE_SIZE) >> SECTOR_SHIFT) &&
- b->data_mode != DATA_MODE_VMALLOC)
- use_inline_bio(b, rw, sector, n_sectors, offset, end_io);
+ if (b->data_mode != DATA_MODE_VMALLOC)
+ use_bio(b, rw, sector, n_sectors, offset);
else
- use_dmio(b, rw, sector, n_sectors, offset, end_io);
+ use_dmio(b, rw, sector, n_sectors, offset);
}
/*----------------------------------------------------------------
@@ -693,16 +658,14 @@ static void submit_io(struct dm_buffer *b, int rw, bio_end_io_t *end_io)
* Set the error, clear B_WRITING bit and wake anyone who was waiting on
* it.
*/
-static void write_endio(struct bio *bio)
+static void write_endio(struct dm_buffer *b, blk_status_t status)
{
- struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
-
- b->write_error = bio->bi_status;
- if (unlikely(bio->bi_status)) {
+ b->write_error = status;
+ if (unlikely(status)) {
struct dm_bufio_client *c = b->c;
(void)cmpxchg(&c->async_write_error, 0,
- blk_status_to_errno(bio->bi_status));
+ blk_status_to_errno(status));
}
BUG_ON(!test_bit(B_WRITING, &b->state));
@@ -963,8 +926,11 @@ static void __get_memory_limit(struct dm_bufio_client *c,
}
}
- buffers = dm_bufio_cache_size_per_client >>
- (c->sectors_per_block_bits + SECTOR_SHIFT);
+ buffers = dm_bufio_cache_size_per_client;
+ if (likely(c->sectors_per_block_bits >= 0))
+ buffers >>= c->sectors_per_block_bits + SECTOR_SHIFT;
+ else
+ buffers /= c->block_size;
if (buffers < c->minimum_buffers)
buffers = c->minimum_buffers;
@@ -1076,11 +1042,9 @@ found_buffer:
* The endio routine for reading: set the error, clear the bit and wake up
* anyone waiting on the buffer.
*/
-static void read_endio(struct bio *bio)
+static void read_endio(struct dm_buffer *b, blk_status_t status)
{
- struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
-
- b->read_error = bio->bi_status;
+ b->read_error = status;
BUG_ON(!test_bit(B_READING, &b->state));
@@ -1482,13 +1446,13 @@ void dm_bufio_forget(struct dm_bufio_client *c, sector_t block)
dm_bufio_unlock(c);
}
-EXPORT_SYMBOL(dm_bufio_forget);
+EXPORT_SYMBOL_GPL(dm_bufio_forget);
void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n)
{
c->minimum_buffers = n;
}
-EXPORT_SYMBOL(dm_bufio_set_minimum_buffers);
+EXPORT_SYMBOL_GPL(dm_bufio_set_minimum_buffers);
unsigned dm_bufio_get_block_size(struct dm_bufio_client *c)
{
@@ -1498,8 +1462,12 @@ EXPORT_SYMBOL_GPL(dm_bufio_get_block_size);
sector_t dm_bufio_get_device_size(struct dm_bufio_client *c)
{
- return i_size_read(c->bdev->bd_inode) >>
- (SECTOR_SHIFT + c->sectors_per_block_bits);
+ sector_t s = i_size_read(c->bdev->bd_inode) >> SECTOR_SHIFT;
+ if (likely(c->sectors_per_block_bits >= 0))
+ s >>= c->sectors_per_block_bits;
+ else
+ sector_div(s, c->block_size >> SECTOR_SHIFT);
+ return s;
}
EXPORT_SYMBOL_GPL(dm_bufio_get_device_size);
@@ -1597,8 +1565,12 @@ static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp)
static unsigned long get_retain_buffers(struct dm_bufio_client *c)
{
- unsigned long retain_bytes = READ_ONCE(dm_bufio_retain_bytes);
- return retain_bytes >> (c->sectors_per_block_bits + SECTOR_SHIFT);
+ unsigned long retain_bytes = READ_ONCE(dm_bufio_retain_bytes);
+ if (likely(c->sectors_per_block_bits >= 0))
+ retain_bytes >>= c->sectors_per_block_bits + SECTOR_SHIFT;
+ else
+ retain_bytes /= c->block_size;
+ return retain_bytes;
}
static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
@@ -1662,9 +1634,13 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
int r;
struct dm_bufio_client *c;
unsigned i;
+ char slab_name[27];
- BUG_ON(block_size < 1 << SECTOR_SHIFT ||
- (block_size & (block_size - 1)));
+ if (!block_size || block_size & ((1 << SECTOR_SHIFT) - 1)) {
+ DMERR("%s: block size not specified or is not multiple of 512b", __func__);
+ r = -EINVAL;
+ goto bad_client;
+ }
c = kzalloc(sizeof(*c), GFP_KERNEL);
if (!c) {
@@ -1675,13 +1651,11 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
c->bdev = bdev;
c->block_size = block_size;
- c->sectors_per_block_bits = __ffs(block_size) - SECTOR_SHIFT;
- c->pages_per_block_bits = (__ffs(block_size) >= PAGE_SHIFT) ?
- __ffs(block_size) - PAGE_SHIFT : 0;
- c->blocks_per_page_bits = (__ffs(block_size) < PAGE_SHIFT ?
- PAGE_SHIFT - __ffs(block_size) : 0);
+ if (is_power_of_2(block_size))
+ c->sectors_per_block_bits = __ffs(block_size) - SECTOR_SHIFT;
+ else
+ c->sectors_per_block_bits = -1;
- c->aux_size = aux_size;
c->alloc_callback = alloc_callback;
c->write_callback = write_callback;
@@ -1694,7 +1668,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
INIT_LIST_HEAD(&c->reserved_buffers);
c->need_reserved_buffers = reserved_buffers;
- c->minimum_buffers = DM_BUFIO_MIN_BUFFERS;
+ dm_bufio_set_minimum_buffers(c, DM_BUFIO_MIN_BUFFERS);
init_waitqueue_head(&c->free_buffer_wait);
c->async_write_error = 0;
@@ -1705,29 +1679,26 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
goto bad_dm_io;
}
- mutex_lock(&dm_bufio_clients_lock);
- if (c->blocks_per_page_bits) {
- if (!DM_BUFIO_CACHE_NAME(c)) {
- DM_BUFIO_CACHE_NAME(c) = kasprintf(GFP_KERNEL, "dm_bufio_cache-%u", c->block_size);
- if (!DM_BUFIO_CACHE_NAME(c)) {
- r = -ENOMEM;
- mutex_unlock(&dm_bufio_clients_lock);
- goto bad;
- }
- }
-
- if (!DM_BUFIO_CACHE(c)) {
- DM_BUFIO_CACHE(c) = kmem_cache_create(DM_BUFIO_CACHE_NAME(c),
- c->block_size,
- c->block_size, 0, NULL);
- if (!DM_BUFIO_CACHE(c)) {
- r = -ENOMEM;
- mutex_unlock(&dm_bufio_clients_lock);
- goto bad;
- }
+ if (block_size <= KMALLOC_MAX_SIZE &&
+ (block_size < PAGE_SIZE || !is_power_of_2(block_size))) {
+ snprintf(slab_name, sizeof slab_name, "dm_bufio_cache-%u", c->block_size);
+ c->slab_cache = kmem_cache_create(slab_name, c->block_size, ARCH_KMALLOC_MINALIGN,
+ SLAB_RECLAIM_ACCOUNT, NULL);
+ if (!c->slab_cache) {
+ r = -ENOMEM;
+ goto bad;
}
}
- mutex_unlock(&dm_bufio_clients_lock);
+ if (aux_size)
+ snprintf(slab_name, sizeof slab_name, "dm_bufio_buffer-%u", aux_size);
+ else
+ snprintf(slab_name, sizeof slab_name, "dm_bufio_buffer");
+ c->slab_buffer = kmem_cache_create(slab_name, sizeof(struct dm_buffer) + aux_size,
+ 0, SLAB_RECLAIM_ACCOUNT, NULL);
+ if (!c->slab_buffer) {
+ r = -ENOMEM;
+ goto bad;
+ }
while (c->need_reserved_buffers) {
struct dm_buffer *b = alloc_buffer(c, GFP_KERNEL);
@@ -1762,6 +1733,8 @@ bad:
list_del(&b->lru_list);
free_buffer(b);
}
+ kmem_cache_destroy(c->slab_cache);
+ kmem_cache_destroy(c->slab_buffer);
dm_io_client_destroy(c->dm_io);
bad_dm_io:
mutex_destroy(&c->lock);
@@ -1808,6 +1781,8 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c)
for (i = 0; i < LIST_SIZE; i++)
BUG_ON(c->n_buffers[i]);
+ kmem_cache_destroy(c->slab_cache);
+ kmem_cache_destroy(c->slab_buffer);
dm_io_client_destroy(c->dm_io);
mutex_destroy(&c->lock);
kfree(c);
@@ -1911,9 +1886,6 @@ static int __init dm_bufio_init(void)
dm_bufio_allocated_vmalloc = 0;
dm_bufio_current_allocated = 0;
- memset(&dm_bufio_caches, 0, sizeof dm_bufio_caches);
- memset(&dm_bufio_cache_names, 0, sizeof dm_bufio_cache_names);
-
mem = (__u64)mult_frac(totalram_pages - totalhigh_pages,
DM_BUFIO_MEMORY_PERCENT, 100) << PAGE_SHIFT;
@@ -1948,17 +1920,10 @@ static int __init dm_bufio_init(void)
static void __exit dm_bufio_exit(void)
{
int bug = 0;
- int i;
cancel_delayed_work_sync(&dm_bufio_work);
destroy_workqueue(dm_bufio_wq);
- for (i = 0; i < ARRAY_SIZE(dm_bufio_caches); i++)
- kmem_cache_destroy(dm_bufio_caches[i]);
-
- for (i = 0; i < ARRAY_SIZE(dm_bufio_cache_names); i++)
- kfree(dm_bufio_cache_names[i]);
-
if (dm_bufio_client_count) {
DMCRIT("%s: dm_bufio_client_count leaked: %d",
__func__, dm_bufio_client_count);
diff --git a/drivers/md/dm-bufio.h b/drivers/md/dm-bufio.h
deleted file mode 100644
index be732d3f8611..000000000000
--- a/drivers/md/dm-bufio.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright (C) 2009-2011 Red Hat, Inc.
- *
- * Author: Mikulas Patocka <mpatocka@redhat.com>
- *
- * This file is released under the GPL.
- */
-
-#ifndef DM_BUFIO_H
-#define DM_BUFIO_H
-
-#include <linux/blkdev.h>
-#include <linux/types.h>
-
-/*----------------------------------------------------------------*/
-
-struct dm_bufio_client;
-struct dm_buffer;
-
-/*
- * Create a buffered IO cache on a given device
- */
-struct dm_bufio_client *
-dm_bufio_client_create(struct block_device *bdev, unsigned block_size,
- unsigned reserved_buffers, unsigned aux_size,
- void (*alloc_callback)(struct dm_buffer *),
- void (*write_callback)(struct dm_buffer *));
-
-/*
- * Release a buffered IO cache.
- */
-void dm_bufio_client_destroy(struct dm_bufio_client *c);
-
-/*
- * Set the sector range.
- * When this function is called, there must be no I/O in progress on the bufio
- * client.
- */
-void dm_bufio_set_sector_offset(struct dm_bufio_client *c, sector_t start);
-
-/*
- * WARNING: to avoid deadlocks, these conditions are observed:
- *
- * - At most one thread can hold at most "reserved_buffers" simultaneously.
- * - Each other threads can hold at most one buffer.
- * - Threads which call only dm_bufio_get can hold unlimited number of
- * buffers.
- */
-
-/*
- * Read a given block from disk. Returns pointer to data. Returns a
- * pointer to dm_buffer that can be used to release the buffer or to make
- * it dirty.
- */
-void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
- struct dm_buffer **bp);
-
-/*
- * Like dm_bufio_read, but return buffer from cache, don't read
- * it. If the buffer is not in the cache, return NULL.
- */
-void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
- struct dm_buffer **bp);
-
-/*
- * Like dm_bufio_read, but don't read anything from the disk. It is
- * expected that the caller initializes the buffer and marks it dirty.
- */
-void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
- struct dm_buffer **bp);
-
-/*
- * Prefetch the specified blocks to the cache.
- * The function starts to read the blocks and returns without waiting for
- * I/O to finish.
- */
-void dm_bufio_prefetch(struct dm_bufio_client *c,
- sector_t block, unsigned n_blocks);
-
-/*
- * Release a reference obtained with dm_bufio_{read,get,new}. The data
- * pointer and dm_buffer pointer is no longer valid after this call.
- */
-void dm_bufio_release(struct dm_buffer *b);
-
-/*
- * Mark a buffer dirty. It should be called after the buffer is modified.
- *
- * In case of memory pressure, the buffer may be written after
- * dm_bufio_mark_buffer_dirty, but before dm_bufio_write_dirty_buffers. So
- * dm_bufio_write_dirty_buffers guarantees that the buffer is on-disk but
- * the actual writing may occur earlier.
- */
-void dm_bufio_mark_buffer_dirty(struct dm_buffer *b);
-
-/*
- * Mark a part of the buffer dirty.
- *
- * The specified part of the buffer is scheduled to be written. dm-bufio may
- * write the specified part of the buffer or it may write a larger superset.
- */
-void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b,
- unsigned start, unsigned end);
-
-/*
- * Initiate writing of dirty buffers, without waiting for completion.
- */
-void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c);
-
-/*
- * Write all dirty buffers. Guarantees that all dirty buffers created prior
- * to this call are on disk when this call exits.
- */
-int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c);
-
-/*
- * Send an empty write barrier to the device to flush hardware disk cache.
- */
-int dm_bufio_issue_flush(struct dm_bufio_client *c);
-
-/*
- * Like dm_bufio_release but also move the buffer to the new
- * block. dm_bufio_write_dirty_buffers is needed to commit the new block.
- */
-void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block);
-
-/*
- * Free the given buffer.
- * This is just a hint, if the buffer is in use or dirty, this function
- * does nothing.
- */
-void dm_bufio_forget(struct dm_bufio_client *c, sector_t block);
-
-/*
- * Set the minimum number of buffers before cleanup happens.
- */
-void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n);
-
-unsigned dm_bufio_get_block_size(struct dm_bufio_client *c);
-sector_t dm_bufio_get_device_size(struct dm_bufio_client *c);
-sector_t dm_bufio_get_block_number(struct dm_buffer *b);
-void *dm_bufio_get_block_data(struct dm_buffer *b);
-void *dm_bufio_get_aux_data(struct dm_buffer *b);
-struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b);
-
-/*----------------------------------------------------------------*/
-
-#endif
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 47407e43b96a..da208638fba4 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -3387,7 +3387,8 @@ static int process_invalidate_cblocks_message(struct cache *cache, unsigned coun
*
* The key migration_threshold is supported by the cache target core.
*/
-static int cache_message(struct dm_target *ti, unsigned argc, char **argv)
+static int cache_message(struct dm_target *ti, unsigned argc, char **argv,
+ char *result, unsigned maxlen)
{
struct cache *cache = ti->private;
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 8168f737590e..44ff473dab3e 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -148,6 +148,8 @@ struct crypt_config {
mempool_t *tag_pool;
unsigned tag_pool_max_sectors;
+ struct percpu_counter n_allocated_pages;
+
struct bio_set *bs;
struct mutex bio_alloc_lock;
@@ -219,6 +221,12 @@ struct crypt_config {
#define MAX_TAG_SIZE 480
#define POOL_ENTRY_SIZE 512
+static DEFINE_SPINLOCK(dm_crypt_clients_lock);
+static unsigned dm_crypt_clients_n = 0;
+static volatile unsigned long dm_crypt_pages_per_client;
+#define DM_CRYPT_MEMORY_PERCENT 2
+#define DM_CRYPT_MIN_PAGES_PER_CLIENT (BIO_MAX_PAGES * 16)
+
static void clone_init(struct dm_crypt_io *, struct bio *);
static void kcryptd_queue_crypt(struct dm_crypt_io *io);
static struct scatterlist *crypt_get_sg_data(struct crypt_config *cc,
@@ -2155,6 +2163,43 @@ static int crypt_wipe_key(struct crypt_config *cc)
return r;
}
+static void crypt_calculate_pages_per_client(void)
+{
+ unsigned long pages = (totalram_pages - totalhigh_pages) * DM_CRYPT_MEMORY_PERCENT / 100;
+
+ if (!dm_crypt_clients_n)
+ return;
+
+ pages /= dm_crypt_clients_n;
+ if (pages < DM_CRYPT_MIN_PAGES_PER_CLIENT)
+ pages = DM_CRYPT_MIN_PAGES_PER_CLIENT;
+ dm_crypt_pages_per_client = pages;
+}
+
+static void *crypt_page_alloc(gfp_t gfp_mask, void *pool_data)
+{
+ struct crypt_config *cc = pool_data;
+ struct page *page;
+
+ if (unlikely(percpu_counter_compare(&cc->n_allocated_pages, dm_crypt_pages_per_client) >= 0) &&
+ likely(gfp_mask & __GFP_NORETRY))
+ return NULL;
+
+ page = alloc_page(gfp_mask);
+ if (likely(page != NULL))
+ percpu_counter_add(&cc->n_allocated_pages, 1);
+
+ return page;
+}
+
+static void crypt_page_free(void *page, void *pool_data)
+{
+ struct crypt_config *cc = pool_data;
+
+ __free_page(page);
+ percpu_counter_sub(&cc->n_allocated_pages, 1);
+}
+
static void crypt_dtr(struct dm_target *ti)
{
struct crypt_config *cc = ti->private;
@@ -2181,6 +2226,10 @@ static void crypt_dtr(struct dm_target *ti)
mempool_destroy(cc->req_pool);
mempool_destroy(cc->tag_pool);
+ if (cc->page_pool)
+ WARN_ON(percpu_counter_sum(&cc->n_allocated_pages) != 0);
+ percpu_counter_destroy(&cc->n_allocated_pages);
+
if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
cc->iv_gen_ops->dtr(cc);
@@ -2197,6 +2246,12 @@ static void crypt_dtr(struct dm_target *ti)
/* Must zero key material before freeing */
kzfree(cc);
+
+ spin_lock(&dm_crypt_clients_lock);
+ WARN_ON(!dm_crypt_clients_n);
+ dm_crypt_clients_n--;
+ crypt_calculate_pages_per_client();
+ spin_unlock(&dm_crypt_clients_lock);
}
static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode)
@@ -2644,6 +2699,15 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->private = cc;
+ spin_lock(&dm_crypt_clients_lock);
+ dm_crypt_clients_n++;
+ crypt_calculate_pages_per_client();
+ spin_unlock(&dm_crypt_clients_lock);
+
+ ret = percpu_counter_init(&cc->n_allocated_pages, 0, GFP_KERNEL);
+ if (ret < 0)
+ goto bad;
+
/* Optional parameters need to be read before cipher constructor */
if (argc > 5) {
ret = crypt_ctr_optional(ti, argc - 5, &argv[5]);
@@ -2698,7 +2762,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ALIGN(sizeof(struct dm_crypt_io) + cc->dmreq_start + additional_req_size,
ARCH_KMALLOC_MINALIGN);
- cc->page_pool = mempool_create_page_pool(BIO_MAX_PAGES, 0);
+ cc->page_pool = mempool_create(BIO_MAX_PAGES, crypt_page_alloc, crypt_page_free, cc);
if (!cc->page_pool) {
ti->error = "Cannot allocate page mempool";
goto bad;
@@ -2942,7 +3006,8 @@ static void crypt_resume(struct dm_target *ti)
* key set <key>
* key wipe
*/
-static int crypt_message(struct dm_target *ti, unsigned argc, char **argv)
+static int crypt_message(struct dm_target *ti, unsigned argc, char **argv,
+ char *result, unsigned maxlen)
{
struct crypt_config *cc = ti->private;
int key_size, ret = -EINVAL;
diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c
index 73a5c198113a..8e48920a3ffa 100644
--- a/drivers/md/dm-era-target.c
+++ b/drivers/md/dm-era-target.c
@@ -1635,7 +1635,8 @@ err:
DMEMIT("Error");
}
-static int era_message(struct dm_target *ti, unsigned argc, char **argv)
+static int era_message(struct dm_target *ti, unsigned argc, char **argv,
+ char *result, unsigned maxlen)
{
struct era *era = ti->private;
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 1b907b15f5c3..21d126a5078c 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -442,8 +442,7 @@ static void flakey_status(struct dm_target *ti, status_type_t type,
}
}
-static int flakey_prepare_ioctl(struct dm_target *ti,
- struct block_device **bdev, fmode_t *mode)
+static int flakey_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
struct flakey_c *fc = ti->private;
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 46d7c8749222..77d9fe58dae2 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -18,7 +18,7 @@
#include <crypto/hash.h>
#include <crypto/skcipher.h>
#include <linux/async_tx.h>
-#include "dm-bufio.h"
+#include <linux/dm-bufio.h>
#define DM_MSG_PREFIX "integrity"
@@ -2548,6 +2548,9 @@ static int get_mac(struct crypto_shash **hash, struct alg_spec *a, char **error,
*error = error_key;
return r;
}
+ } else if (crypto_shash_get_flags(*hash) & CRYPTO_TFM_NEED_KEY) {
+ *error = error_key;
+ return -ENOKEY;
}
}
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index a89fd8f44453..5acf77de5945 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1595,7 +1595,7 @@ static int target_message(struct file *filp, struct dm_ioctl *param, size_t para
DMWARN("Target message sector outside device.");
r = -EINVAL;
} else if (ti->type->message)
- r = ti->type->message(ti, argc, argv);
+ r = ti->type->message(ti, argc, argv, result, maxlen);
else {
DMWARN("Target type does not support messages");
r = -EINVAL;
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index d5f8eff7c11d..775c06d953b7 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -59,6 +59,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->num_flush_bios = 1;
ti->num_discard_bios = 1;
+ ti->num_secure_erase_bios = 1;
ti->num_write_same_bios = 1;
ti->num_write_zeroes_bios = 1;
ti->private = lc;
@@ -129,8 +130,7 @@ static void linear_status(struct dm_target *ti, status_type_t type,
}
}
-static int linear_prepare_ioctl(struct dm_target *ti,
- struct block_device **bdev, fmode_t *mode)
+static int linear_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
struct linear_c *lc = (struct linear_c *) ti->private;
struct dm_dev *dev = lc->dev;
@@ -154,6 +154,7 @@ static int linear_iterate_devices(struct dm_target *ti,
return fn(ti, lc->dev, lc->start, ti->len, data);
}
+#if IS_ENABLED(CONFIG_DAX_DRIVER)
static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
long nr_pages, void **kaddr, pfn_t *pfn)
{
@@ -184,6 +185,11 @@ static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
}
+#else
+#define linear_dax_direct_access NULL
+#define linear_dax_copy_from_iter NULL
+#endif
+
static struct target_type linear_target = {
.name = "linear",
.version = {1, 4, 0},
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 3362d866793b..c90c7c08a77f 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -52,10 +52,11 @@
* in fact we want to do the data and the discard in the order that they
* completed.
*/
-#define LOG_FLUSH_FLAG (1 << 0)
-#define LOG_FUA_FLAG (1 << 1)
-#define LOG_DISCARD_FLAG (1 << 2)
-#define LOG_MARK_FLAG (1 << 3)
+#define LOG_FLUSH_FLAG (1 << 0)
+#define LOG_FUA_FLAG (1 << 1)
+#define LOG_DISCARD_FLAG (1 << 2)
+#define LOG_MARK_FLAG (1 << 3)
+#define LOG_METADATA_FLAG (1 << 4)
#define WRITE_LOG_VERSION 1ULL
#define WRITE_LOG_MAGIC 0x6a736677736872ULL
@@ -610,51 +611,6 @@ static int log_mark(struct log_writes_c *lc, char *data)
return 0;
}
-static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes,
- struct iov_iter *i)
-{
- struct pending_block *block;
-
- if (!bytes)
- return 0;
-
- block = kzalloc(sizeof(struct pending_block), GFP_KERNEL);
- if (!block) {
- DMERR("Error allocating dax pending block");
- return -ENOMEM;
- }
-
- block->data = kzalloc(bytes, GFP_KERNEL);
- if (!block->data) {
- DMERR("Error allocating dax data space");
- kfree(block);
- return -ENOMEM;
- }
-
- /* write data provided via the iterator */
- if (!copy_from_iter(block->data, bytes, i)) {
- DMERR("Error copying dax data");
- kfree(block->data);
- kfree(block);
- return -EIO;
- }
-
- /* rewind the iterator so that the block driver can use it */
- iov_iter_revert(i, bytes);
-
- block->datalen = bytes;
- block->sector = bio_to_dev_sectors(lc, sector);
- block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift;
-
- atomic_inc(&lc->pending_blocks);
- spin_lock_irq(&lc->blocks_lock);
- list_add_tail(&block->list, &lc->unflushed_blocks);
- spin_unlock_irq(&lc->blocks_lock);
- wake_up_process(lc->log_kthread);
-
- return 0;
-}
-
static void log_writes_dtr(struct dm_target *ti)
{
struct log_writes_c *lc = ti->private;
@@ -699,6 +655,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
bool flush_bio = (bio->bi_opf & REQ_PREFLUSH);
bool fua_bio = (bio->bi_opf & REQ_FUA);
bool discard_bio = (bio_op(bio) == REQ_OP_DISCARD);
+ bool meta_bio = (bio->bi_opf & REQ_META);
pb->block = NULL;
@@ -743,6 +700,8 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
block->flags |= LOG_FUA_FLAG;
if (discard_bio)
block->flags |= LOG_DISCARD_FLAG;
+ if (meta_bio)
+ block->flags |= LOG_METADATA_FLAG;
block->sector = bio_to_dev_sectors(lc, bio->bi_iter.bi_sector);
block->nr_sectors = bio_to_dev_sectors(lc, bio_sectors(bio));
@@ -860,7 +819,7 @@ static void log_writes_status(struct dm_target *ti, status_type_t type,
}
static int log_writes_prepare_ioctl(struct dm_target *ti,
- struct block_device **bdev, fmode_t *mode)
+ struct block_device **bdev)
{
struct log_writes_c *lc = ti->private;
struct dm_dev *dev = lc->dev;
@@ -887,7 +846,8 @@ static int log_writes_iterate_devices(struct dm_target *ti,
* Messages supported:
* mark <mark data> - specify the marked data.
*/
-static int log_writes_message(struct dm_target *ti, unsigned argc, char **argv)
+static int log_writes_message(struct dm_target *ti, unsigned argc, char **argv,
+ char *result, unsigned maxlen)
{
int r = -EINVAL;
struct log_writes_c *lc = ti->private;
@@ -920,6 +880,52 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit
limits->io_min = limits->physical_block_size;
}
+#if IS_ENABLED(CONFIG_DAX_DRIVER)
+static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes,
+ struct iov_iter *i)
+{
+ struct pending_block *block;
+
+ if (!bytes)
+ return 0;
+
+ block = kzalloc(sizeof(struct pending_block), GFP_KERNEL);
+ if (!block) {
+ DMERR("Error allocating dax pending block");
+ return -ENOMEM;
+ }
+
+ block->data = kzalloc(bytes, GFP_KERNEL);
+ if (!block->data) {
+ DMERR("Error allocating dax data space");
+ kfree(block);
+ return -ENOMEM;
+ }
+
+ /* write data provided via the iterator */
+ if (!copy_from_iter(block->data, bytes, i)) {
+ DMERR("Error copying dax data");
+ kfree(block->data);
+ kfree(block);
+ return -EIO;
+ }
+
+ /* rewind the iterator so that the block driver can use it */
+ iov_iter_revert(i, bytes);
+
+ block->datalen = bytes;
+ block->sector = bio_to_dev_sectors(lc, sector);
+ block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift;
+
+ atomic_inc(&lc->pending_blocks);
+ spin_lock_irq(&lc->blocks_lock);
+ list_add_tail(&block->list, &lc->unflushed_blocks);
+ spin_unlock_irq(&lc->blocks_lock);
+ wake_up_process(lc->log_kthread);
+
+ return 0;
+}
+
static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
long nr_pages, void **kaddr, pfn_t *pfn)
{
@@ -956,6 +962,10 @@ static size_t log_writes_dax_copy_from_iter(struct dm_target *ti,
dax_copy:
return dax_copy_from_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
}
+#else
+#define log_writes_dax_direct_access NULL
+#define log_writes_dax_copy_from_iter NULL
+#endif
static struct target_type log_writes_target = {
.name = "log-writes",
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index a6b7baf31cdd..203a0419d2b0 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -714,7 +714,7 @@ static void process_queued_bios(struct work_struct *work)
case DM_MAPIO_REMAPPED:
generic_make_request(bio);
break;
- case 0:
+ case DM_MAPIO_SUBMITTED:
break;
default:
WARN_ONCE(true, "__multipath_map_bio() returned %d\n", r);
@@ -1811,7 +1811,8 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
spin_unlock_irqrestore(&m->lock, flags);
}
-static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
+static int multipath_message(struct dm_target *ti, unsigned argc, char **argv,
+ char *result, unsigned maxlen)
{
int r = -EINVAL;
struct dm_dev *dev;
@@ -1875,7 +1876,7 @@ out:
}
static int multipath_prepare_ioctl(struct dm_target *ti,
- struct block_device **bdev, fmode_t *mode)
+ struct block_device **bdev)
{
struct multipath *m = ti->private;
struct pgpath *current_pgpath;
@@ -1888,7 +1889,6 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
if (current_pgpath) {
if (!test_bit(MPATHF_QUEUE_IO, &m->flags)) {
*bdev = current_pgpath->path.dev->bdev;
- *mode = current_pgpath->path.dev->mode;
r = 0;
} else {
/* pg_init has not started or completed */
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index c1d1034ff7b7..6f823f44b4aa 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1370,19 +1370,18 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
* In device-mapper, we specify things in sectors, but
* MD records this value in kB
*/
- value /= 2;
- if (value > COUNTER_MAX) {
+ if (value < 0 || value / 2 > COUNTER_MAX) {
rs->ti->error = "Max write-behind limit out of range";
return -EINVAL;
}
- rs->md.bitmap_info.max_write_behind = value;
+ rs->md.bitmap_info.max_write_behind = value / 2;
} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_DAEMON_SLEEP))) {
if (test_and_set_bit(__CTR_FLAG_DAEMON_SLEEP, &rs->ctr_flags)) {
rs->ti->error = "Only one daemon_sleep argument pair allowed";
return -EINVAL;
}
- if (!value || (value > MAX_SCHEDULE_TIMEOUT)) {
+ if (value < 0) {
rs->ti->error = "daemon sleep period out of range";
return -EINVAL;
}
@@ -1424,27 +1423,33 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
return -EINVAL;
}
+ if (value < 0) {
+ rs->ti->error = "Bogus stripe cache entries value";
+ return -EINVAL;
+ }
rs->stripe_cache_entries = value;
} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_MIN_RECOVERY_RATE))) {
if (test_and_set_bit(__CTR_FLAG_MIN_RECOVERY_RATE, &rs->ctr_flags)) {
rs->ti->error = "Only one min_recovery_rate argument pair allowed";
return -EINVAL;
}
- if (value > INT_MAX) {
+
+ if (value < 0) {
rs->ti->error = "min_recovery_rate out of range";
return -EINVAL;
}
- rs->md.sync_speed_min = (int)value;
+ rs->md.sync_speed_min = value;
} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_MAX_RECOVERY_RATE))) {
if (test_and_set_bit(__CTR_FLAG_MAX_RECOVERY_RATE, &rs->ctr_flags)) {
rs->ti->error = "Only one max_recovery_rate argument pair allowed";
return -EINVAL;
}
- if (value > INT_MAX) {
+
+ if (value < 0) {
rs->ti->error = "max_recovery_rate out of range";
return -EINVAL;
}
- rs->md.sync_speed_max = (int)value;
+ rs->md.sync_speed_max = value;
} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_REGION_SIZE))) {
if (test_and_set_bit(__CTR_FLAG_REGION_SIZE, &rs->ctr_flags)) {
rs->ti->error = "Only one region_size argument pair allowed";
@@ -1490,6 +1495,12 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
return -EINVAL;
}
+ if (rs->md.sync_speed_max &&
+ rs->md.sync_speed_min > rs->md.sync_speed_max) {
+ rs->ti->error = "Bogus recovery rates";
+ return -EINVAL;
+ }
+
if (validate_region_size(rs, region_size))
return -EINVAL;
@@ -3408,7 +3419,8 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
} else {
- if (!test_bit(MD_RECOVERY_INTR, &recovery) &&
+ if (!test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags) &&
+ !test_bit(MD_RECOVERY_INTR, &recovery) &&
(test_bit(MD_RECOVERY_NEEDED, &recovery) ||
test_bit(MD_RECOVERY_RESHAPE, &recovery) ||
test_bit(MD_RECOVERY_RUNNING, &recovery)))
@@ -3663,7 +3675,8 @@ static void raid_status(struct dm_target *ti, status_type_t type,
}
}
-static int raid_message(struct dm_target *ti, unsigned int argc, char **argv)
+static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
+ char *result, unsigned maxlen)
{
struct raid_set *rs = ti->private;
struct mddev *mddev = &rs->md;
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index c5534d294773..3c50c4e4da8f 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -14,7 +14,7 @@
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/dm-io.h>
-#include "dm-bufio.h"
+#include <linux/dm-bufio.h>
#define DM_MSG_PREFIX "persistent snapshot"
#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index b5e892149c54..fe7fb9b1aec3 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -169,6 +169,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->num_flush_bios = stripes;
ti->num_discard_bios = stripes;
+ ti->num_secure_erase_bios = stripes;
ti->num_write_same_bios = stripes;
ti->num_write_zeroes_bios = stripes;
@@ -295,6 +296,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_REMAPPED;
}
if (unlikely(bio_op(bio) == REQ_OP_DISCARD) ||
+ unlikely(bio_op(bio) == REQ_OP_SECURE_ERASE) ||
unlikely(bio_op(bio) == REQ_OP_WRITE_ZEROES) ||
unlikely(bio_op(bio) == REQ_OP_WRITE_SAME)) {
target_bio_nr = dm_bio_get_target_bio_nr(bio);
@@ -311,6 +313,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_REMAPPED;
}
+#if IS_ENABLED(CONFIG_DAX_DRIVER)
static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
long nr_pages, void **kaddr, pfn_t *pfn)
{
@@ -351,6 +354,11 @@ static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
}
+#else
+#define stripe_dax_direct_access NULL
+#define stripe_dax_copy_from_iter NULL
+#endif
+
/*
* Stripe status:
*
@@ -368,7 +376,6 @@ static void stripe_status(struct dm_target *ti, status_type_t type,
unsigned status_flags, char *result, unsigned maxlen)
{
struct stripe_c *sc = (struct stripe_c *) ti->private;
- char buffer[sc->stripes + 1];
unsigned int sz = 0;
unsigned int i;
@@ -377,11 +384,12 @@ static void stripe_status(struct dm_target *ti, status_type_t type,
DMEMIT("%d ", sc->stripes);
for (i = 0; i < sc->stripes; i++) {
DMEMIT("%s ", sc->stripe[i].dev->name);
- buffer[i] = atomic_read(&(sc->stripe[i].error_count)) ?
- 'D' : 'A';
}
- buffer[i] = '\0';
- DMEMIT("1 %s", buffer);
+ DMEMIT("1 ");
+ for (i = 0; i < sc->stripes; i++) {
+ DMEMIT("%c", atomic_read(&(sc->stripe[i].error_count)) ?
+ 'D' : 'A');
+ }
break;
case STATUSTYPE_TABLE:
diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c
index 8d0ba879777e..7924a6a33ddc 100644
--- a/drivers/md/dm-switch.c
+++ b/drivers/md/dm-switch.c
@@ -466,7 +466,8 @@ static int process_set_region_mappings(struct switch_ctx *sctx,
*
* Only set_region_mappings is supported.
*/
-static int switch_message(struct dm_target *ti, unsigned argc, char **argv)
+static int switch_message(struct dm_target *ti, unsigned argc, char **argv,
+ char *result, unsigned maxlen)
{
static DEFINE_MUTEX(message_mutex);
@@ -511,8 +512,7 @@ static void switch_status(struct dm_target *ti, status_type_t type,
*
* Passthrough all ioctls to the path for sector 0
*/
-static int switch_prepare_ioctl(struct dm_target *ti,
- struct block_device **bdev, fmode_t *mode)
+static int switch_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
struct switch_ctx *sctx = ti->private;
unsigned path_nr;
@@ -520,7 +520,6 @@ static int switch_prepare_ioctl(struct dm_target *ti,
path_nr = switch_get_path_nr(sctx, 0);
*bdev = sctx->path_list[path_nr].dmdev->bdev;
- *mode = sctx->path_list[path_nr].dmdev->mode;
/*
* Only pass ioctls through if the device sizes match exactly.
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 7eb3e2a3c07d..0589a4da12bb 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1846,6 +1846,34 @@ static bool dm_table_supports_discards(struct dm_table *t)
return true;
}
+static int device_not_secure_erase_capable(struct dm_target *ti,
+ struct dm_dev *dev, sector_t start,
+ sector_t len, void *data)
+{
+ struct request_queue *q = bdev_get_queue(dev->bdev);
+
+ return q && !blk_queue_secure_erase(q);
+}
+
+static bool dm_table_supports_secure_erase(struct dm_table *t)
+{
+ struct dm_target *ti;
+ unsigned int i;
+
+ for (i = 0; i < dm_table_get_num_targets(t); i++) {
+ ti = dm_table_get_target(t, i);
+
+ if (!ti->num_secure_erase_bios)
+ return false;
+
+ if (!ti->type->iterate_devices ||
+ ti->type->iterate_devices(ti, device_not_secure_erase_capable, NULL))
+ return false;
+ }
+
+ return true;
+}
+
void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *limits)
{
@@ -1857,7 +1885,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
q->limits = *limits;
if (!dm_table_supports_discards(t)) {
- queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
+ blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
/* Must also clear discard limits... */
q->limits.max_discard_sectors = 0;
q->limits.max_hw_discard_sectors = 0;
@@ -1865,7 +1893,10 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
q->limits.discard_alignment = 0;
q->limits.discard_misaligned = 0;
} else
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
+
+ if (dm_table_supports_secure_erase(t))
+ blk_queue_flag_set(QUEUE_FLAG_SECERASE, q);
if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) {
wc = true;
@@ -1875,15 +1906,15 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
blk_queue_write_cache(q, wc, fua);
if (dm_table_supports_dax(t))
- queue_flag_set_unlocked(QUEUE_FLAG_DAX, q);
+ blk_queue_flag_set(QUEUE_FLAG_DAX, q);
if (dm_table_supports_dax_write_cache(t))
dax_write_cache(t->md->dax_dev, true);
/* Ensure that all underlying devices are non-rotational. */
if (dm_table_all_devices_attribute(t, device_is_nonrot))
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
else
- queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q);
+ blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
if (!dm_table_supports_write_same(t))
q->limits.max_write_same_sectors = 0;
@@ -1891,9 +1922,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
q->limits.max_write_zeroes_sectors = 0;
if (dm_table_all_devices_attribute(t, queue_supports_sg_merge))
- queue_flag_clear_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
+ blk_queue_flag_clear(QUEUE_FLAG_NO_SG_MERGE, q);
else
- queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
+ blk_queue_flag_set(QUEUE_FLAG_NO_SG_MERGE, q);
dm_table_verify_integrity(t);
@@ -1904,7 +1935,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
* have it set.
*/
if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random))
- queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q);
+ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
}
unsigned int dm_table_get_num_targets(struct dm_table *t)
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index c0d7e60820c4..314d17ca6466 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -16,8 +16,6 @@
static LIST_HEAD(_targets);
static DECLARE_RWSEM(_lock);
-#define DM_MOD_NAME_SIZE 32
-
static inline struct target_type *__find_target_type(const char *name)
{
struct target_type *tt;
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 629c555890c1..b11107497d2e 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -3705,7 +3705,8 @@ static int process_release_metadata_snap_mesg(unsigned argc, char **argv, struct
* reserve_metadata_snap
* release_metadata_snap
*/
-static int pool_message(struct dm_target *ti, unsigned argc, char **argv)
+static int pool_message(struct dm_target *ti, unsigned argc, char **argv,
+ char *result, unsigned maxlen)
{
int r = -EINVAL;
struct pool_c *pt = ti->private;
diff --git a/drivers/md/dm-unstripe.c b/drivers/md/dm-unstripe.c
index 65f838fa2e99..954b7ab4e684 100644
--- a/drivers/md/dm-unstripe.c
+++ b/drivers/md/dm-unstripe.c
@@ -7,12 +7,6 @@
#include "dm.h"
#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/blkdev.h>
-#include <linux/bio.h>
-#include <linux/slab.h>
-#include <linux/bitops.h>
-#include <linux/device-mapper.h>
struct unstripe_c {
struct dm_dev *dev;
@@ -69,12 +63,6 @@ static int unstripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto err;
}
- // FIXME: must support non power of 2 chunk_size, dm-stripe.c does
- if (!is_power_of_2(uc->chunk_size)) {
- ti->error = "Non power of 2 chunk_size is not supported yet";
- goto err;
- }
-
if (kstrtouint(argv[2], 10, &uc->unstripe)) {
ti->error = "Invalid stripe number";
goto err;
@@ -98,7 +86,7 @@ static int unstripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
uc->unstripe_offset = uc->unstripe * uc->chunk_size;
uc->unstripe_width = (uc->stripes - 1) * uc->chunk_size;
- uc->chunk_shift = fls(uc->chunk_size) - 1;
+ uc->chunk_shift = is_power_of_2(uc->chunk_size) ? fls(uc->chunk_size) - 1 : 0;
tmp_len = ti->len;
if (sector_div(tmp_len, uc->chunk_size)) {
@@ -129,14 +117,18 @@ static sector_t map_to_core(struct dm_target *ti, struct bio *bio)
{
struct unstripe_c *uc = ti->private;
sector_t sector = bio->bi_iter.bi_sector;
+ sector_t tmp_sector = sector;
/* Shift us up to the right "row" on the stripe */
- sector += uc->unstripe_width * (sector >> uc->chunk_shift);
+ if (uc->chunk_shift)
+ tmp_sector >>= uc->chunk_shift;
+ else
+ sector_div(tmp_sector, uc->chunk_size);
- /* Account for what stripe we're operating on */
- sector += uc->unstripe_offset;
+ sector += uc->unstripe_width * tmp_sector;
- return sector;
+ /* Account for what stripe we're operating on */
+ return sector + uc->unstripe_offset;
}
static int unstripe_map(struct dm_target *ti, struct bio *bio)
@@ -185,7 +177,7 @@ static void unstripe_io_hints(struct dm_target *ti,
static struct target_type unstripe_target = {
.name = "unstriped",
- .version = {1, 0, 0},
+ .version = {1, 1, 0},
.module = THIS_MODULE,
.ctr = unstripe_ctr,
.dtr = unstripe_dtr,
@@ -197,13 +189,7 @@ static struct target_type unstripe_target = {
static int __init dm_unstripe_init(void)
{
- int r;
-
- r = dm_register_target(&unstripe_target);
- if (r < 0)
- DMERR("target registration failed");
-
- return r;
+ return dm_register_target(&unstripe_target);
}
static void __exit dm_unstripe_exit(void)
@@ -215,5 +201,6 @@ module_init(dm_unstripe_init);
module_exit(dm_unstripe_exit);
MODULE_DESCRIPTION(DM_NAME " unstriped target");
+MODULE_ALIAS("dm-unstriped");
MODULE_AUTHOR("Scott Bauer <scott.bauer@intel.com>");
MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index aedb8222836b..fc893f636a98 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -32,6 +32,7 @@
#define DM_VERITY_OPT_LOGGING "ignore_corruption"
#define DM_VERITY_OPT_RESTART "restart_on_corruption"
#define DM_VERITY_OPT_IGN_ZEROES "ignore_zero_blocks"
+#define DM_VERITY_OPT_AT_MOST_ONCE "check_at_most_once"
#define DM_VERITY_OPTS_MAX (2 + DM_VERITY_OPTS_FEC)
@@ -347,8 +348,8 @@ out:
/*
* Calculates the digest for the given bio
*/
-int verity_for_io_block(struct dm_verity *v, struct dm_verity_io *io,
- struct bvec_iter *iter, struct crypto_wait *wait)
+static int verity_for_io_block(struct dm_verity *v, struct dm_verity_io *io,
+ struct bvec_iter *iter, struct crypto_wait *wait)
{
unsigned int todo = 1 << v->data_dev_block_bits;
struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
@@ -433,6 +434,18 @@ static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io,
}
/*
+ * Moves the bio iter one data block forward.
+ */
+static inline void verity_bv_skip_block(struct dm_verity *v,
+ struct dm_verity_io *io,
+ struct bvec_iter *iter)
+{
+ struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
+
+ bio_advance_iter(bio, iter, 1 << v->data_dev_block_bits);
+}
+
+/*
* Verify one "dm_verity_io" structure.
*/
static int verity_verify_io(struct dm_verity_io *io)
@@ -445,9 +458,16 @@ static int verity_verify_io(struct dm_verity_io *io)
for (b = 0; b < io->n_blocks; b++) {
int r;
+ sector_t cur_block = io->block + b;
struct ahash_request *req = verity_io_hash_req(v, io);
- r = verity_hash_for_block(v, io, io->block + b,
+ if (v->validated_blocks &&
+ likely(test_bit(cur_block, v->validated_blocks))) {
+ verity_bv_skip_block(v, io, &io->iter);
+ continue;
+ }
+
+ r = verity_hash_for_block(v, io, cur_block,
verity_io_want_digest(v, io),
&is_zero);
if (unlikely(r < 0))
@@ -481,13 +501,16 @@ static int verity_verify_io(struct dm_verity_io *io)
return r;
if (likely(memcmp(verity_io_real_digest(v, io),
- verity_io_want_digest(v, io), v->digest_size) == 0))
+ verity_io_want_digest(v, io), v->digest_size) == 0)) {
+ if (v->validated_blocks)
+ set_bit(cur_block, v->validated_blocks);
continue;
+ }
else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA,
- io->block + b, NULL, &start) == 0)
+ cur_block, NULL, &start) == 0)
continue;
else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA,
- io->block + b))
+ cur_block))
return -EIO;
}
@@ -673,6 +696,8 @@ static void verity_status(struct dm_target *ti, status_type_t type,
args += DM_VERITY_OPTS_FEC;
if (v->zero_digest)
args++;
+ if (v->validated_blocks)
+ args++;
if (!args)
return;
DMEMIT(" %u", args);
@@ -691,13 +716,14 @@ static void verity_status(struct dm_target *ti, status_type_t type,
}
if (v->zero_digest)
DMEMIT(" " DM_VERITY_OPT_IGN_ZEROES);
+ if (v->validated_blocks)
+ DMEMIT(" " DM_VERITY_OPT_AT_MOST_ONCE);
sz = verity_fec_status_table(v, sz, result, maxlen);
break;
}
}
-static int verity_prepare_ioctl(struct dm_target *ti,
- struct block_device **bdev, fmode_t *mode)
+static int verity_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
struct dm_verity *v = ti->private;
@@ -740,6 +766,7 @@ static void verity_dtr(struct dm_target *ti)
if (v->bufio)
dm_bufio_client_destroy(v->bufio);
+ kvfree(v->validated_blocks);
kfree(v->salt);
kfree(v->root_digest);
kfree(v->zero_digest);
@@ -760,6 +787,26 @@ static void verity_dtr(struct dm_target *ti)
kfree(v);
}
+static int verity_alloc_most_once(struct dm_verity *v)
+{
+ struct dm_target *ti = v->ti;
+
+ /* the bitset can only handle INT_MAX blocks */
+ if (v->data_blocks > INT_MAX) {
+ ti->error = "device too large to use check_at_most_once";
+ return -E2BIG;
+ }
+
+ v->validated_blocks = kvzalloc(BITS_TO_LONGS(v->data_blocks) *
+ sizeof(unsigned long), GFP_KERNEL);
+ if (!v->validated_blocks) {
+ ti->error = "failed to allocate bitset for check_at_most_once";
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
static int verity_alloc_zero_digest(struct dm_verity *v)
{
int r = -ENOMEM;
@@ -829,6 +876,12 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v)
}
continue;
+ } else if (!strcasecmp(arg_name, DM_VERITY_OPT_AT_MOST_ONCE)) {
+ r = verity_alloc_most_once(v);
+ if (r)
+ return r;
+ continue;
+
} else if (verity_is_fec_opt_arg(arg_name)) {
r = verity_fec_parse_opt_args(as, v, &argc, arg_name);
if (r)
@@ -1096,7 +1149,7 @@ bad:
static struct target_type verity_target = {
.name = "verity",
- .version = {1, 3, 0},
+ .version = {1, 4, 0},
.module = THIS_MODULE,
.ctr = verity_ctr,
.dtr = verity_dtr,
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index b675bc015512..3441c10b840c 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -12,7 +12,7 @@
#ifndef DM_VERITY_H
#define DM_VERITY_H
-#include "dm-bufio.h"
+#include <linux/dm-bufio.h>
#include <linux/device-mapper.h>
#include <crypto/hash.h>
@@ -63,6 +63,7 @@ struct dm_verity {
sector_t hash_level_block[DM_VERITY_MAX_LEVELS];
struct dm_verity_fec *fec; /* forward error correction */
+ unsigned long *validated_blocks; /* bitset blocks validated */
};
struct dm_verity_io {
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index caff02caf083..e73b0776683c 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -898,8 +898,7 @@ static void dmz_io_hints(struct dm_target *ti, struct queue_limits *limits)
/*
* Pass on ioctl to the backend device.
*/
-static int dmz_prepare_ioctl(struct dm_target *ti,
- struct block_device **bdev, fmode_t *mode)
+static int dmz_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
struct dmz_target *dmz = ti->private;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 353ea0ede091..4ea404dbcf0b 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -458,67 +458,56 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return dm_get_geometry(md, geo);
}
-static char *_dm_claim_ptr = "I belong to device-mapper";
-
-static int dm_get_bdev_for_ioctl(struct mapped_device *md,
- struct block_device **bdev,
- fmode_t *mode)
+static int dm_prepare_ioctl(struct mapped_device *md, int *srcu_idx,
+ struct block_device **bdev)
+ __acquires(md->io_barrier)
{
struct dm_target *tgt;
struct dm_table *map;
- int srcu_idx, r, r2;
+ int r;
retry:
r = -ENOTTY;
- map = dm_get_live_table(md, &srcu_idx);
+ map = dm_get_live_table(md, srcu_idx);
if (!map || !dm_table_get_size(map))
- goto out;
+ return r;
/* We only support devices that have a single target */
if (dm_table_get_num_targets(map) != 1)
- goto out;
+ return r;
tgt = dm_table_get_target(map, 0);
if (!tgt->type->prepare_ioctl)
- goto out;
-
- if (dm_suspended_md(md)) {
- r = -EAGAIN;
- goto out;
- }
-
- r = tgt->type->prepare_ioctl(tgt, bdev, mode);
- if (r < 0)
- goto out;
-
- bdgrab(*bdev);
- r2 = blkdev_get(*bdev, *mode, _dm_claim_ptr);
- if (r2 < 0) {
- r = r2;
- goto out;
- }
+ return r;
- dm_put_live_table(md, srcu_idx);
- return r;
+ if (dm_suspended_md(md))
+ return -EAGAIN;
-out:
- dm_put_live_table(md, srcu_idx);
+ r = tgt->type->prepare_ioctl(tgt, bdev);
if (r == -ENOTCONN && !fatal_signal_pending(current)) {
+ dm_put_live_table(md, *srcu_idx);
msleep(10);
goto retry;
}
+
return r;
}
+static void dm_unprepare_ioctl(struct mapped_device *md, int srcu_idx)
+ __releases(md->io_barrier)
+{
+ dm_put_live_table(md, srcu_idx);
+}
+
static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
struct mapped_device *md = bdev->bd_disk->private_data;
- int r;
+ int r, srcu_idx;
- r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
+ r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
if (r < 0)
- return r;
+ goto out;
if (r > 0) {
/*
@@ -536,7 +525,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
r = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
out:
- blkdev_put(bdev, mode);
+ dm_unprepare_ioctl(md, srcu_idx);
return r;
}
@@ -710,6 +699,8 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
rcu_read_unlock();
}
+static char *_dm_claim_ptr = "I belong to device-mapper";
+
/*
* Open a table device so we can use it as a map destination.
*/
@@ -1414,6 +1405,11 @@ static unsigned get_num_discard_bios(struct dm_target *ti)
return ti->num_discard_bios;
}
+static unsigned get_num_secure_erase_bios(struct dm_target *ti)
+{
+ return ti->num_secure_erase_bios;
+}
+
static unsigned get_num_write_same_bios(struct dm_target *ti)
{
return ti->num_write_same_bios;
@@ -1467,6 +1463,11 @@ static int __send_discard(struct clone_info *ci, struct dm_target *ti)
is_split_required_for_discard);
}
+static int __send_secure_erase(struct clone_info *ci, struct dm_target *ti)
+{
+ return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios, NULL);
+}
+
static int __send_write_same(struct clone_info *ci, struct dm_target *ti)
{
return __send_changing_extent_only(ci, ti, get_num_write_same_bios, NULL);
@@ -1477,6 +1478,25 @@ static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti)
return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios, NULL);
}
+static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti,
+ int *result)
+{
+ struct bio *bio = ci->bio;
+
+ if (bio_op(bio) == REQ_OP_DISCARD)
+ *result = __send_discard(ci, ti);
+ else if (bio_op(bio) == REQ_OP_SECURE_ERASE)
+ *result = __send_secure_erase(ci, ti);
+ else if (bio_op(bio) == REQ_OP_WRITE_SAME)
+ *result = __send_write_same(ci, ti);
+ else if (bio_op(bio) == REQ_OP_WRITE_ZEROES)
+ *result = __send_write_zeroes(ci, ti);
+ else
+ return false;
+
+ return true;
+}
+
/*
* Select the correct strategy for processing a non-flush bio.
*/
@@ -1491,12 +1511,8 @@ static int __split_and_process_non_flush(struct clone_info *ci)
if (!dm_target_is_valid(ti))
return -EIO;
- if (unlikely(bio_op(bio) == REQ_OP_DISCARD))
- return __send_discard(ci, ti);
- else if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
- return __send_write_same(ci, ti);
- else if (unlikely(bio_op(bio) == REQ_OP_WRITE_ZEROES))
- return __send_write_zeroes(ci, ti);
+ if (unlikely(__process_abnormal_io(ci, ti, &r)))
+ return r;
if (bio_op(bio) == REQ_OP_ZONE_REPORT)
len = ci->sector_count;
@@ -1617,9 +1633,12 @@ static blk_qc_t __process_bio(struct mapped_device *md,
goto out;
}
- tio = alloc_tio(&ci, ti, 0, GFP_NOIO);
ci.bio = bio;
ci.sector_count = bio_sectors(bio);
+ if (unlikely(__process_abnormal_io(&ci, ti, &error)))
+ goto out;
+
+ tio = alloc_tio(&ci, ti, 0, GFP_NOIO);
ret = __clone_and_map_simple_bio(&ci, tio, NULL);
}
out:
@@ -1807,7 +1826,7 @@ static void cleanup_mapped_device(struct mapped_device *md)
static struct mapped_device *alloc_dev(int minor)
{
int r, numa_node_id = dm_get_numa_node();
- struct dax_device *dax_dev;
+ struct dax_device *dax_dev = NULL;
struct mapped_device *md;
void *old_md;
@@ -1848,7 +1867,7 @@ static struct mapped_device *alloc_dev(int minor)
INIT_LIST_HEAD(&md->table_devices);
spin_lock_init(&md->uevent_lock);
- md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id);
+ md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id, NULL);
if (!md->queue)
goto bad;
md->queue->queuedata = md;
@@ -1873,9 +1892,11 @@ static struct mapped_device *alloc_dev(int minor)
md->disk->private_data = md;
sprintf(md->disk->disk_name, "dm-%d", minor);
- dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
- if (!dax_dev)
- goto bad;
+ if (IS_ENABLED(CONFIG_DAX_DRIVER)) {
+ dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
+ if (!dax_dev)
+ goto bad;
+ }
md->dax_dev = dax_dev;
add_disk_no_queue_reg(md->disk);
@@ -3015,20 +3036,19 @@ static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
{
struct mapped_device *md = bdev->bd_disk->private_data;
const struct pr_ops *ops;
- fmode_t mode;
- int r;
+ int r, srcu_idx;
- r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
+ r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
if (r < 0)
- return r;
+ goto out;
ops = bdev->bd_disk->fops->pr_ops;
if (ops && ops->pr_reserve)
r = ops->pr_reserve(bdev, key, type, flags);
else
r = -EOPNOTSUPP;
-
- blkdev_put(bdev, mode);
+out:
+ dm_unprepare_ioctl(md, srcu_idx);
return r;
}
@@ -3036,20 +3056,19 @@ static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
{
struct mapped_device *md = bdev->bd_disk->private_data;
const struct pr_ops *ops;
- fmode_t mode;
- int r;
+ int r, srcu_idx;
- r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
+ r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
if (r < 0)
- return r;
+ goto out;
ops = bdev->bd_disk->fops->pr_ops;
if (ops && ops->pr_release)
r = ops->pr_release(bdev, key, type);
else
r = -EOPNOTSUPP;
-
- blkdev_put(bdev, mode);
+out:
+ dm_unprepare_ioctl(md, srcu_idx);
return r;
}
@@ -3058,20 +3077,19 @@ static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
{
struct mapped_device *md = bdev->bd_disk->private_data;
const struct pr_ops *ops;
- fmode_t mode;
- int r;
+ int r, srcu_idx;
- r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
+ r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
if (r < 0)
- return r;
+ goto out;
ops = bdev->bd_disk->fops->pr_ops;
if (ops && ops->pr_preempt)
r = ops->pr_preempt(bdev, old_key, new_key, type, abort);
else
r = -EOPNOTSUPP;
-
- blkdev_put(bdev, mode);
+out:
+ dm_unprepare_ioctl(md, srcu_idx);
return r;
}
@@ -3079,20 +3097,19 @@ static int dm_pr_clear(struct block_device *bdev, u64 key)
{
struct mapped_device *md = bdev->bd_disk->private_data;
const struct pr_ops *ops;
- fmode_t mode;
- int r;
+ int r, srcu_idx;
- r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
+ r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
if (r < 0)
- return r;
+ goto out;
ops = bdev->bd_disk->fops->pr_ops;
if (ops && ops->pr_clear)
r = ops->pr_clear(bdev, key);
else
r = -EOPNOTSUPP;
-
- blkdev_put(bdev, mode);
+out:
+ dm_unprepare_ioctl(md, srcu_idx);
return r;
}
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index 773fc70dced7..4964323d936b 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -138,9 +138,9 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
}
if (!discard_supported)
- queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+ blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue);
else
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
/*
* Here we calculate the device offsets.
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 254e44e44668..3bea45e8ccff 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5206,12 +5206,12 @@ static void md_free(struct kobject *ko)
if (mddev->sysfs_state)
sysfs_put(mddev->sysfs_state);
+ if (mddev->gendisk)
+ del_gendisk(mddev->gendisk);
if (mddev->queue)
blk_cleanup_queue(mddev->queue);
- if (mddev->gendisk) {
- del_gendisk(mddev->gendisk);
+ if (mddev->gendisk)
put_disk(mddev->gendisk);
- }
percpu_ref_exit(&mddev->writes_pending);
kfree(mddev);
@@ -5619,9 +5619,9 @@ int md_run(struct mddev *mddev)
if (mddev->degraded)
nonrot = false;
if (nonrot)
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mddev->queue);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
else
- queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, mddev->queue);
+ blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
mddev->queue->backing_dev_info->congested_data = mddev;
mddev->queue->backing_dev_info->congested_fn = md_congested;
}
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c
index ea15d220ced7..492a3f8ac119 100644
--- a/drivers/md/persistent-data/dm-block-manager.c
+++ b/drivers/md/persistent-data/dm-block-manager.c
@@ -5,8 +5,8 @@
*/
#include "dm-block-manager.h"
#include "dm-persistent-data-internal.h"
-#include "../dm-bufio.h"
+#include <linux/dm-bufio.h>
#include <linux/crc32c.h>
#include <linux/module.h>
#include <linux/slab.h>
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 5ecba9eef441..584c10347267 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -399,9 +399,9 @@ static int raid0_run(struct mddev *mddev)
discard_supported = true;
}
if (!discard_supported)
- queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+ blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue);
else
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
}
/* calculate array device size */
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index fe872dc6712e..e2943fb74056 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1760,7 +1760,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
}
}
if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
print_conf(conf);
return err;
}
@@ -3110,10 +3110,10 @@ static int raid1_run(struct mddev *mddev)
if (mddev->queue) {
if (discard_supported)
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD,
mddev->queue);
else
- queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
+ blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
mddev->queue);
}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index c5e6c60fc0d4..3c60774c8430 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1845,7 +1845,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
break;
}
if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
print_conf(conf);
return err;
@@ -3846,10 +3846,10 @@ static int raid10_run(struct mddev *mddev)
if (mddev->queue) {
if (discard_supported)
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD,
mddev->queue);
else
- queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
+ blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
mddev->queue);
}
/* need to check that every block has at least one working mirror */
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index b5d2601483e3..be117d0a65a8 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7443,10 +7443,10 @@ static int raid5_run(struct mddev *mddev)
if (devices_handle_discard_safely &&
mddev->queue->limits.max_discard_sectors >= (stripe >> 9) &&
mddev->queue->limits.discard_granularity >= stripe)
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD,
mddev->queue);
else
- queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
+ blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
mddev->queue);
blk_queue_max_hw_sectors(mddev->queue, UINT_MAX);
OpenPOWER on IntegriCloud