Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/bcache/bcache.h                        |   7
-rw-r--r--  drivers/md/bcache/btree.c                         |   6
-rw-r--r--  drivers/md/bcache/btree.h                         |   2
-rw-r--r--  drivers/md/bcache/debug.c                         |   2
-rw-r--r--  drivers/md/bcache/io.c                            |   6
-rw-r--r--  drivers/md/bcache/journal.c                       |   2
-rw-r--r--  drivers/md/bcache/movinggc.c                      |  10
-rw-r--r--  drivers/md/bcache/request.c                       |  28
-rw-r--r--  drivers/md/bcache/request.h                       |   2
-rw-r--r--  drivers/md/bcache/super.c                         |  14
-rw-r--r--  drivers/md/bcache/writeback.c                     |   4
-rw-r--r--  drivers/md/bitmap.c                               |   8
-rw-r--r--  drivers/md/dm-bio-prison-v1.c                     |   4
-rw-r--r--  drivers/md/dm-bio-prison-v1.h                     |   2
-rw-r--r--  drivers/md/dm-bufio.c                             |  46
-rw-r--r--  drivers/md/dm-cache-background-tracker.c          |   5
-rw-r--r--  drivers/md/dm-cache-policy-smq.c                  |  31
-rw-r--r--  drivers/md/dm-cache-target.c                      |  63
-rw-r--r--  drivers/md/dm-crypt.c                             |  41
-rw-r--r--  drivers/md/dm-flakey.c                            |  13
-rw-r--r--  drivers/md/dm-integrity.c                         |  72
-rw-r--r--  drivers/md/dm-io.c                                |  17
-rw-r--r--  drivers/md/dm-ioctl.c                             |   5
-rw-r--r--  drivers/md/dm-log-writes.c                        |  13
-rw-r--r--  drivers/md/dm-mpath.c                             | 100
-rw-r--r--  drivers/md/dm-raid.c                              |  17
-rw-r--r--  drivers/md/dm-raid1.c                             |  50
-rw-r--r--  drivers/md/dm-rq.c                                |  31
-rw-r--r--  drivers/md/dm-rq.h                                |   2
-rw-r--r--  drivers/md/dm-snap-persistent.c                   |   3
-rw-r--r--  drivers/md/dm-snap.c                              |  15
-rw-r--r--  drivers/md/dm-stripe.c                            |  17
-rw-r--r--  drivers/md/dm-target.c                            |   2
-rw-r--r--  drivers/md/dm-thin-metadata.c                     |   4
-rw-r--r--  drivers/md/dm-thin.c                              |  93
-rw-r--r--  drivers/md/dm-verity-target.c                     |  20
-rw-r--r--  drivers/md/dm-zero.c                              |   4
-rw-r--r--  drivers/md/dm.c                                   |  90
-rw-r--r--  drivers/md/md-cluster.c                           |   4
-rw-r--r--  drivers/md/md.c                                   |  60
-rw-r--r--  drivers/md/md.h                                   |   3
-rw-r--r--  drivers/md/multipath.c                            |  10
-rw-r--r--  drivers/md/persistent-data/dm-space-map-disk.c    |  15
-rw-r--r--  drivers/md/raid0.c                                | 116
-rw-r--r--  drivers/md/raid1.c                                |  61
-rw-r--r--  drivers/md/raid10.c                               |  48
-rw-r--r--  drivers/md/raid5-cache.c                          |  57
-rw-r--r--  drivers/md/raid5-log.h                            |   3
-rw-r--r--  drivers/md/raid5-ppl.c                            |   8
-rw-r--r--  drivers/md/raid5.c                                | 124
50 files changed, 781 insertions, 579 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index c3ea03c9a1a8..dee542fff68e 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -849,10 +849,11 @@ static inline void wake_up_allocators(struct cache_set *c)
/* Forward declarations */
-void bch_count_io_errors(struct cache *, int, const char *);
+void bch_count_io_errors(struct cache *, blk_status_t, const char *);
void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
- int, const char *);
-void bch_bbio_endio(struct cache_set *, struct bio *, int, const char *);
+ blk_status_t, const char *);
+void bch_bbio_endio(struct cache_set *, struct bio *, blk_status_t,
+ const char *);
void bch_bbio_free(struct bio *, struct cache_set *);
struct bio *bch_bbio_alloc(struct cache_set *);
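For context: this series moves the block layer's per-bio error from the errno-valued bio->bi_error to bio->bi_status, whose dedicated type blk_status_t (BLK_STS_OK, BLK_STS_IOERR, ...) the bcache prototypes above now take. A minimal sketch of the resulting endio pattern; the struct and handler names are hypothetical, not from this patch:

#include <linux/bio.h>
#include <linux/blk_types.h>

struct example_io {			/* hypothetical per-IO state */
	blk_status_t status;
};

static void example_endio(struct bio *bio)
{
	struct example_io *io = bio->bi_private;

	if (bio->bi_status)		/* a blk_status_t, not a negative errno */
		io->status = bio->bi_status;	/* e.g. BLK_STS_IOERR */

	bio_put(bio);
}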
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 450d0e848ae4..866dcf78ff8e 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -307,7 +307,7 @@ static void bch_btree_node_read(struct btree *b)
bch_submit_bbio(bio, b->c, &b->key, 0);
closure_sync(&cl);
- if (bio->bi_error)
+ if (bio->bi_status)
set_btree_node_io_error(b);
bch_bbio_free(bio, b->c);
@@ -374,10 +374,10 @@ static void btree_node_write_endio(struct bio *bio)
struct closure *cl = bio->bi_private;
struct btree *b = container_of(cl, struct btree, io);
- if (bio->bi_error)
+ if (bio->bi_status)
set_btree_node_io_error(b);
- bch_bbio_count_io_errors(b->c, bio, bio->bi_error, "writing btree");
+ bch_bbio_count_io_errors(b->c, bio, bio->bi_status, "writing btree");
closure_put(cl);
}
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 9b80417cd547..73da1f5626cb 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -207,7 +207,7 @@ void bkey_put(struct cache_set *c, struct bkey *k);
struct btree_op {
/* for waiting on btree reserve in btree_split() */
- wait_queue_t wait;
+ wait_queue_entry_t wait;
/* Btree level at which we start taking write locks */
short lock;
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 06f55056aaae..35a5a7210e51 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -110,7 +110,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
struct bio_vec bv, cbv;
struct bvec_iter iter, citer = { 0 };
- check = bio_clone(bio, GFP_NOIO);
+ check = bio_clone_kmalloc(bio, GFP_NOIO);
if (!check)
return;
check->bi_opf = REQ_OP_READ;
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index db45a88c0ce9..6a9b85095e7b 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -50,7 +50,7 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
/* IO errors */
-void bch_count_io_errors(struct cache *ca, int error, const char *m)
+void bch_count_io_errors(struct cache *ca, blk_status_t error, const char *m)
{
/*
* The halflife of an error is:
@@ -103,7 +103,7 @@ void bch_count_io_errors(struct cache *ca, int error, const char *m)
}
void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
- int error, const char *m)
+ blk_status_t error, const char *m)
{
struct bbio *b = container_of(bio, struct bbio, bio);
struct cache *ca = PTR_CACHE(c, &b->key, 0);
@@ -132,7 +132,7 @@ void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
}
void bch_bbio_endio(struct cache_set *c, struct bio *bio,
- int error, const char *m)
+ blk_status_t error, const char *m)
{
struct closure *cl = bio->bi_private;
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 1198e53d5670..0352d05e495c 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -549,7 +549,7 @@ static void journal_write_endio(struct bio *bio)
{
struct journal_write *w = bio->bi_private;
- cache_set_err_on(bio->bi_error, w->c, "journal io error");
+ cache_set_err_on(bio->bi_status, w->c, "journal io error");
closure_put(&w->c->journal.io);
}
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 13b8a907006d..f633b30c962e 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -63,14 +63,14 @@ static void read_moving_endio(struct bio *bio)
struct moving_io *io = container_of(bio->bi_private,
struct moving_io, cl);
- if (bio->bi_error)
- io->op.error = bio->bi_error;
+ if (bio->bi_status)
+ io->op.status = bio->bi_status;
else if (!KEY_DIRTY(&b->key) &&
ptr_stale(io->op.c, &b->key, 0)) {
- io->op.error = -EINTR;
+ io->op.status = BLK_STS_IOERR;
}
- bch_bbio_endio(io->op.c, bio, bio->bi_error, "reading data to move");
+ bch_bbio_endio(io->op.c, bio, bio->bi_status, "reading data to move");
}
static void moving_init(struct moving_io *io)
@@ -92,7 +92,7 @@ static void write_moving(struct closure *cl)
struct moving_io *io = container_of(cl, struct moving_io, cl);
struct data_insert_op *op = &io->op;
- if (!op->error) {
+ if (!op->status) {
moving_init(io);
io->bio.bio.bi_iter.bi_sector = KEY_START(&io->w->key);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 709c9cc34369..019b3df9f1c6 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -81,7 +81,7 @@ static void bch_data_insert_keys(struct closure *cl)
if (ret == -ESRCH) {
op->replace_collision = true;
} else if (ret) {
- op->error = -ENOMEM;
+ op->status = BLK_STS_RESOURCE;
op->insert_data_done = true;
}
@@ -178,17 +178,17 @@ static void bch_data_insert_endio(struct bio *bio)
struct closure *cl = bio->bi_private;
struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
- if (bio->bi_error) {
+ if (bio->bi_status) {
/* TODO: We could try to recover from this. */
if (op->writeback)
- op->error = bio->bi_error;
+ op->status = bio->bi_status;
else if (!op->replace)
set_closure_fn(cl, bch_data_insert_error, op->wq);
else
set_closure_fn(cl, NULL, NULL);
}
- bch_bbio_endio(op->c, bio, bio->bi_error, "writing data to cache");
+ bch_bbio_endio(op->c, bio, bio->bi_status, "writing data to cache");
}
static void bch_data_insert_start(struct closure *cl)
@@ -488,15 +488,15 @@ static void bch_cache_read_endio(struct bio *bio)
* from the backing device.
*/
- if (bio->bi_error)
- s->iop.error = bio->bi_error;
+ if (bio->bi_status)
+ s->iop.status = bio->bi_status;
else if (!KEY_DIRTY(&b->key) &&
ptr_stale(s->iop.c, &b->key, 0)) {
atomic_long_inc(&s->iop.c->cache_read_races);
- s->iop.error = -EINTR;
+ s->iop.status = BLK_STS_IOERR;
}
- bch_bbio_endio(s->iop.c, bio, bio->bi_error, "reading from cache");
+ bch_bbio_endio(s->iop.c, bio, bio->bi_status, "reading from cache");
}
/*
@@ -593,9 +593,9 @@ static void request_endio(struct bio *bio)
{
struct closure *cl = bio->bi_private;
- if (bio->bi_error) {
+ if (bio->bi_status) {
struct search *s = container_of(cl, struct search, cl);
- s->iop.error = bio->bi_error;
+ s->iop.status = bio->bi_status;
/* Only cache read errors are recoverable */
s->recoverable = false;
}
@@ -611,7 +611,7 @@ static void bio_complete(struct search *s)
&s->d->disk->part0, s->start_time);
trace_bcache_request_end(s->d, s->orig_bio);
- s->orig_bio->bi_error = s->iop.error;
+ s->orig_bio->bi_status = s->iop.status;
bio_endio(s->orig_bio);
s->orig_bio = NULL;
}
@@ -664,7 +664,7 @@ static inline struct search *search_alloc(struct bio *bio,
s->iop.inode = d->id;
s->iop.write_point = hash_long((unsigned long) current, 16);
s->iop.write_prio = 0;
- s->iop.error = 0;
+ s->iop.status = 0;
s->iop.flags = 0;
s->iop.flush_journal = op_is_flush(bio->bi_opf);
s->iop.wq = bcache_wq;
@@ -707,7 +707,7 @@ static void cached_dev_read_error(struct closure *cl)
/* Retry from the backing device: */
trace_bcache_read_retry(s->orig_bio);
- s->iop.error = 0;
+ s->iop.status = 0;
do_bio_hook(s, s->orig_bio);
/* XXX: invalidate cache */
@@ -767,7 +767,7 @@ static void cached_dev_read_done_bh(struct closure *cl)
!s->cache_miss, s->iop.bypass);
trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass);
- if (s->iop.error)
+ if (s->iop.status)
continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq);
else if (s->iop.bio || verify(dc, &s->bio.bio))
continue_at_nobarrier(cl, cached_dev_read_done, bcache_wq);
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index 1ff36875c2b3..7689176951ce 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -10,7 +10,7 @@ struct data_insert_op {
unsigned inode;
uint16_t write_point;
uint16_t write_prio;
- short error;
+ blk_status_t status;
union {
uint16_t flags;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index e57353e39168..8352fad765f6 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -271,7 +271,7 @@ static void write_super_endio(struct bio *bio)
{
struct cache *ca = bio->bi_private;
- bch_count_io_errors(ca, bio->bi_error, "writing superblock");
+ bch_count_io_errors(ca, bio->bi_status, "writing superblock");
closure_put(&ca->set->sb_write);
}
@@ -321,7 +321,7 @@ static void uuid_endio(struct bio *bio)
struct closure *cl = bio->bi_private;
struct cache_set *c = container_of(cl, struct cache_set, uuid_write);
- cache_set_err_on(bio->bi_error, c, "accessing uuids");
+ cache_set_err_on(bio->bi_status, c, "accessing uuids");
bch_bbio_free(bio, c);
closure_put(cl);
}
@@ -494,7 +494,7 @@ static void prio_endio(struct bio *bio)
{
struct cache *ca = bio->bi_private;
- cache_set_err_on(bio->bi_error, ca->set, "accessing priorities");
+ cache_set_err_on(bio->bi_status, ca->set, "accessing priorities");
bch_bbio_free(bio, ca->set);
closure_put(&ca->prio);
}
@@ -782,7 +782,9 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
minor *= BCACHE_MINORS;
- if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
+ if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio),
+ BIOSET_NEED_BVECS |
+ BIOSET_NEED_RESCUER)) ||
!(d->disk = alloc_disk(BCACHE_MINORS))) {
ida_simple_remove(&bcache_minor, minor);
return -ENOMEM;
@@ -1516,7 +1518,9 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
sizeof(struct bbio) + sizeof(struct bio_vec) *
bucket_pages(c))) ||
!(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
- !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
+ !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio),
+ BIOSET_NEED_BVECS |
+ BIOSET_NEED_RESCUER)) ||
!(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
!(c->moving_gc_wq = alloc_workqueue("bcache_gc",
WQ_MEM_RECLAIM, 0)) ||
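The bioset_create() calls above pick up the new third argument, a flags word: BIOSET_NEED_BVECS allocates a bvec pool for bios that carry data pages, and BIOSET_NEED_RESCUER attaches a rescuer workqueue so allocations from the set cannot deadlock inside generic_make_request(). A sketch of the post-change call, with a hypothetical wrapper name:

#include <linux/bio.h>

static struct bio_set *example_create_bioset(void)
{
	/* 4 bios minimum, no front padding, both new flags set. */
	return bioset_create(4, 0, BIOSET_NEED_BVECS | BIOSET_NEED_RESCUER);
}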
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 6ac2e48b9235..42c66e76f05e 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -167,7 +167,7 @@ static void dirty_endio(struct bio *bio)
struct keybuf_key *w = bio->bi_private;
struct dirty_io *io = w->private;
- if (bio->bi_error)
+ if (bio->bi_status)
SET_KEY_DIRTY(&w->key, false);
closure_put(&io->cl);
@@ -195,7 +195,7 @@ static void read_dirty_endio(struct bio *bio)
struct dirty_io *io = w->private;
bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0),
- bio->bi_error, "reading dirty data from cache");
+ bio->bi_status, "reading dirty data from cache");
dirty_endio(bio);
}
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index bf7419a56454..f4eace5ea184 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -485,10 +485,10 @@ void bitmap_print_sb(struct bitmap *bitmap)
pr_debug(" magic: %08x\n", le32_to_cpu(sb->magic));
pr_debug(" version: %d\n", le32_to_cpu(sb->version));
pr_debug(" uuid: %08x.%08x.%08x.%08x\n",
- *(__u32 *)(sb->uuid+0),
- *(__u32 *)(sb->uuid+4),
- *(__u32 *)(sb->uuid+8),
- *(__u32 *)(sb->uuid+12));
+ le32_to_cpu(*(__u32 *)(sb->uuid+0)),
+ le32_to_cpu(*(__u32 *)(sb->uuid+4)),
+ le32_to_cpu(*(__u32 *)(sb->uuid+8)),
+ le32_to_cpu(*(__u32 *)(sb->uuid+12)));
pr_debug(" events: %llu\n",
(unsigned long long) le64_to_cpu(sb->events));
pr_debug("events cleared: %llu\n",
diff --git a/drivers/md/dm-bio-prison-v1.c b/drivers/md/dm-bio-prison-v1.c
index ae7da2c30a57..82d27384d31f 100644
--- a/drivers/md/dm-bio-prison-v1.c
+++ b/drivers/md/dm-bio-prison-v1.c
@@ -229,7 +229,7 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);
void dm_cell_error(struct dm_bio_prison *prison,
- struct dm_bio_prison_cell *cell, int error)
+ struct dm_bio_prison_cell *cell, blk_status_t error)
{
struct bio_list bios;
struct bio *bio;
@@ -238,7 +238,7 @@ void dm_cell_error(struct dm_bio_prison *prison,
dm_cell_release(prison, cell, &bios);
while ((bio = bio_list_pop(&bios))) {
- bio->bi_error = error;
+ bio->bi_status = error;
bio_endio(bio);
}
}
diff --git a/drivers/md/dm-bio-prison-v1.h b/drivers/md/dm-bio-prison-v1.h
index cddd4ac07e2c..cec52ac5e1ae 100644
--- a/drivers/md/dm-bio-prison-v1.h
+++ b/drivers/md/dm-bio-prison-v1.h
@@ -91,7 +91,7 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
struct dm_bio_prison_cell *cell,
struct bio_list *inmates);
void dm_cell_error(struct dm_bio_prison *prison,
- struct dm_bio_prison_cell *cell, int error);
+ struct dm_bio_prison_cell *cell, blk_status_t error);
/*
* Visits the cell and then releases. Guarantees no new inmates are
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 5db11a405129..850ff6c67994 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -145,8 +145,8 @@ struct dm_buffer {
enum data_mode data_mode;
unsigned char list_mode; /* LIST_* */
unsigned hold_count;
- int read_error;
- int write_error;
+ blk_status_t read_error;
+ blk_status_t write_error;
unsigned long state;
unsigned long last_accessed;
struct dm_bufio_client *c;
@@ -218,7 +218,7 @@ static DEFINE_SPINLOCK(param_spinlock);
* Buffers are freed after this timeout
*/
static unsigned dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS;
-static unsigned dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES;
+static unsigned long dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES;
static unsigned long dm_bufio_peak_allocated;
static unsigned long dm_bufio_allocated_kmem_cache;
@@ -555,7 +555,7 @@ static void dmio_complete(unsigned long error, void *context)
{
struct dm_buffer *b = context;
- b->bio.bi_error = error ? -EIO : 0;
+ b->bio.bi_status = error ? BLK_STS_IOERR : 0;
b->bio.bi_end_io(&b->bio);
}
@@ -588,7 +588,7 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
r = dm_io(&io_req, 1, &region, NULL);
if (r) {
- b->bio.bi_error = r;
+ b->bio.bi_status = errno_to_blk_status(r);
end_io(&b->bio);
}
}
@@ -596,7 +596,7 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
static void inline_endio(struct bio *bio)
{
bio_end_io_t *end_fn = bio->bi_private;
- int error = bio->bi_error;
+ blk_status_t status = bio->bi_status;
/*
* Reset the bio to free any attached resources
@@ -604,7 +604,7 @@ static void inline_endio(struct bio *bio)
*/
bio_reset(bio);
- bio->bi_error = error;
+ bio->bi_status = status;
end_fn(bio);
}
@@ -685,11 +685,12 @@ static void write_endio(struct bio *bio)
{
struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
- b->write_error = bio->bi_error;
- if (unlikely(bio->bi_error)) {
+ b->write_error = bio->bi_status;
+ if (unlikely(bio->bi_status)) {
struct dm_bufio_client *c = b->c;
- int error = bio->bi_error;
- (void)cmpxchg(&c->async_write_error, 0, error);
+
+ (void)cmpxchg(&c->async_write_error, 0,
+ blk_status_to_errno(bio->bi_status));
}
BUG_ON(!test_bit(B_WRITING, &b->state));
@@ -1063,7 +1064,7 @@ static void read_endio(struct bio *bio)
{
struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
- b->read_error = bio->bi_error;
+ b->read_error = bio->bi_status;
BUG_ON(!test_bit(B_READING, &b->state));
@@ -1107,7 +1108,7 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
if (b->read_error) {
- int error = b->read_error;
+ int error = blk_status_to_errno(b->read_error);
dm_bufio_release(b);
@@ -1257,7 +1258,8 @@ EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers_async);
*/
int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
{
- int a, f;
+ blk_status_t a;
+ int f;
unsigned long buffers_processed = 0;
struct dm_buffer *b, *tmp;
@@ -1334,7 +1336,7 @@ int dm_bufio_issue_flush(struct dm_bufio_client *c)
{
struct dm_io_request io_req = {
.bi_op = REQ_OP_WRITE,
- .bi_op_flags = REQ_PREFLUSH,
+ .bi_op_flags = REQ_PREFLUSH | REQ_SYNC,
.mem.type = DM_IO_KMEM,
.mem.ptr.addr = NULL,
.client = c->dm_io,
@@ -1558,10 +1560,10 @@ static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp)
return true;
}
-static unsigned get_retain_buffers(struct dm_bufio_client *c)
+static unsigned long get_retain_buffers(struct dm_bufio_client *c)
{
- unsigned retain_bytes = ACCESS_ONCE(dm_bufio_retain_bytes);
- return retain_bytes / c->block_size;
+ unsigned long retain_bytes = ACCESS_ONCE(dm_bufio_retain_bytes);
+ return retain_bytes >> (c->sectors_per_block_bits + SECTOR_SHIFT);
}
static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
@@ -1571,7 +1573,7 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
struct dm_buffer *b, *tmp;
unsigned long freed = 0;
unsigned long count = nr_to_scan;
- unsigned retain_target = get_retain_buffers(c);
+ unsigned long retain_target = get_retain_buffers(c);
for (l = 0; l < LIST_SIZE; l++) {
list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) {
@@ -1794,8 +1796,8 @@ static bool older_than(struct dm_buffer *b, unsigned long age_hz)
static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz)
{
struct dm_buffer *b, *tmp;
- unsigned retain_target = get_retain_buffers(c);
- unsigned count;
+ unsigned long retain_target = get_retain_buffers(c);
+ unsigned long count;
LIST_HEAD(write_list);
dm_bufio_lock(c);
@@ -1955,7 +1957,7 @@ MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache");
module_param_named(max_age_seconds, dm_bufio_max_age, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds");
-module_param_named(retain_bytes, dm_bufio_retain_bytes, uint, S_IRUGO | S_IWUSR);
+module_param_named(retain_bytes, dm_bufio_retain_bytes, ulong, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(retain_bytes, "Try to keep at least this many bytes cached in memory");
module_param_named(peak_allocated_bytes, dm_bufio_peak_allocated, ulong, S_IRUGO | S_IWUSR);
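Because blk_status_t codes and errnos are now distinct value spaces, dm-bufio converts at the boundaries with errno_to_blk_status() and blk_status_to_errno(), as in the hunks above. A sketch of the two conversions (wrapper names hypothetical):

#include <linux/blkdev.h>

static blk_status_t example_store_error(int err)
{
	return errno_to_blk_status(err);	/* e.g. -EIO -> BLK_STS_IOERR */
}

static int example_report_error(blk_status_t status)
{
	return blk_status_to_errno(status);	/* e.g. BLK_STS_IOERR -> -EIO */
}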
diff --git a/drivers/md/dm-cache-background-tracker.c b/drivers/md/dm-cache-background-tracker.c
index 9b1afdfb13f0..707233891291 100644
--- a/drivers/md/dm-cache-background-tracker.c
+++ b/drivers/md/dm-cache-background-tracker.c
@@ -33,6 +33,11 @@ struct background_tracker *btracker_create(unsigned max_work)
{
struct background_tracker *b = kmalloc(sizeof(*b), GFP_KERNEL);
+ if (!b) {
+ DMERR("couldn't create background_tracker");
+ return NULL;
+ }
+
b->max_work = max_work;
atomic_set(&b->pending_promotes, 0);
atomic_set(&b->pending_writebacks, 0);
diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c
index 72479bd61e11..e5eb9c9b4bc8 100644
--- a/drivers/md/dm-cache-policy-smq.c
+++ b/drivers/md/dm-cache-policy-smq.c
@@ -1120,8 +1120,6 @@ static bool clean_target_met(struct smq_policy *mq, bool idle)
* Cache entries may not be populated. So we cannot rely on the
* size of the clean queue.
*/
- unsigned nr_clean;
-
if (idle) {
/*
* We'd like to clean everything.
@@ -1129,18 +1127,16 @@ static bool clean_target_met(struct smq_policy *mq, bool idle)
return q_size(&mq->dirty) == 0u;
}
- nr_clean = from_cblock(mq->cache_size) - q_size(&mq->dirty);
- return (nr_clean + btracker_nr_writebacks_queued(mq->bg_work)) >=
- percent_to_target(mq, CLEAN_TARGET);
+ /*
+ * If we're busy we don't worry about cleaning at all.
+ */
+ return true;
}
-static bool free_target_met(struct smq_policy *mq, bool idle)
+static bool free_target_met(struct smq_policy *mq)
{
unsigned nr_free;
- if (!idle)
- return true;
-
nr_free = from_cblock(mq->cache_size) - mq->cache_alloc.nr_allocated;
return (nr_free + btracker_nr_demotions_queued(mq->bg_work)) >=
percent_to_target(mq, FREE_TARGET);
@@ -1190,9 +1186,9 @@ static void queue_demotion(struct smq_policy *mq)
if (unlikely(WARN_ON_ONCE(!mq->migrations_allowed)))
return;
- e = q_peek(&mq->clean, mq->clean.nr_levels, true);
+ e = q_peek(&mq->clean, mq->clean.nr_levels / 2, true);
if (!e) {
- if (!clean_target_met(mq, false))
+ if (!clean_target_met(mq, true))
queue_writeback(mq);
return;
}
@@ -1220,7 +1216,7 @@ static void queue_promotion(struct smq_policy *mq, dm_oblock_t oblock,
* We always claim to be 'idle' to ensure some demotions happen
* with continuous loads.
*/
- if (!free_target_met(mq, true))
+ if (!free_target_met(mq))
queue_demotion(mq);
return;
}
@@ -1421,14 +1417,10 @@ static int smq_get_background_work(struct dm_cache_policy *p, bool idle,
spin_lock_irqsave(&mq->lock, flags);
r = btracker_issue(mq->bg_work, result);
if (r == -ENODATA) {
- /* find some writeback work to do */
- if (mq->migrations_allowed && !free_target_met(mq, idle))
- queue_demotion(mq);
-
- else if (!clean_target_met(mq, idle))
+ if (!clean_target_met(mq, idle)) {
queue_writeback(mq);
-
- r = btracker_issue(mq->bg_work, result);
+ r = btracker_issue(mq->bg_work, result);
+ }
}
spin_unlock_irqrestore(&mq->lock, flags);
@@ -1452,6 +1444,7 @@ static void __complete_background_work(struct smq_policy *mq,
clear_pending(mq, e);
if (success) {
e->oblock = work->oblock;
+ e->level = NR_CACHE_LEVELS - 1;
push(mq, e);
// h, q, a
} else {
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 1db375f50a13..c5ea03fc7ee1 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -94,6 +94,9 @@ static void iot_io_begin(struct io_tracker *iot, sector_t len)
static void __iot_io_end(struct io_tracker *iot, sector_t len)
{
+ if (!len)
+ return;
+
iot->in_flight -= len;
if (!iot->in_flight)
iot->idle_time = jiffies;
@@ -116,7 +119,7 @@ static void iot_io_end(struct io_tracker *iot, sector_t len)
*/
struct continuation {
struct work_struct ws;
- int input;
+ blk_status_t input;
};
static inline void init_continuation(struct continuation *k,
@@ -142,7 +145,7 @@ struct batcher {
/*
* The operation that everyone is waiting for.
*/
- int (*commit_op)(void *context);
+ blk_status_t (*commit_op)(void *context);
void *commit_context;
/*
@@ -168,8 +171,7 @@ struct batcher {
static void __commit(struct work_struct *_ws)
{
struct batcher *b = container_of(_ws, struct batcher, commit_work);
-
- int r;
+ blk_status_t r;
unsigned long flags;
struct list_head work_items;
struct work_struct *ws, *tmp;
@@ -202,7 +204,7 @@ static void __commit(struct work_struct *_ws)
while ((bio = bio_list_pop(&bios))) {
if (r) {
- bio->bi_error = r;
+ bio->bi_status = r;
bio_endio(bio);
} else
b->issue_op(bio, b->issue_context);
@@ -210,7 +212,7 @@ static void __commit(struct work_struct *_ws)
}
static void batcher_init(struct batcher *b,
- int (*commit_op)(void *),
+ blk_status_t (*commit_op)(void *),
void *commit_context,
void (*issue_op)(struct bio *bio, void *),
void *issue_context,
@@ -474,7 +476,7 @@ struct cache {
spinlock_t invalidation_lock;
struct list_head invalidation_requests;
- struct io_tracker origin_tracker;
+ struct io_tracker tracker;
struct work_struct commit_ws;
struct batcher committer;
@@ -901,8 +903,7 @@ static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
static bool accountable_bio(struct cache *cache, struct bio *bio)
{
- return ((bio->bi_bdev == cache->origin_dev->bdev) &&
- bio_op(bio) != REQ_OP_DISCARD);
+ return bio_op(bio) != REQ_OP_DISCARD;
}
static void accounted_begin(struct cache *cache, struct bio *bio)
@@ -912,7 +913,7 @@ static void accounted_begin(struct cache *cache, struct bio *bio)
if (accountable_bio(cache, bio)) {
pb->len = bio_sectors(bio);
- iot_io_begin(&cache->origin_tracker, pb->len);
+ iot_io_begin(&cache->tracker, pb->len);
}
}
@@ -921,7 +922,7 @@ static void accounted_complete(struct cache *cache, struct bio *bio)
size_t pb_data_size = get_per_bio_data_size(cache);
struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
- iot_io_end(&cache->origin_tracker, pb->len);
+ iot_io_end(&cache->tracker, pb->len);
}
static void accounted_request(struct cache *cache, struct bio *bio)
@@ -953,7 +954,7 @@ static void writethrough_endio(struct bio *bio)
dm_unhook_bio(&pb->hook_info, bio);
- if (bio->bi_error) {
+ if (bio->bi_status) {
bio_endio(bio);
return;
}
@@ -1218,7 +1219,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
struct dm_cache_migration *mg = container_of(context, struct dm_cache_migration, k);
if (read_err || write_err)
- mg->k.input = -EIO;
+ mg->k.input = BLK_STS_IOERR;
queue_continuation(mg->cache->wq, &mg->k);
}
@@ -1264,8 +1265,8 @@ static void overwrite_endio(struct bio *bio)
dm_unhook_bio(&pb->hook_info, bio);
- if (bio->bi_error)
- mg->k.input = bio->bi_error;
+ if (bio->bi_status)
+ mg->k.input = bio->bi_status;
queue_continuation(mg->cache->wq, &mg->k);
}
@@ -1321,8 +1322,10 @@ static void mg_complete(struct dm_cache_migration *mg, bool success)
if (mg->overwrite_bio) {
if (success)
force_set_dirty(cache, cblock);
+ else if (mg->k.input)
+ mg->overwrite_bio->bi_status = mg->k.input;
else
- mg->overwrite_bio->bi_error = (mg->k.input ? : -EIO);
+ mg->overwrite_bio->bi_status = BLK_STS_IOERR;
bio_endio(mg->overwrite_bio);
} else {
if (success)
@@ -1502,7 +1505,7 @@ static void mg_copy(struct work_struct *ws)
r = copy(mg, is_policy_promote);
if (r) {
DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache));
- mg->k.input = -EIO;
+ mg->k.input = BLK_STS_IOERR;
mg_complete(mg, false);
}
}
@@ -1716,20 +1719,19 @@ static int invalidate_start(struct cache *cache, dm_cblock_t cblock,
enum busy {
IDLE,
- MODERATE,
BUSY
};
static enum busy spare_migration_bandwidth(struct cache *cache)
{
- bool idle = iot_idle_for(&cache->origin_tracker, HZ);
+ bool idle = iot_idle_for(&cache->tracker, HZ);
sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
cache->sectors_per_block;
- if (current_volume <= cache->migration_threshold)
- return idle ? IDLE : MODERATE;
+ if (idle && current_volume <= cache->migration_threshold)
+ return IDLE;
else
- return idle ? MODERATE : BUSY;
+ return BUSY;
}
static void inc_hit_counter(struct cache *cache, struct bio *bio)
@@ -1906,12 +1908,12 @@ static int commit(struct cache *cache, bool clean_shutdown)
/*
* Used by the batcher.
*/
-static int commit_op(void *context)
+static blk_status_t commit_op(void *context)
{
struct cache *cache = context;
if (dm_cache_changed_this_transaction(cache->cmd))
- return commit(cache, false);
+ return errno_to_blk_status(commit(cache, false));
return 0;
}
@@ -2017,7 +2019,7 @@ static void requeue_deferred_bios(struct cache *cache)
bio_list_init(&cache->deferred_bios);
while ((bio = bio_list_pop(&bios))) {
- bio->bi_error = DM_ENDIO_REQUEUE;
+ bio->bi_status = BLK_STS_DM_REQUEUE;
bio_endio(bio);
}
}
@@ -2045,8 +2047,6 @@ static void check_migrations(struct work_struct *ws)
for (;;) {
b = spare_migration_bandwidth(cache);
- if (b == BUSY)
- break;
r = policy_get_background_work(cache->policy, b == IDLE, &op);
if (r == -ENODATA)
@@ -2717,7 +2717,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
batcher_init(&cache->committer, commit_op, cache,
issue_op, cache, cache->wq);
- iot_init(&cache->origin_tracker);
+ iot_init(&cache->tracker);
init_rwsem(&cache->background_work_lock);
prevent_background_work(cache);
@@ -2821,7 +2821,8 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
return r;
}
-static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int cache_end_io(struct dm_target *ti, struct bio *bio,
+ blk_status_t *error)
{
struct cache *cache = ti->private;
unsigned long flags;
@@ -2839,7 +2840,7 @@ static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
bio_drop_shared_lock(cache, bio);
accounted_complete(cache, bio);
- return 0;
+ return DM_ENDIO_DONE;
}
static int write_dirty_bitset(struct cache *cache)
@@ -2941,7 +2942,7 @@ static void cache_postsuspend(struct dm_target *ti)
cancel_delayed_work(&cache->waker);
flush_workqueue(cache->wq);
- WARN_ON(cache->origin_tracker.in_flight);
+ WARN_ON(cache->tracker.in_flight);
/*
* If it's a flush suspend there won't be any deferred bios, so this
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index ebf9e72d479b..9e1b72e8f7ef 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -71,7 +71,7 @@ struct dm_crypt_io {
struct convert_context ctx;
atomic_t io_pending;
- int error;
+ blk_status_t error;
sector_t sector;
struct rb_node rb_node;
@@ -1292,7 +1292,7 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_
/*
* Encrypt / decrypt data from one bio to another one (can be the same one)
*/
-static int crypt_convert(struct crypt_config *cc,
+static blk_status_t crypt_convert(struct crypt_config *cc,
struct convert_context *ctx)
{
unsigned int tag_offset = 0;
@@ -1343,13 +1343,13 @@ static int crypt_convert(struct crypt_config *cc,
*/
case -EBADMSG:
atomic_dec(&ctx->cc_pending);
- return -EILSEQ;
+ return BLK_STS_PROTECTION;
/*
* There was an error while processing the request.
*/
default:
atomic_dec(&ctx->cc_pending);
- return -EIO;
+ return BLK_STS_IOERR;
}
}
@@ -1463,7 +1463,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
{
struct crypt_config *cc = io->cc;
struct bio *base_bio = io->base_bio;
- int error = io->error;
+ blk_status_t error = io->error;
if (!atomic_dec_and_test(&io->io_pending))
return;
@@ -1476,7 +1476,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
else
kfree(io->integrity_metadata);
- base_bio->bi_error = error;
+ base_bio->bi_status = error;
bio_endio(base_bio);
}
@@ -1502,7 +1502,7 @@ static void crypt_endio(struct bio *clone)
struct dm_crypt_io *io = clone->bi_private;
struct crypt_config *cc = io->cc;
unsigned rw = bio_data_dir(clone);
- int error;
+ blk_status_t error;
/*
* free the processed pages
@@ -1510,7 +1510,7 @@ static void crypt_endio(struct bio *clone)
if (rw == WRITE)
crypt_free_buffer_pages(cc, clone);
- error = clone->bi_error;
+ error = clone->bi_status;
bio_put(clone);
if (rw == READ && !error) {
@@ -1570,7 +1570,7 @@ static void kcryptd_io_read_work(struct work_struct *work)
crypt_inc_pending(io);
if (kcryptd_io_read(io, GFP_NOIO))
- io->error = -ENOMEM;
+ io->error = BLK_STS_RESOURCE;
crypt_dec_pending(io);
}
@@ -1656,7 +1656,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
sector_t sector;
struct rb_node **rbp, *parent;
- if (unlikely(io->error < 0)) {
+ if (unlikely(io->error)) {
crypt_free_buffer_pages(cc, clone);
bio_put(clone);
crypt_dec_pending(io);
@@ -1697,7 +1697,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
struct bio *clone;
int crypt_finished;
sector_t sector = io->sector;
- int r;
+ blk_status_t r;
/*
* Prevent io from disappearing until this function completes.
@@ -1707,7 +1707,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size);
if (unlikely(!clone)) {
- io->error = -EIO;
+ io->error = BLK_STS_IOERR;
goto dec;
}
@@ -1718,7 +1718,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
crypt_inc_pending(io);
r = crypt_convert(cc, &io->ctx);
- if (r < 0)
+ if (r)
io->error = r;
crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
@@ -1740,7 +1740,7 @@ static void kcryptd_crypt_read_done(struct dm_crypt_io *io)
static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
{
struct crypt_config *cc = io->cc;
- int r = 0;
+ blk_status_t r;
crypt_inc_pending(io);
@@ -1748,7 +1748,7 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
io->sector);
r = crypt_convert(cc, &io->ctx);
- if (r < 0)
+ if (r)
io->error = r;
if (atomic_dec_and_test(&io->ctx.cc_pending))
@@ -1781,9 +1781,9 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
if (error == -EBADMSG) {
DMERR_LIMIT("INTEGRITY AEAD ERROR, sector %llu",
(unsigned long long)le64_to_cpu(*org_sector_of_dmreq(cc, dmreq)));
- io->error = -EILSEQ;
+ io->error = BLK_STS_PROTECTION;
} else if (error < 0)
- io->error = -EIO;
+ io->error = BLK_STS_IOERR;
crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio);
@@ -2677,7 +2677,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
- cc->bs = bioset_create(MIN_IOS, 0);
+ cc->bs = bioset_create(MIN_IOS, 0, (BIOSET_NEED_BVECS |
+ BIOSET_NEED_RESCUER));
if (!cc->bs) {
ti->error = "Cannot allocate crypt bioset";
goto bad;
@@ -2795,10 +2796,10 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
* and is aligned to this size as defined in IO hints.
*/
if (unlikely((bio->bi_iter.bi_sector & ((cc->sector_size >> SECTOR_SHIFT) - 1)) != 0))
- return -EIO;
+ return DM_MAPIO_KILL;
if (unlikely(bio->bi_iter.bi_size & (cc->sector_size - 1)))
- return -EIO;
+ return DM_MAPIO_KILL;
io = dm_per_bio_data(bio, cc->per_bio_data_size);
crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
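As in dm-crypt above, target ->map methods stop returning negative errnos to fail a bio and return DM_MAPIO_KILL instead; the DM core then completes the bio with BLK_STS_IOERR. A sketch with a hypothetical target (the context struct and the check are illustrative only):

#include <linux/device-mapper.h>

struct example_ctx {			/* hypothetical target context */
	struct dm_dev *dev;
};

static int example_map(struct dm_target *ti, struct bio *bio)
{
	struct example_ctx *ec = ti->private;

	if (bio->bi_iter.bi_size & 511)	/* illustrative validity check */
		return DM_MAPIO_KILL;	/* was "return -EIO" before this series */

	bio->bi_bdev = ec->dev->bdev;
	return DM_MAPIO_REMAPPED;
}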
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 13305a182611..3d04d5ce19d9 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -321,7 +321,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
if (bio_data_dir(bio) == READ) {
if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags) &&
!test_bit(ERROR_WRITES, &fc->flags))
- return -EIO;
+ return DM_MAPIO_KILL;
goto map_bio;
}
@@ -349,7 +349,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
/*
* By default, error all I/O.
*/
- return -EIO;
+ return DM_MAPIO_KILL;
}
map_bio:
@@ -358,12 +358,13 @@ map_bio:
return DM_MAPIO_REMAPPED;
}
-static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int flakey_end_io(struct dm_target *ti, struct bio *bio,
+ blk_status_t *error)
{
struct flakey_c *fc = ti->private;
struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
- if (!error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
+ if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) &&
all_corrupt_bio_flags_match(bio, fc)) {
/*
@@ -377,11 +378,11 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
* Error read during the down_interval if drop_writes
* and error_writes were not configured.
*/
- return -EIO;
+ *error = BLK_STS_IOERR;
}
}
- return error;
+ return DM_ENDIO_DONE;
}
static void flakey_status(struct dm_target *ti, status_type_t type,
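dm-flakey above shows the other half of the convention change: ->end_io now takes a blk_status_t *error and returns a DM_ENDIO_* disposition, overriding the completion status through the pointer rather than via its return value. A sketch, with a purely illustrative failure policy:

#include <linux/device-mapper.h>

static int example_end_io(struct dm_target *ti, struct bio *bio,
			  blk_status_t *error)
{
	/* Illustrative policy: fail readahead reads that completed OK. */
	if (!*error && bio_data_dir(bio) == READ &&
	    (bio->bi_opf & REQ_RAHEAD))
		*error = BLK_STS_IOERR;	/* status changes via the pointer */

	return DM_ENDIO_DONE;		/* a disposition, not an errno */
}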
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index c7f7c8d76576..1b224aa9cf15 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -246,7 +246,7 @@ struct dm_integrity_io {
unsigned metadata_offset;
atomic_t in_flight;
- int bi_error;
+ blk_status_t bi_status;
struct completion *completion;
@@ -783,7 +783,8 @@ static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsi
for (i = 0; i < commit_sections; i++)
rw_section_mac(ic, commit_start + i, true);
}
- rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, commit_sections, &io_comp);
+ rw_journal(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, commit_start,
+ commit_sections, &io_comp);
} else {
unsigned to_end;
io_comp.in_flight = (atomic_t)ATOMIC_INIT(2);
@@ -1104,18 +1105,21 @@ static void schedule_autocommit(struct dm_integrity_c *ic)
static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
{
struct bio *bio;
- spin_lock_irq(&ic->endio_wait.lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ic->endio_wait.lock, flags);
bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
bio_list_add(&ic->flush_bio_list, bio);
- spin_unlock_irq(&ic->endio_wait.lock);
+ spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
+
queue_work(ic->commit_wq, &ic->commit_work);
}
static void do_endio(struct dm_integrity_c *ic, struct bio *bio)
{
int r = dm_integrity_failed(ic);
- if (unlikely(r) && !bio->bi_error)
- bio->bi_error = r;
+ if (unlikely(r) && !bio->bi_status)
+ bio->bi_status = errno_to_blk_status(r);
bio_endio(bio);
}
@@ -1123,7 +1127,7 @@ static void do_endio_flush(struct dm_integrity_c *ic, struct dm_integrity_io *di
{
struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
- if (unlikely(dio->fua) && likely(!bio->bi_error) && likely(!dm_integrity_failed(ic)))
+ if (unlikely(dio->fua) && likely(!bio->bi_status) && likely(!dm_integrity_failed(ic)))
submit_flush_bio(ic, dio);
else
do_endio(ic, bio);
@@ -1142,9 +1146,9 @@ static void dec_in_flight(struct dm_integrity_io *dio)
bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
- if (unlikely(dio->bi_error) && !bio->bi_error)
- bio->bi_error = dio->bi_error;
- if (likely(!bio->bi_error) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
+ if (unlikely(dio->bi_status) && !bio->bi_status)
+ bio->bi_status = dio->bi_status;
+ if (likely(!bio->bi_status) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
dio->range.logical_sector += dio->range.n_sectors;
bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT);
INIT_WORK(&dio->work, integrity_bio_wait);
@@ -1318,7 +1322,7 @@ skip_io:
dec_in_flight(dio);
return;
error:
- dio->bi_error = r;
+ dio->bi_status = errno_to_blk_status(r);
dec_in_flight(dio);
}
@@ -1331,7 +1335,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
sector_t area, offset;
dio->ic = ic;
- dio->bi_error = 0;
+ dio->bi_status = 0;
if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
submit_flush_bio(ic, dio);
@@ -1352,13 +1356,13 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx",
(unsigned long long)dio->range.logical_sector, bio_sectors(bio),
(unsigned long long)ic->provided_data_sectors);
- return -EIO;
+ return DM_MAPIO_KILL;
}
if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned)(ic->sectors_per_block - 1))) {
DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x",
ic->sectors_per_block,
(unsigned long long)dio->range.logical_sector, bio_sectors(bio));
- return -EIO;
+ return DM_MAPIO_KILL;
}
if (ic->sectors_per_block > 1) {
@@ -1368,7 +1372,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
if (unlikely((bv.bv_offset | bv.bv_len) & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) {
DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary",
bv.bv_offset, bv.bv_len, ic->sectors_per_block);
- return -EIO;
+ return DM_MAPIO_KILL;
}
}
}
@@ -1383,18 +1387,18 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
wanted_tag_size *= ic->tag_size;
if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) {
DMERR("Invalid integrity data size %u, expected %u", bip->bip_iter.bi_size, wanted_tag_size);
- return -EIO;
+ return DM_MAPIO_KILL;
}
}
} else {
if (unlikely(bip != NULL)) {
DMERR("Unexpected integrity data when using internal hash");
- return -EIO;
+ return DM_MAPIO_KILL;
}
}
if (unlikely(ic->mode == 'R') && unlikely(dio->write))
- return -EIO;
+ return DM_MAPIO_KILL;
get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset);
@@ -2374,21 +2378,6 @@ static void dm_integrity_set(struct dm_target *ti, struct dm_integrity_c *ic)
blk_queue_max_integrity_segments(disk->queue, UINT_MAX);
}
-/* FIXME: use new kvmalloc */
-static void *dm_integrity_kvmalloc(size_t size, gfp_t gfp)
-{
- void *ptr = NULL;
-
- if (size <= PAGE_SIZE)
- ptr = kmalloc(size, GFP_KERNEL | gfp);
- if (!ptr && size <= KMALLOC_MAX_SIZE)
- ptr = kmalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY | gfp);
- if (!ptr)
- ptr = __vmalloc(size, GFP_KERNEL | gfp, PAGE_KERNEL);
-
- return ptr;
-}
-
static void dm_integrity_free_page_list(struct dm_integrity_c *ic, struct page_list *pl)
{
unsigned i;
@@ -2407,7 +2396,7 @@ static struct page_list *dm_integrity_alloc_page_list(struct dm_integrity_c *ic)
struct page_list *pl;
unsigned i;
- pl = dm_integrity_kvmalloc(page_list_desc_size, __GFP_ZERO);
+ pl = kvmalloc(page_list_desc_size, GFP_KERNEL | __GFP_ZERO);
if (!pl)
return NULL;
@@ -2437,7 +2426,7 @@ static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_int
struct scatterlist **sl;
unsigned i;
- sl = dm_integrity_kvmalloc(ic->journal_sections * sizeof(struct scatterlist *), __GFP_ZERO);
+ sl = kvmalloc(ic->journal_sections * sizeof(struct scatterlist *), GFP_KERNEL | __GFP_ZERO);
if (!sl)
return NULL;
@@ -2453,7 +2442,7 @@ static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_int
n_pages = (end_index - start_index + 1);
- s = dm_integrity_kvmalloc(n_pages * sizeof(struct scatterlist), 0);
+ s = kvmalloc(n_pages * sizeof(struct scatterlist), GFP_KERNEL);
if (!s) {
dm_integrity_free_journal_scatterlist(ic, sl);
return NULL;
@@ -2617,7 +2606,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
goto bad;
}
- sg = dm_integrity_kvmalloc((ic->journal_pages + 1) * sizeof(struct scatterlist), 0);
+ sg = kvmalloc((ic->journal_pages + 1) * sizeof(struct scatterlist), GFP_KERNEL);
if (!sg) {
*error = "Unable to allocate sg list";
r = -ENOMEM;
@@ -2673,7 +2662,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
r = -ENOMEM;
goto bad;
}
- ic->sk_requests = dm_integrity_kvmalloc(ic->journal_sections * sizeof(struct skcipher_request *), __GFP_ZERO);
+ ic->sk_requests = kvmalloc(ic->journal_sections * sizeof(struct skcipher_request *), GFP_KERNEL | __GFP_ZERO);
if (!ic->sk_requests) {
*error = "Unable to allocate sk requests";
r = -ENOMEM;
@@ -2740,7 +2729,7 @@ retest_commit_id:
r = -ENOMEM;
goto bad;
}
- ic->journal_tree = dm_integrity_kvmalloc(journal_tree_size, 0);
+ ic->journal_tree = kvmalloc(journal_tree_size, GFP_KERNEL);
if (!ic->journal_tree) {
*error = "Could not allocate memory for journal tree";
r = -ENOMEM;
@@ -3054,6 +3043,11 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
ti->error = "The device is too small";
goto bad;
}
+ if (ti->len > ic->provided_data_sectors) {
+ r = -EINVAL;
+ ti->error = "Not enough provided sectors for requested mapping size";
+ goto bad;
+ }
if (!buffer_sectors)
buffer_sectors = 1;
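The deleted dm_integrity_kvmalloc() fallback is exactly what the generic kvmalloc() now provides: try kmalloc first, fall back to vmalloc for large or fragmented allocations. A sketch of the replacement pattern (wrapper name hypothetical):

#include <linux/mm.h>

static void *example_alloc(size_t size)
{
	/* Zeroed, kmalloc-first with automatic vmalloc fallback. */
	return kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
}

/* Pair with kvfree(), which handles either backing allocator. */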
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 3702e502466d..25039607f3cb 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -58,7 +58,8 @@ struct dm_io_client *dm_io_client_create(void)
if (!client->pool)
goto bad;
- client->bios = bioset_create(min_ios, 0);
+ client->bios = bioset_create(min_ios, 0, (BIOSET_NEED_BVECS |
+ BIOSET_NEED_RESCUER));
if (!client->bios)
goto bad;
@@ -124,7 +125,7 @@ static void complete_io(struct io *io)
fn(error_bits, context);
}
-static void dec_count(struct io *io, unsigned int region, int error)
+static void dec_count(struct io *io, unsigned int region, blk_status_t error)
{
if (error)
set_bit(region, &io->error_bits);
@@ -137,9 +138,9 @@ static void endio(struct bio *bio)
{
struct io *io;
unsigned region;
- int error;
+ blk_status_t error;
- if (bio->bi_error && bio_data_dir(bio) == READ)
+ if (bio->bi_status && bio_data_dir(bio) == READ)
zero_fill_bio(bio);
/*
@@ -147,7 +148,7 @@ static void endio(struct bio *bio)
*/
retrieve_io_and_region_from_bio(bio, &io, &region);
- error = bio->bi_error;
+ error = bio->bi_status;
bio_put(bio);
dec_count(io, region, error);
@@ -317,9 +318,9 @@ static void do_region(int op, int op_flags, unsigned region,
else if (op == REQ_OP_WRITE_SAME)
special_cmd_max_sectors = q->limits.max_write_same_sectors;
if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES ||
- op == REQ_OP_WRITE_SAME) &&
- special_cmd_max_sectors == 0) {
- dec_count(io, region, -EOPNOTSUPP);
+ op == REQ_OP_WRITE_SAME) && special_cmd_max_sectors == 0) {
+ atomic_inc(&io->count);
+ dec_count(io, region, BLK_STS_NOTSUPP);
return;
}
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 0555b4410e05..41852ae287a5 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1710,12 +1710,13 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern
}
/*
- * Try to avoid low memory issues when a device is suspended.
+ * Use __GFP_HIGH to avoid low memory issues when a device is
+ * suspended and the ioctl is needed to resume it.
* Use kmalloc() rather than vmalloc() when we can.
*/
dmi = NULL;
noio_flag = memalloc_noio_save();
- dmi = kvmalloc(param_kernel->data_size, GFP_KERNEL);
+ dmi = kvmalloc(param_kernel->data_size, GFP_KERNEL | __GFP_HIGH);
memalloc_noio_restore(noio_flag);
if (!dmi) {
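The dm-ioctl hunk combines two precautions for buffers that may be needed to resume a suspended device: memalloc_noio_save() keeps reclaim from issuing IO (which could deadlock on the suspended device), and __GFP_HIGH lets the allocation dip into reserves. A sketch of the pattern (function name hypothetical):

#include <linux/mm.h>
#include <linux/sched/mm.h>

static void *example_ioctl_alloc(size_t size)
{
	unsigned int noio_flag;
	void *buf;

	noio_flag = memalloc_noio_save();	/* no IO from reclaim */
	buf = kvmalloc(size, GFP_KERNEL | __GFP_HIGH);
	memalloc_noio_restore(noio_flag);

	return buf;
}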
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 4dfe38655a49..a1da0eb58a93 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -150,10 +150,10 @@ static void log_end_io(struct bio *bio)
{
struct log_writes_c *lc = bio->bi_private;
- if (bio->bi_error) {
+ if (bio->bi_status) {
unsigned long flags;
- DMERR("Error writing log block, error=%d", bio->bi_error);
+ DMERR("Error writing log block, error=%d", bio->bi_status);
spin_lock_irqsave(&lc->blocks_lock, flags);
lc->logging_enabled = false;
spin_unlock_irqrestore(&lc->blocks_lock, flags);
@@ -586,7 +586,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
spin_lock_irq(&lc->blocks_lock);
lc->logging_enabled = false;
spin_unlock_irq(&lc->blocks_lock);
- return -ENOMEM;
+ return DM_MAPIO_KILL;
}
INIT_LIST_HEAD(&block->list);
pb->block = block;
@@ -639,7 +639,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
spin_lock_irq(&lc->blocks_lock);
lc->logging_enabled = false;
spin_unlock_irq(&lc->blocks_lock);
- return -ENOMEM;
+ return DM_MAPIO_KILL;
}
src = kmap_atomic(bv.bv_page);
@@ -664,7 +664,8 @@ map_bio:
return DM_MAPIO_REMAPPED;
}
-static int normal_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int normal_end_io(struct dm_target *ti, struct bio *bio,
+ blk_status_t *error)
{
struct log_writes_c *lc = ti->private;
struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
@@ -686,7 +687,7 @@ static int normal_end_io(struct dm_target *ti, struct bio *bio, int error)
spin_unlock_irqrestore(&lc->blocks_lock, flags);
}
- return error;
+ return DM_ENDIO_DONE;
}
/*
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 926a6bcb32c8..0e8ab5bb3575 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -447,7 +447,7 @@ failed:
* it has been invoked.
*/
#define dm_report_EIO(m) \
-({ \
+do { \
struct mapped_device *md = dm_table_get_md((m)->ti->table); \
\
pr_debug("%s: returning EIO; QIFNP = %d; SQIFNP = %d; DNFS = %d\n", \
@@ -455,8 +455,7 @@ failed:
test_bit(MPATHF_QUEUE_IF_NO_PATH, &(m)->flags), \
test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &(m)->flags), \
dm_noflush_suspending((m)->ti)); \
- -EIO; \
-})
+} while (0)
/*
* Map cloned requests (request-based multipath)
@@ -481,7 +480,8 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
if (!pgpath) {
if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
return DM_MAPIO_DELAY_REQUEUE;
- return dm_report_EIO(m); /* Failed */
+ dm_report_EIO(m); /* Failed */
+ return DM_MAPIO_KILL;
} else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
if (pg_init_all_paths(m))
@@ -558,13 +558,14 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
if (!pgpath) {
if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
return DM_MAPIO_REQUEUE;
- return dm_report_EIO(m);
+ dm_report_EIO(m);
+ return DM_MAPIO_KILL;
}
mpio->pgpath = pgpath;
mpio->nr_bytes = nr_bytes;
- bio->bi_error = 0;
+ bio->bi_status = 0;
bio->bi_bdev = pgpath->path.dev->bdev;
bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
@@ -620,11 +621,19 @@ static void process_queued_bios(struct work_struct *work)
blk_start_plug(&plug);
while ((bio = bio_list_pop(&bios))) {
r = __multipath_map_bio(m, bio, get_mpio_from_bio(bio));
- if (r < 0 || r == DM_MAPIO_REQUEUE) {
- bio->bi_error = r;
+ switch (r) {
+ case DM_MAPIO_KILL:
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
- } else if (r == DM_MAPIO_REMAPPED)
+ break;
+ case DM_MAPIO_REQUEUE:
+ bio->bi_status = BLK_STS_DM_REQUEUE;
+ bio_endio(bio);
+ break;
+ case DM_MAPIO_REMAPPED:
generic_make_request(bio);
+ break;
+ }
}
blk_finish_plug(&plug);
}
@@ -1441,22 +1450,15 @@ static void activate_path_work(struct work_struct *work)
activate_or_offline_path(pgpath);
}
-static int noretry_error(int error)
+static int noretry_error(blk_status_t error)
{
switch (error) {
- case -EBADE:
- /*
- * EBADE signals an reservation conflict.
- * We shouldn't fail the path here as we can communicate with
- * the target. We should failover to the next path, but in
- * doing so we might be causing a ping-pong between paths.
- * So just return the reservation conflict error.
- */
- case -EOPNOTSUPP:
- case -EREMOTEIO:
- case -EILSEQ:
- case -ENODATA:
- case -ENOSPC:
+ case BLK_STS_NOTSUPP:
+ case BLK_STS_NOSPC:
+ case BLK_STS_TARGET:
+ case BLK_STS_NEXUS:
+ case BLK_STS_MEDIUM:
+ case BLK_STS_RESOURCE:
return 1;
}
@@ -1465,7 +1467,7 @@ static int noretry_error(int error)
}
static int multipath_end_io(struct dm_target *ti, struct request *clone,
- int error, union map_info *map_context)
+ blk_status_t error, union map_info *map_context)
{
struct dm_mpath_io *mpio = get_mpio(map_context);
struct pgpath *pgpath = mpio->pgpath;
@@ -1492,8 +1494,8 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
if (atomic_read(&m->nr_valid_paths) == 0 &&
!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
- if (error == -EIO)
- error = dm_report_EIO(m);
+ if (error == BLK_STS_IOERR)
+ dm_report_EIO(m);
/* complete with the original error */
r = DM_ENDIO_DONE;
}
@@ -1509,23 +1511,27 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
return r;
}
-static int do_end_io_bio(struct multipath *m, struct bio *clone,
- int error, struct dm_mpath_io *mpio)
+static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
+ blk_status_t *error)
{
+ struct multipath *m = ti->private;
+ struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
+ struct pgpath *pgpath = mpio->pgpath;
unsigned long flags;
+ int r = DM_ENDIO_DONE;
- if (!error)
- return 0; /* I/O complete */
-
- if (noretry_error(error))
- return error;
+ if (!*error || noretry_error(*error))
+ goto done;
- if (mpio->pgpath)
- fail_path(mpio->pgpath);
+ if (pgpath)
+ fail_path(pgpath);
if (atomic_read(&m->nr_valid_paths) == 0 &&
- !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
- return dm_report_EIO(m);
+ !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
+ dm_report_EIO(m);
+ *error = BLK_STS_IOERR;
+ goto done;
+ }
/* Queue for the daemon to resubmit */
dm_bio_restore(get_bio_details_from_bio(clone), clone);
@@ -1536,23 +1542,11 @@ static int do_end_io_bio(struct multipath *m, struct bio *clone,
if (!test_bit(MPATHF_QUEUE_IO, &m->flags))
queue_work(kmultipathd, &m->process_queued_bios);
- return DM_ENDIO_INCOMPLETE;
-}
-
-static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int error)
-{
- struct multipath *m = ti->private;
- struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
- struct pgpath *pgpath;
- struct path_selector *ps;
- int r;
-
- BUG_ON(!mpio);
-
- r = do_end_io_bio(m, clone, error, mpio);
- pgpath = mpio->pgpath;
+ r = DM_ENDIO_INCOMPLETE;
+done:
if (pgpath) {
- ps = &pgpath->pg->ps;
+ struct path_selector *ps = &pgpath->pg->ps;
+
if (ps->type->end_io)
ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
}
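The reworked noretry_error() above classifies blk_status_t codes for which retrying on another path cannot help. A sketch of the idea (helper name hypothetical; the exact code list is this patch's choice, not a general rule):

#include <linux/blk_types.h>

static bool example_retry_on_other_path(blk_status_t status)
{
	switch (status) {
	case BLK_STS_NOTSUPP:	/* unsupported everywhere, not path-specific */
	case BLK_STS_TARGET:	/* target-side error follows the LUN */
	case BLK_STS_NEXUS:	/* reservation conflict */
	case BLK_STS_MEDIUM:	/* media error follows the data */
		return false;	/* report as-is; don't fail the path */
	default:
		return true;	/* likely transport trouble; fail over */
	}
}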
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 7d893228c40f..b4b75dad816a 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1927,7 +1927,7 @@ struct dm_raid_superblock {
/********************************************************************
* BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!!
*
- * FEATURE_FLAG_SUPPORTS_V190 in the features member indicates that those exist
+ * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist
*/
__le32 flags; /* Flags defining array states for reshaping */
@@ -2092,6 +2092,11 @@ static void super_sync(struct mddev *mddev, struct md_rdev *rdev)
sb->layout = cpu_to_le32(mddev->layout);
sb->stripe_sectors = cpu_to_le32(mddev->chunk_sectors);
+ /********************************************************************
+ * BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!!
+ *
+ * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist
+ */
sb->new_level = cpu_to_le32(mddev->new_level);
sb->new_layout = cpu_to_le32(mddev->new_layout);
sb->new_stripe_sectors = cpu_to_le32(mddev->new_chunk_sectors);
@@ -2438,8 +2443,14 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev)
mddev->bitmap_info.default_offset = mddev->bitmap_info.offset;
if (!test_and_clear_bit(FirstUse, &rdev->flags)) {
- /* Retrieve device size stored in superblock to be prepared for shrink */
- rdev->sectors = le64_to_cpu(sb->sectors);
+ /*
+ * Retrieve rdev size stored in superblock to be prepared for shrink.
+ * Check extended superblock members are present otherwise the size
+ * will not be set!
+ */
+ if (le32_to_cpu(sb->compat_features) & FEATURE_FLAG_SUPPORTS_V190)
+ rdev->sectors = le64_to_cpu(sb->sectors);
+
rdev->recovery_offset = le64_to_cpu(sb->disk_recovery_offset);
if (rdev->recovery_offset == MaxSector)
set_bit(In_sync, &rdev->flags);
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index a95cbb80fb34..a4fbd911d566 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -145,6 +145,7 @@ static void dispatch_bios(void *context, struct bio_list *bio_list)
struct dm_raid1_bio_record {
struct mirror *m;
+ /* if details->bi_bdev == NULL, details were not saved */
struct dm_bio_details details;
region_t write_region;
};
@@ -260,7 +261,7 @@ static int mirror_flush(struct dm_target *ti)
struct mirror *m;
struct dm_io_request io_req = {
.bi_op = REQ_OP_WRITE,
- .bi_op_flags = REQ_PREFLUSH,
+ .bi_op_flags = REQ_PREFLUSH | REQ_SYNC,
.mem.type = DM_IO_KMEM,
.mem.ptr.addr = NULL,
.client = ms->io_client,
@@ -490,9 +491,9 @@ static void hold_bio(struct mirror_set *ms, struct bio *bio)
* If device is suspended, complete the bio.
*/
if (dm_noflush_suspending(ms->ti))
- bio->bi_error = DM_ENDIO_REQUEUE;
+ bio->bi_status = BLK_STS_DM_REQUEUE;
else
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return;
@@ -626,7 +627,7 @@ static void write_callback(unsigned long error, void *context)
* degrade the array.
*/
if (bio_op(bio) == REQ_OP_DISCARD) {
- bio->bi_error = -EOPNOTSUPP;
+ bio->bi_status = BLK_STS_NOTSUPP;
bio_endio(bio);
return;
}
@@ -1198,6 +1199,8 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
struct dm_raid1_bio_record *bio_record =
dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record));
+ bio_record->details.bi_bdev = NULL;
+
if (rw == WRITE) {
/* Save region for mirror_end_io() handler */
bio_record->write_region = dm_rh_bio_to_region(ms->rh, bio);
@@ -1207,14 +1210,14 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
r = log->type->in_sync(log, dm_rh_bio_to_region(ms->rh, bio), 0);
if (r < 0 && r != -EWOULDBLOCK)
- return r;
+ return DM_MAPIO_KILL;
/*
* If region is not in-sync queue the bio.
*/
if (!r || (r == -EWOULDBLOCK)) {
if (bio->bi_opf & REQ_RAHEAD)
- return -EWOULDBLOCK;
+ return DM_MAPIO_KILL;
queue_bio(ms, bio, rw);
return DM_MAPIO_SUBMITTED;
@@ -1226,7 +1229,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
*/
m = choose_mirror(ms, bio->bi_iter.bi_sector);
if (unlikely(!m))
- return -EIO;
+ return DM_MAPIO_KILL;
dm_bio_record(&bio_record->details, bio);
bio_record->m = m;
@@ -1236,7 +1239,8 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_REMAPPED;
}
-static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int mirror_end_io(struct dm_target *ti, struct bio *bio,
+ blk_status_t *error)
{
int rw = bio_data_dir(bio);
struct mirror_set *ms = (struct mirror_set *) ti->private;
@@ -1252,16 +1256,26 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
if (!(bio->bi_opf & REQ_PREFLUSH) &&
bio_op(bio) != REQ_OP_DISCARD)
dm_rh_dec(ms->rh, bio_record->write_region);
- return error;
+ return DM_ENDIO_DONE;
}
- if (error == -EOPNOTSUPP)
- return error;
+ if (*error == BLK_STS_NOTSUPP)
+ goto out;
+
+ if (bio->bi_opf & REQ_RAHEAD)
+ goto out;
- if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD))
- return error;
+ if (unlikely(*error)) {
+ if (!bio_record->details.bi_bdev) {
+ /*
+ * There wasn't enough memory to record necessary
+ * information for a retry or there was no other
+ * mirror in-sync.
+ */
+ DMERR_LIMIT("Mirror read failed.");
+ return DM_ENDIO_DONE;
+ }
- if (unlikely(error)) {
m = bio_record->m;
DMERR("Mirror read failed from %s. Trying alternative device.",
@@ -1277,7 +1291,8 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
bd = &bio_record->details;
dm_bio_restore(bd, bio);
- bio->bi_error = 0;
+ bio_record->details.bi_bdev = NULL;
+ bio->bi_status = 0;
queue_bio(ms, bio, rw);
return DM_ENDIO_INCOMPLETE;
@@ -1285,7 +1300,10 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
DMERR("All replicated volumes dead, failing I/O");
}
- return error;
+out:
+ bio_record->details.bi_bdev = NULL;
+
+ return DM_ENDIO_DONE;
}
static void mirror_presuspend(struct dm_target *ti)
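
The retry logic above hinges on the dm_bio_record()/dm_bio_restore() pair (both are existing DM helpers): the bio's fields are recorded at map time and rewound in end_io before the bio is requeued to another mirror, with details.bi_bdev doubling as the "details were saved" sentinel. A condensed life cycle, stitched together from the hunks above:

	/* mirror_map(): remember how the bio looked before remapping */
	dm_bio_record(&bio_record->details, bio);

	/* mirror_end_io(), read failure with another in-sync mirror: */
	dm_bio_restore(&bio_record->details, bio);
	bio_record->details.bi_bdev = NULL;	/* consume the record */
	bio->bi_status = 0;			/* forget the failure */
	queue_bio(ms, bio, rw);			/* retry on the new default mirror */
	return DM_ENDIO_INCOMPLETE;		/* tell the core we kept the bio */
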
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 2af27026aa2e..c6ebc5b1e00e 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -71,7 +71,7 @@ static void dm_old_start_queue(struct request_queue *q)
static void dm_mq_start_queue(struct request_queue *q)
{
- blk_mq_start_stopped_hw_queues(q, true);
+ blk_mq_unquiesce_queue(q);
blk_mq_kick_requeue_list(q);
}
@@ -119,7 +119,7 @@ static void end_clone_bio(struct bio *clone)
struct dm_rq_target_io *tio = info->tio;
struct bio *bio = info->orig;
unsigned int nr_bytes = info->orig->bi_iter.bi_size;
- int error = clone->bi_error;
+ blk_status_t error = clone->bi_status;
bio_put(clone);
@@ -158,7 +158,7 @@ static void end_clone_bio(struct bio *clone)
* Do not use blk_end_request() here, because it may complete
* the original request before the clone, and break the ordering.
*/
- blk_update_request(tio->orig, 0, nr_bytes);
+ blk_update_request(tio->orig, BLK_STS_OK, nr_bytes);
}
static struct dm_rq_target_io *tio_from_request(struct request *rq)
@@ -216,7 +216,7 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
* Must be called without clone's queue lock held,
* see end_clone_request() for more details.
*/
-static void dm_end_request(struct request *clone, int error)
+static void dm_end_request(struct request *clone, blk_status_t error)
{
int rw = rq_data_dir(clone);
struct dm_rq_target_io *tio = clone->end_io_data;
@@ -285,7 +285,7 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_
rq_completed(md, rw, false);
}
-static void dm_done(struct request *clone, int error, bool mapped)
+static void dm_done(struct request *clone, blk_status_t error, bool mapped)
{
int r = DM_ENDIO_DONE;
struct dm_rq_target_io *tio = clone->end_io_data;
@@ -298,7 +298,7 @@ static void dm_done(struct request *clone, int error, bool mapped)
r = rq_end_io(tio->ti, clone, error, &tio->info);
}
- if (unlikely(error == -EREMOTEIO)) {
+ if (unlikely(error == BLK_STS_TARGET)) {
if (req_op(clone) == REQ_OP_WRITE_SAME &&
!clone->q->limits.max_write_same_sectors)
disable_write_same(tio->md);
@@ -358,7 +358,7 @@ static void dm_softirq_done(struct request *rq)
* Complete the clone and the original request with the error status
* through softirq context.
*/
-static void dm_complete_request(struct request *rq, int error)
+static void dm_complete_request(struct request *rq, blk_status_t error)
{
struct dm_rq_target_io *tio = tio_from_request(rq);
@@ -375,7 +375,7 @@ static void dm_complete_request(struct request *rq, int error)
* Target's rq_end_io() function isn't called.
* This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
*/
-static void dm_kill_unmapped_request(struct request *rq, int error)
+static void dm_kill_unmapped_request(struct request *rq, blk_status_t error)
{
rq->rq_flags |= RQF_FAILED;
dm_complete_request(rq, error);
@@ -384,7 +384,7 @@ static void dm_kill_unmapped_request(struct request *rq, int error)
/*
* Called with the clone's queue lock held (in the case of .request_fn)
*/
-static void end_clone_request(struct request *clone, int error)
+static void end_clone_request(struct request *clone, blk_status_t error)
{
struct dm_rq_target_io *tio = clone->end_io_data;
@@ -401,7 +401,7 @@ static void end_clone_request(struct request *clone, int error)
static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
{
- int r;
+ blk_status_t r;
if (blk_queue_io_stat(clone->q))
clone->rq_flags |= RQF_IO_STAT;
@@ -506,7 +506,8 @@ static int map_request(struct dm_rq_target_io *tio)
break;
case DM_MAPIO_KILL:
/* The target wants to complete the I/O */
- dm_kill_unmapped_request(rq, -EIO);
+ dm_kill_unmapped_request(rq, BLK_STS_IOERR);
+ break;
default:
DMWARN("unimplemented target map return value: %d", r);
BUG();
@@ -726,7 +727,7 @@ static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
return __dm_rq_init_rq(set->driver_data, rq);
}
-static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct request *rq = bd->rq;
@@ -743,7 +744,7 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
}
if (ti->type->busy && ti->type->busy(ti))
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
dm_start_request(md, rq);
@@ -761,10 +762,10 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
rq_end_stats(md, rq);
rq_completed(md, rq_data_dir(rq), false);
blk_mq_delay_run_hw_queue(hctx, 100/*ms*/);
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
}
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
static const struct blk_mq_ops dm_mq_ops = {
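
dm-rq's blk-mq entry point now speaks the generic status language: .queue_rq returns a blk_status_t, with BLK_STS_RESOURCE replacing BLK_MQ_RQ_QUEUE_BUSY and BLK_STS_OK replacing BLK_MQ_RQ_QUEUE_OK. A minimal sketch of the new contract (the driver and its busy check are hypothetical):

	static blk_status_t example_queue_rq(struct blk_mq_hw_ctx *hctx,
					     const struct blk_mq_queue_data *bd)
	{
		struct request *rq = bd->rq;

		if (example_device_busy(hctx->queue->queuedata))	/* hypothetical */
			return BLK_STS_RESOURCE;	/* blk-mq requeues and retries */

		blk_mq_start_request(rq);
		/* ... hand the request to the hardware ... */
		return BLK_STS_OK;
	}
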
diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h
index f0020d21b95f..9813922e4fe5 100644
--- a/drivers/md/dm-rq.h
+++ b/drivers/md/dm-rq.h
@@ -24,7 +24,7 @@ struct dm_rq_target_io {
struct dm_target *ti;
struct request *orig, *clone;
struct kthread_work work;
- int error;
+ blk_status_t error;
union map_info info;
struct dm_stats_aux stats_aux;
unsigned long duration_jiffies;
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index b93476c3ba3f..c5534d294773 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -741,7 +741,8 @@ static void persistent_commit_exception(struct dm_exception_store *store,
/*
* Commit exceptions to disk.
*/
- if (ps->valid && area_io(ps, REQ_OP_WRITE, REQ_PREFLUSH | REQ_FUA))
+ if (ps->valid && area_io(ps, REQ_OP_WRITE,
+ REQ_PREFLUSH | REQ_FUA | REQ_SYNC))
ps->valid = 0;
/*
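
The REQ_SYNC addition here (and in the mirror_flush() hunk above, plus md_super_write(), the DM flush bio, and the raid5-cache metadata writes further down) looks cosmetic but is not: the likely motivation is that on devices without a volatile write cache the block layer strips REQ_PREFLUSH and REQ_FUA, and without an explicit REQ_SYNC the remaining write would then be treated as background I/O. The resulting flag pattern for a synchronous metadata commit:

	/* commit write: flush preceding data, force unit access, and mark it
	 * synchronous so it stays urgent even after PREFLUSH/FUA are stripped */
	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA | REQ_SYNC;
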
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index e152d9817c81..1ba41048b438 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1590,7 +1590,7 @@ static void full_bio_end_io(struct bio *bio)
{
void *callback_data = bio->bi_private;
- dm_kcopyd_do_callback(callback_data, 0, bio->bi_error ? 1 : 0);
+ dm_kcopyd_do_callback(callback_data, 0, bio->bi_status ? 1 : 0);
}
static void start_full_bio(struct dm_snap_pending_exception *pe,
@@ -1690,7 +1690,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
/* Full snapshots are not usable */
/* To get here the table must be live so s->active is always set. */
if (!s->valid)
- return -EIO;
+ return DM_MAPIO_KILL;
/* FIXME: should only take write lock if we need
* to copy an exception */
@@ -1698,7 +1698,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
if (!s->valid || (unlikely(s->snapshot_overflowed) &&
bio_data_dir(bio) == WRITE)) {
- r = -EIO;
+ r = DM_MAPIO_KILL;
goto out_unlock;
}
@@ -1723,7 +1723,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
if (!s->valid || s->snapshot_overflowed) {
free_pending_exception(pe);
- r = -EIO;
+ r = DM_MAPIO_KILL;
goto out_unlock;
}
@@ -1741,7 +1741,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
DMERR("Snapshot overflowed: Unable to allocate exception.");
} else
__invalidate_snapshot(s, -ENOMEM);
- r = -EIO;
+ r = DM_MAPIO_KILL;
goto out_unlock;
}
}
@@ -1851,14 +1851,15 @@ out_unlock:
return r;
}
-static int snapshot_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
+ blk_status_t *error)
{
struct dm_snapshot *s = ti->private;
if (is_bio_tracked(bio))
stop_tracking_chunk(s, bio);
- return 0;
+ return DM_ENDIO_DONE;
}
static void snapshot_merge_presuspend(struct dm_target *ti)
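
The snapshot hunks above swap raw -EIO returns from ->map for DM_MAPIO_KILL; the core's __map_bio() (see the dm.c hunk further down) turns that into BLK_STS_IOERR and completes the bio itself. A condensed sketch of the map-side contract (target and helpers hypothetical):

	static int example_map(struct dm_target *ti, struct bio *bio)
	{
		if (!example_target_usable(ti->private))	/* hypothetical */
			return DM_MAPIO_KILL;		/* core fails the bio with BLK_STS_IOERR */

		bio->bi_bdev = example_pick_dev(ti->private);	/* hypothetical */
		return DM_MAPIO_REMAPPED;		/* core dispatches the clone */
	}
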
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 75152482f3ad..11621a0af887 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -375,20 +375,21 @@ static void stripe_status(struct dm_target *ti, status_type_t type,
}
}
-static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int stripe_end_io(struct dm_target *ti, struct bio *bio,
+ blk_status_t *error)
{
unsigned i;
char major_minor[16];
struct stripe_c *sc = ti->private;
- if (!error)
- return 0; /* I/O complete */
+ if (!*error)
+ return DM_ENDIO_DONE; /* I/O complete */
- if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD))
- return error;
+ if (bio->bi_opf & REQ_RAHEAD)
+ return DM_ENDIO_DONE;
- if (error == -EOPNOTSUPP)
- return error;
+ if (*error == BLK_STS_NOTSUPP)
+ return DM_ENDIO_DONE;
memset(major_minor, 0, sizeof(major_minor));
sprintf(major_minor, "%d:%d",
@@ -409,7 +410,7 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error)
schedule_work(&sc->trigger_event);
}
- return error;
+ return DM_ENDIO_DONE;
}
static int stripe_iterate_devices(struct dm_target *ti,
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index b242b750542f..c0d7e60820c4 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -128,7 +128,7 @@ static void io_err_dtr(struct dm_target *tt)
static int io_err_map(struct dm_target *tt, struct bio *bio)
{
- return -EIO;
+ return DM_MAPIO_KILL;
}
static int io_err_clone_and_map_rq(struct dm_target *ti, struct request *rq,
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 0f0251d0d337..d31d18d9727c 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -484,11 +484,11 @@ static int __write_initial_superblock(struct dm_pool_metadata *pmd)
if (r < 0)
return r;
- r = save_sm_roots(pmd);
+ r = dm_tm_pre_commit(pmd->tm);
if (r < 0)
return r;
- r = dm_tm_pre_commit(pmd->tm);
+ r = save_sm_roots(pmd);
if (r < 0)
return r;
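
The dm-thin-metadata hunk is a pure ordering fix, presumably because dm_tm_pre_commit() commits the space maps and can still move their on-disk roots; capturing the roots first would leave the superblock pointing at stale ones. The corrected sequence:

	r = dm_tm_pre_commit(pmd->tm);	/* 1: commit the space maps */
	if (r < 0)
		return r;

	r = save_sm_roots(pmd);		/* 2: only now snapshot their roots */
	if (r < 0)
		return r;
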
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 17ad50daed08..9dec2f8cc739 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -383,8 +383,8 @@ static void end_discard(struct discard_op *op, int r)
* Even if r is set, there could be sub discards in flight that we
* need to wait for.
*/
- if (r && !op->parent_bio->bi_error)
- op->parent_bio->bi_error = r;
+ if (r && !op->parent_bio->bi_status)
+ op->parent_bio->bi_status = errno_to_blk_status(r);
bio_endio(op->parent_bio);
}
@@ -450,22 +450,20 @@ static void cell_release_no_holder(struct pool *pool,
}
static void cell_error_with_code(struct pool *pool,
- struct dm_bio_prison_cell *cell, int error_code)
+ struct dm_bio_prison_cell *cell, blk_status_t error_code)
{
dm_cell_error(pool->prison, cell, error_code);
dm_bio_prison_free_cell(pool->prison, cell);
}
-static int get_pool_io_error_code(struct pool *pool)
+static blk_status_t get_pool_io_error_code(struct pool *pool)
{
- return pool->out_of_data_space ? -ENOSPC : -EIO;
+ return pool->out_of_data_space ? BLK_STS_NOSPC : BLK_STS_IOERR;
}
static void cell_error(struct pool *pool, struct dm_bio_prison_cell *cell)
{
- int error = get_pool_io_error_code(pool);
-
- cell_error_with_code(pool, cell, error);
+ cell_error_with_code(pool, cell, get_pool_io_error_code(pool));
}
static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
@@ -475,7 +473,7 @@ static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
static void cell_requeue(struct pool *pool, struct dm_bio_prison_cell *cell)
{
- cell_error_with_code(pool, cell, DM_ENDIO_REQUEUE);
+ cell_error_with_code(pool, cell, BLK_STS_DM_REQUEUE);
}
/*----------------------------------------------------------------*/
@@ -555,17 +553,18 @@ static void __merge_bio_list(struct bio_list *bios, struct bio_list *master)
bio_list_init(master);
}
-static void error_bio_list(struct bio_list *bios, int error)
+static void error_bio_list(struct bio_list *bios, blk_status_t error)
{
struct bio *bio;
while ((bio = bio_list_pop(bios))) {
- bio->bi_error = error;
+ bio->bi_status = error;
bio_endio(bio);
}
}
-static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master, int error)
+static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master,
+ blk_status_t error)
{
struct bio_list bios;
unsigned long flags;
@@ -608,11 +607,11 @@ static void requeue_io(struct thin_c *tc)
__merge_bio_list(&bios, &tc->retry_on_resume_list);
spin_unlock_irqrestore(&tc->lock, flags);
- error_bio_list(&bios, DM_ENDIO_REQUEUE);
+ error_bio_list(&bios, BLK_STS_DM_REQUEUE);
requeue_deferred_cells(tc);
}
-static void error_retry_list_with_code(struct pool *pool, int error)
+static void error_retry_list_with_code(struct pool *pool, blk_status_t error)
{
struct thin_c *tc;
@@ -624,9 +623,7 @@ static void error_retry_list_with_code(struct pool *pool, int error)
static void error_retry_list(struct pool *pool)
{
- int error = get_pool_io_error_code(pool);
-
- error_retry_list_with_code(pool, error);
+ error_retry_list_with_code(pool, get_pool_io_error_code(pool));
}
/*
@@ -774,7 +771,7 @@ struct dm_thin_new_mapping {
*/
atomic_t prepare_actions;
- int err;
+ blk_status_t status;
struct thin_c *tc;
dm_block_t virt_begin, virt_end;
dm_block_t data_block;
@@ -814,7 +811,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
{
struct dm_thin_new_mapping *m = context;
- m->err = read_err || write_err ? -EIO : 0;
+ m->status = read_err || write_err ? BLK_STS_IOERR : 0;
complete_mapping_preparation(m);
}
@@ -825,7 +822,7 @@ static void overwrite_endio(struct bio *bio)
bio->bi_end_io = m->saved_bi_end_io;
- m->err = bio->bi_error;
+ m->status = bio->bi_status;
complete_mapping_preparation(m);
}
@@ -925,7 +922,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
struct bio *bio = m->bio;
int r;
- if (m->err) {
+ if (m->status) {
cell_error(pool, m->cell);
goto out;
}
@@ -1094,6 +1091,19 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
return;
}
+ /*
+ * Increment the unmapped blocks. This prevents a race between the
+ * passdown io and reallocation of freed blocks.
+ */
+ r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end);
+ if (r) {
+ metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
+ bio_io_error(m->bio);
+ cell_defer_no_holder(tc, m->cell);
+ mempool_free(m, pool->mapping_pool);
+ return;
+ }
+
discard_parent = bio_alloc(GFP_NOIO, 1);
if (!discard_parent) {
DMWARN("%s: unable to allocate top level discard bio for passdown. Skipping passdown.",
@@ -1114,19 +1124,6 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
end_discard(&op, r);
}
}
-
- /*
- * Increment the unmapped blocks. This prevents a race between the
- * passdown io and reallocation of freed blocks.
- */
- r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end);
- if (r) {
- metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
- bio_io_error(m->bio);
- cell_defer_no_holder(tc, m->cell);
- mempool_free(m, pool->mapping_pool);
- return;
- }
}
static void process_prepared_discard_passdown_pt2(struct dm_thin_new_mapping *m)
@@ -1495,7 +1492,7 @@ static void retry_on_resume(struct bio *bio)
spin_unlock_irqrestore(&tc->lock, flags);
}
-static int should_error_unserviceable_bio(struct pool *pool)
+static blk_status_t should_error_unserviceable_bio(struct pool *pool)
{
enum pool_mode m = get_pool_mode(pool);
@@ -1503,27 +1500,27 @@ static int should_error_unserviceable_bio(struct pool *pool)
case PM_WRITE:
/* Shouldn't get here */
DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode");
- return -EIO;
+ return BLK_STS_IOERR;
case PM_OUT_OF_DATA_SPACE:
- return pool->pf.error_if_no_space ? -ENOSPC : 0;
+ return pool->pf.error_if_no_space ? BLK_STS_NOSPC : 0;
case PM_READ_ONLY:
case PM_FAIL:
- return -EIO;
+ return BLK_STS_IOERR;
default:
/* Shouldn't get here */
DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode");
- return -EIO;
+ return BLK_STS_IOERR;
}
}
static void handle_unserviceable_bio(struct pool *pool, struct bio *bio)
{
- int error = should_error_unserviceable_bio(pool);
+ blk_status_t error = should_error_unserviceable_bio(pool);
if (error) {
- bio->bi_error = error;
+ bio->bi_status = error;
bio_endio(bio);
} else
retry_on_resume(bio);
@@ -1533,7 +1530,7 @@ static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *c
{
struct bio *bio;
struct bio_list bios;
- int error;
+ blk_status_t error;
error = should_error_unserviceable_bio(pool);
if (error) {
@@ -2071,7 +2068,8 @@ static void process_thin_deferred_bios(struct thin_c *tc)
unsigned count = 0;
if (tc->requeue_mode) {
- error_thin_bio_list(tc, &tc->deferred_bio_list, DM_ENDIO_REQUEUE);
+ error_thin_bio_list(tc, &tc->deferred_bio_list,
+ BLK_STS_DM_REQUEUE);
return;
}
@@ -2322,7 +2320,7 @@ static void do_no_space_timeout(struct work_struct *ws)
if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
pool->pf.error_if_no_space = true;
notify_of_pool_mode_change_to_oods(pool);
- error_retry_list_with_code(pool, -ENOSPC);
+ error_retry_list_with_code(pool, BLK_STS_NOSPC);
}
}
@@ -2624,7 +2622,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
thin_hook_bio(tc, bio);
if (tc->requeue_mode) {
- bio->bi_error = DM_ENDIO_REQUEUE;
+ bio->bi_status = BLK_STS_DM_REQUEUE;
bio_endio(bio);
return DM_MAPIO_SUBMITTED;
}
@@ -4177,7 +4175,8 @@ static int thin_map(struct dm_target *ti, struct bio *bio)
return thin_bio_map(ti, bio);
}
-static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
+static int thin_endio(struct dm_target *ti, struct bio *bio,
+ blk_status_t *err)
{
unsigned long flags;
struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
@@ -4212,7 +4211,7 @@ static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
if (h->cell)
cell_defer_no_holder(h->tc, h->cell);
- return 0;
+ return DM_ENDIO_DONE;
}
static void thin_presuspend(struct dm_target *ti)
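
With bi_status now carrying a blk_status_t, code that still computes errnos has to translate at the boundary, as end_discard() and verity_work() do in this patch. Both conversion helpers below are real block-layer API from the same series; the surrounding lines are only a sketch:

	int r = some_errno_returning_call();	/* hypothetical */

	if (r && !bio->bi_status)
		bio->bi_status = errno_to_blk_status(r);

	/* and the reverse direction, when an errno is wanted for logging */
	pr_debug("status=%d\n", blk_status_to_errno(bio->bi_status));
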
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 97de961a3bfc..b46705ebf01f 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -166,7 +166,7 @@ static int verity_hash_init(struct dm_verity *v, struct ahash_request *req,
return r;
}
- if (likely(v->version >= 1))
+ if (likely(v->salt_size && (v->version >= 1)))
r = verity_hash_update(v, req, v->salt, v->salt_size, res);
return r;
@@ -177,7 +177,7 @@ static int verity_hash_final(struct dm_verity *v, struct ahash_request *req,
{
int r;
- if (unlikely(!v->version)) {
+ if (unlikely(v->salt_size && (!v->version))) {
r = verity_hash_update(v, req, v->salt, v->salt_size, res);
if (r < 0) {
@@ -538,13 +538,13 @@ static int verity_verify_io(struct dm_verity_io *io)
/*
* End one "io" structure with a given error.
*/
-static void verity_finish_io(struct dm_verity_io *io, int error)
+static void verity_finish_io(struct dm_verity_io *io, blk_status_t status)
{
struct dm_verity *v = io->v;
struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
bio->bi_end_io = io->orig_bi_end_io;
- bio->bi_error = error;
+ bio->bi_status = status;
verity_fec_finish_io(io);
@@ -555,15 +555,15 @@ static void verity_work(struct work_struct *w)
{
struct dm_verity_io *io = container_of(w, struct dm_verity_io, work);
- verity_finish_io(io, verity_verify_io(io));
+ verity_finish_io(io, errno_to_blk_status(verity_verify_io(io)));
}
static void verity_end_io(struct bio *bio)
{
struct dm_verity_io *io = bio->bi_private;
- if (bio->bi_error && !verity_fec_is_enabled(io->v)) {
- verity_finish_io(io, bio->bi_error);
+ if (bio->bi_status && !verity_fec_is_enabled(io->v)) {
+ verity_finish_io(io, bio->bi_status);
return;
}
@@ -643,17 +643,17 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
if (((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
((1 << (v->data_dev_block_bits - SECTOR_SHIFT)) - 1)) {
DMERR_LIMIT("unaligned io");
- return -EIO;
+ return DM_MAPIO_KILL;
}
if (bio_end_sector(bio) >>
(v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) {
DMERR_LIMIT("io out of range");
- return -EIO;
+ return DM_MAPIO_KILL;
}
if (bio_data_dir(bio) == WRITE)
- return -EIO;
+ return DM_MAPIO_KILL;
io = dm_per_bio_data(bio, ti->per_io_data_size);
io->v = v;
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c
index b616f11d8473..b65ca8dcfbdc 100644
--- a/drivers/md/dm-zero.c
+++ b/drivers/md/dm-zero.c
@@ -39,7 +39,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio)
case REQ_OP_READ:
if (bio->bi_opf & REQ_RAHEAD) {
/* readahead of null bytes only wastes buffer cache */
- return -EIO;
+ return DM_MAPIO_KILL;
}
zero_fill_bio(bio);
break;
@@ -47,7 +47,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio)
/* writes get silently dropped */
break;
default:
- return -EIO;
+ return DM_MAPIO_KILL;
}
bio_endio(bio);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 6ef9500226c0..402946035308 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -63,7 +63,7 @@ static struct workqueue_struct *deferred_remove_workqueue;
*/
struct dm_io {
struct mapped_device *md;
- int error;
+ blk_status_t status;
atomic_t io_count;
struct bio *bio;
unsigned long start_time;
@@ -768,23 +768,24 @@ static int __noflush_suspending(struct mapped_device *md)
* Decrements the number of outstanding ios that a bio has been
* cloned into, completing the original io if necc.
*/
-static void dec_pending(struct dm_io *io, int error)
+static void dec_pending(struct dm_io *io, blk_status_t error)
{
unsigned long flags;
- int io_error;
+ blk_status_t io_error;
struct bio *bio;
struct mapped_device *md = io->md;
/* Push-back supersedes any I/O errors */
if (unlikely(error)) {
spin_lock_irqsave(&io->endio_lock, flags);
- if (!(io->error > 0 && __noflush_suspending(md)))
- io->error = error;
+ if (!(io->status == BLK_STS_DM_REQUEUE &&
+ __noflush_suspending(md)))
+ io->status = error;
spin_unlock_irqrestore(&io->endio_lock, flags);
}
if (atomic_dec_and_test(&io->io_count)) {
- if (io->error == DM_ENDIO_REQUEUE) {
+ if (io->status == BLK_STS_DM_REQUEUE) {
/*
* Target requested pushing back the I/O.
*/
@@ -793,16 +794,16 @@ static void dec_pending(struct dm_io *io, int error)
bio_list_add_head(&md->deferred, io->bio);
else
/* noflush suspend was interrupted. */
- io->error = -EIO;
+ io->status = BLK_STS_IOERR;
spin_unlock_irqrestore(&md->deferred_lock, flags);
}
- io_error = io->error;
+ io_error = io->status;
bio = io->bio;
end_io_acct(io);
free_io(md, io);
- if (io_error == DM_ENDIO_REQUEUE)
+ if (io_error == BLK_STS_DM_REQUEUE)
return;
if ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size) {
@@ -814,7 +815,7 @@ static void dec_pending(struct dm_io *io, int error)
queue_io(md, bio);
} else {
/* done with normal IO or empty flush */
- bio->bi_error = io_error;
+ bio->bi_status = io_error;
bio_endio(bio);
}
}
@@ -838,31 +839,13 @@ void disable_write_zeroes(struct mapped_device *md)
static void clone_endio(struct bio *bio)
{
- int error = bio->bi_error;
- int r = error;
+ blk_status_t error = bio->bi_status;
struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
struct dm_io *io = tio->io;
struct mapped_device *md = tio->io->md;
dm_endio_fn endio = tio->ti->type->end_io;
- if (endio) {
- r = endio(tio->ti, bio, error);
- if (r < 0 || r == DM_ENDIO_REQUEUE)
- /*
- * error and requeue request are handled
- * in dec_pending().
- */
- error = r;
- else if (r == DM_ENDIO_INCOMPLETE)
- /* The target will handle the io */
- return;
- else if (r) {
- DMWARN("unimplemented target endio return value: %d", r);
- BUG();
- }
- }
-
- if (unlikely(r == -EREMOTEIO)) {
+ if (unlikely(error == BLK_STS_TARGET)) {
if (bio_op(bio) == REQ_OP_WRITE_SAME &&
!bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)
disable_write_same(md);
@@ -871,6 +854,23 @@ static void clone_endio(struct bio *bio)
disable_write_zeroes(md);
}
+ if (endio) {
+ int r = endio(tio->ti, bio, &error);
+ switch (r) {
+ case DM_ENDIO_REQUEUE:
+ error = BLK_STS_DM_REQUEUE;
+ /*FALLTHRU*/
+ case DM_ENDIO_DONE:
+ break;
+ case DM_ENDIO_INCOMPLETE:
+ /* The target will handle the io */
+ return;
+ default:
+ DMWARN("unimplemented target endio return value: %d", r);
+ BUG();
+ }
+ }
+
free_tio(tio);
dec_pending(io, error);
}
@@ -1036,7 +1036,8 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule)
while ((bio = bio_list_pop(&list))) {
struct bio_set *bs = bio->bi_pool;
- if (unlikely(!bs) || bs == fs_bio_set) {
+ if (unlikely(!bs) || bs == fs_bio_set ||
+ !bs->rescue_workqueue) {
bio_list_add(&current->bio_list[i], bio);
continue;
}
@@ -1084,18 +1085,24 @@ static void __map_bio(struct dm_target_io *tio)
r = ti->type->map(ti, clone);
dm_offload_end(&o);
- if (r == DM_MAPIO_REMAPPED) {
+ switch (r) {
+ case DM_MAPIO_SUBMITTED:
+ break;
+ case DM_MAPIO_REMAPPED:
/* the bio has been remapped so dispatch it */
-
trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone,
tio->io->bio->bi_bdev->bd_dev, sector);
-
generic_make_request(clone);
- } else if (r < 0 || r == DM_MAPIO_REQUEUE) {
- /* error the io and bail out, or requeue it if needed */
- dec_pending(tio->io, r);
+ break;
+ case DM_MAPIO_KILL:
+ dec_pending(tio->io, BLK_STS_IOERR);
+ free_tio(tio);
+ break;
+ case DM_MAPIO_REQUEUE:
+ dec_pending(tio->io, BLK_STS_DM_REQUEUE);
free_tio(tio);
- } else if (r != DM_MAPIO_SUBMITTED) {
+ break;
+ default:
DMWARN("unimplemented target map return value: %d", r);
BUG();
}
@@ -1360,7 +1367,7 @@ static void __split_and_process_bio(struct mapped_device *md,
ci.map = map;
ci.md = md;
ci.io = alloc_io(md);
- ci.io->error = 0;
+ ci.io->status = 0;
atomic_set(&ci.io->io_count, 1);
ci.io->bio = bio;
ci.io->md = md;
@@ -1527,7 +1534,6 @@ void dm_init_normal_md_queue(struct mapped_device *md)
* Initialize aspects of queue that aren't relevant for blk-mq
*/
md->queue->backing_dev_info->congested_fn = dm_any_congested;
- blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
}
static void cleanup_mapped_device(struct mapped_device *md)
@@ -1657,7 +1663,7 @@ static struct mapped_device *alloc_dev(int minor)
bio_init(&md->flush_bio, NULL, 0);
md->flush_bio.bi_bdev = md->bdev;
- md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
+ md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
dm_stats_init(&md->stats);
@@ -2654,7 +2660,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu
BUG();
}
- pools->bs = bioset_create_nobvec(pool_size, front_pad);
+ pools->bs = bioset_create(pool_size, front_pad, BIOSET_NEED_RESCUER);
if (!pools->bs)
goto out;
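
Several callers in this patch are touched because bioset_create() grew a flags argument: BIOSET_NEED_BVECS folds in the old bioset_create_nobvec() distinction, and BIOSET_NEED_RESCUER makes the rescuer workqueue opt-in. Illustrative calls matching the hunks in this patch:

	/* full bvec pool, as md_run() requests below */
	bs = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);

	/* no bvec pool, but keep the rescuer workqueue: the replacement
	 * for bioset_create_nobvec() in dm_alloc_md_mempools() above */
	bs = bioset_create(pool_size, front_pad, BIOSET_NEED_RESCUER);

	/* neither, as in the raid1/raid10 bio_split sets below */
	bs = bioset_create(BIO_POOL_SIZE, 0, 0);
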
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 7299ce2f08a8..03082e17c65c 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -1311,8 +1311,10 @@ static int add_new_disk(struct mddev *mddev, struct md_rdev *rdev)
cmsg.raid_slot = cpu_to_le32(rdev->desc_nr);
lock_comm(cinfo, 1);
ret = __sendmsg(cinfo, &cmsg);
- if (ret)
+ if (ret) {
+ unlock_comm(cinfo);
return ret;
+ }
cinfo->no_new_dev_lockres->flags |= DLM_LKF_NOQUEUE;
ret = dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_EX);
cinfo->no_new_dev_lockres->flags &= ~DLM_LKF_NOQUEUE;
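
The md-cluster hunk is an unlock-on-error fix: lock_comm() had been taken, but the early return on __sendmsg() failure skipped unlock_comm() and leaked the communication lock. The corrected shape, for emphasis:

	lock_comm(cinfo, 1);
	ret = __sendmsg(cinfo, &cmsg);
	if (ret) {
		unlock_comm(cinfo);	/* release the lock on the error path too */
		return ret;
	}
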
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 82f798be964f..31bcbfb09fef 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -185,7 +185,7 @@ static int start_readonly;
static bool create_on_open = true;
/* bio_clone_mddev
- * like bio_clone, but with a local bio set
+ * like bio_clone_bioset, but with a local bio set
*/
struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
@@ -265,7 +265,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
unsigned int sectors;
int cpu;
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
if (mddev == NULL || mddev->pers == NULL) {
bio_io_error(bio);
@@ -273,7 +273,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
}
if (mddev->ro == 1 && unlikely(rw == WRITE)) {
if (bio_sectors(bio) != 0)
- bio->bi_error = -EROFS;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return BLK_QC_T_NONE;
}
@@ -719,8 +719,8 @@ static void super_written(struct bio *bio)
struct md_rdev *rdev = bio->bi_private;
struct mddev *mddev = rdev->mddev;
- if (bio->bi_error) {
- pr_err("md: super_written gets error=%d\n", bio->bi_error);
+ if (bio->bi_status) {
+ pr_err("md: super_written gets error=%d\n", bio->bi_status);
md_error(mddev, rdev);
if (!test_bit(Faulty, &rdev->flags)
&& (bio->bi_opf & MD_FAILFAST)) {
@@ -765,7 +765,7 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
test_bit(FailFast, &rdev->flags) &&
!test_bit(LastDev, &rdev->flags))
ff = MD_FAILFAST;
- bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA | ff;
+ bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA | ff;
atomic_inc(&mddev->pending_writes);
submit_bio(bio);
@@ -801,7 +801,7 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
submit_bio_wait(bio);
- ret = !bio->bi_error;
+ ret = !bio->bi_status;
bio_put(bio);
return ret;
}
@@ -825,7 +825,7 @@ fail:
return -EINVAL;
}
-static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
+static int md_uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
{
return sb1->set_uuid0 == sb2->set_uuid0 &&
sb1->set_uuid1 == sb2->set_uuid1 &&
@@ -833,7 +833,7 @@ static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
sb1->set_uuid3 == sb2->set_uuid3;
}
-static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
+static int md_sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
{
int ret;
mdp_super_t *tmp1, *tmp2;
@@ -1025,12 +1025,12 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
} else {
__u64 ev1, ev2;
mdp_super_t *refsb = page_address(refdev->sb_page);
- if (!uuid_equal(refsb, sb)) {
+ if (!md_uuid_equal(refsb, sb)) {
pr_warn("md: %s has different UUID to %s\n",
b, bdevname(refdev->bdev,b2));
goto abort;
}
- if (!sb_equal(refsb, sb)) {
+ if (!md_sb_equal(refsb, sb)) {
pr_warn("md: %s has same UUID but different superblock to %s\n",
b, bdevname(refdev->bdev, b2));
goto abort;
@@ -5174,6 +5174,18 @@ static void mddev_delayed_delete(struct work_struct *ws)
static void no_op(struct percpu_ref *r) {}
+int mddev_init_writes_pending(struct mddev *mddev)
+{
+ if (mddev->writes_pending.percpu_count_ptr)
+ return 0;
+ if (percpu_ref_init(&mddev->writes_pending, no_op, 0, GFP_KERNEL) < 0)
+ return -ENOMEM;
+ /* We want to start with the refcount at zero */
+ percpu_ref_put(&mddev->writes_pending);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mddev_init_writes_pending);
+
static int md_alloc(dev_t dev, char *name)
{
/*
@@ -5239,10 +5251,6 @@ static int md_alloc(dev_t dev, char *name)
blk_queue_make_request(mddev->queue, md_make_request);
blk_set_stacking_limits(&mddev->queue->limits);
- if (percpu_ref_init(&mddev->writes_pending, no_op, 0, GFP_KERNEL) < 0)
- goto abort;
- /* We want to start with the refcount at zero */
- percpu_ref_put(&mddev->writes_pending);
disk = alloc_disk(1 << shift);
if (!disk) {
blk_cleanup_queue(mddev->queue);
@@ -5420,7 +5428,7 @@ int md_run(struct mddev *mddev)
}
if (mddev->bio_set == NULL) {
- mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
+ mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
if (!mddev->bio_set)
return -ENOMEM;
}
@@ -8022,18 +8030,15 @@ EXPORT_SYMBOL(md_write_end);
* may proceed without blocking. It is important to call this before
* attempting a GFP_KERNEL allocation while holding the mddev lock.
* Must be called with mddev_lock held.
- *
- * In the ->external case MD_SB_CHANGE_PENDING can not be cleared until mddev->lock
- * is dropped, so return -EAGAIN after notifying userspace.
*/
-int md_allow_write(struct mddev *mddev)
+void md_allow_write(struct mddev *mddev)
{
if (!mddev->pers)
- return 0;
+ return;
if (mddev->ro)
- return 0;
+ return;
if (!mddev->pers->sync_request)
- return 0;
+ return;
spin_lock(&mddev->lock);
if (mddev->in_sync) {
@@ -8046,13 +8051,12 @@ int md_allow_write(struct mddev *mddev)
spin_unlock(&mddev->lock);
md_update_sb(mddev, 0);
sysfs_notify_dirent_safe(mddev->sysfs_state);
+ /* wait for the dirty state to be recorded in the metadata */
+ wait_event(mddev->sb_wait,
+ !test_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags) &&
+ !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
} else
spin_unlock(&mddev->lock);
-
- if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
- return -EAGAIN;
- else
- return 0;
}
EXPORT_SYMBOL_GPL(md_allow_write);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 4e75d121bfcc..0fa1de42c42b 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -648,6 +648,7 @@ extern void md_unregister_thread(struct md_thread **threadp);
extern void md_wakeup_thread(struct md_thread *thread);
extern void md_check_recovery(struct mddev *mddev);
extern void md_reap_sync_thread(struct mddev *mddev);
+extern int mddev_init_writes_pending(struct mddev *mddev);
extern void md_write_start(struct mddev *mddev, struct bio *bi);
extern void md_write_inc(struct mddev *mddev, struct bio *bi);
extern void md_write_end(struct mddev *mddev);
@@ -665,7 +666,7 @@ extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
bool metadata_op);
extern void md_do_sync(struct md_thread *thread);
extern void md_new_event(struct mddev *mddev);
-extern int md_allow_write(struct mddev *mddev);
+extern void md_allow_write(struct mddev *mddev);
extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev);
extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors);
extern int md_check_no_bitmap(struct mddev *mddev);
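
md_alloc() no longer sets up the writes_pending percpu refcount unconditionally; personalities that use it must now call the new mddev_init_writes_pending() from their ->run() method, as the raid1 and raid10 hunks below do. Sketch for a hypothetical personality:

	static int example_run(struct mddev *mddev)
	{
		if (mddev_init_writes_pending(mddev) < 0)
			return -ENOMEM;

		/* ... the rest of the personality's setup ... */
		return 0;
	}
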
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index e95d521d93e9..68d036e64041 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -73,12 +73,12 @@ static void multipath_reschedule_retry (struct multipath_bh *mp_bh)
* operation and are ready to return a success/failure code to the buffer
* cache layer.
*/
-static void multipath_end_bh_io (struct multipath_bh *mp_bh, int err)
+static void multipath_end_bh_io(struct multipath_bh *mp_bh, blk_status_t status)
{
struct bio *bio = mp_bh->master_bio;
struct mpconf *conf = mp_bh->mddev->private;
- bio->bi_error = err;
+ bio->bi_status = status;
bio_endio(bio);
mempool_free(mp_bh, conf->pool);
}
@@ -89,7 +89,7 @@ static void multipath_end_request(struct bio *bio)
struct mpconf *conf = mp_bh->mddev->private;
struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev;
- if (!bio->bi_error)
+ if (!bio->bi_status)
multipath_end_bh_io(mp_bh, 0);
else if (!(bio->bi_opf & REQ_RAHEAD)) {
/*
@@ -102,7 +102,7 @@ static void multipath_end_request(struct bio *bio)
(unsigned long long)bio->bi_iter.bi_sector);
multipath_reschedule_retry(mp_bh);
} else
- multipath_end_bh_io(mp_bh, bio->bi_error);
+ multipath_end_bh_io(mp_bh, bio->bi_status);
rdev_dec_pending(rdev, conf->mddev);
}
@@ -347,7 +347,7 @@ static void multipathd(struct md_thread *thread)
pr_err("multipath: %s: unrecoverable IO read error for block %llu\n",
bdevname(bio->bi_bdev,b),
(unsigned long long)bio->bi_iter.bi_sector);
- multipath_end_bh_io(mp_bh, -EIO);
+ multipath_end_bh_io(mp_bh, BLK_STS_IOERR);
} else {
pr_err("multipath: %s: redirecting sector %llu to another IO path\n",
bdevname(bio->bi_bdev,b),
diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c
index ebb280a14325..32adf6b4a9c7 100644
--- a/drivers/md/persistent-data/dm-space-map-disk.c
+++ b/drivers/md/persistent-data/dm-space-map-disk.c
@@ -142,10 +142,23 @@ static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b)
static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b)
{
+ int r;
+ uint32_t old_count;
enum allocation_event ev;
struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
- return sm_ll_dec(&smd->ll, b, &ev);
+ r = sm_ll_dec(&smd->ll, b, &ev);
+ if (!r && (ev == SM_FREE)) {
+ /*
+ * It's only free if it's also free in the last
+ * transaction.
+ */
+ r = sm_ll_lookup(&smd->old_ll, b, &old_count);
+ if (!r && !old_count)
+ smd->nr_allocated_this_transaction--;
+ }
+
+ return r;
}
static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b)
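
The sm_disk_dec_block() change fixes the per-transaction allocation counter: nr_allocated_this_transaction should only shrink when a block that just became free was also free at the start of the transaction, i.e. it was both allocated and freed within this transaction. As a comment-table (my summary, not kernel text):

	/*
	 * ev == SM_FREE | old_count == 0 | meaning                      | action
	 * --------------+----------------+------------------------------+--------------------------------
	 * yes           | yes            | allocated and freed this txn | nr_allocated_this_transaction--
	 * yes           | no             | freeing a pre-existing block | counter untouched
	 * no            | -              | block still referenced       | counter untouched
	 */
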
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 84e58596594d..d6c0bc76e837 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -385,7 +385,7 @@ static int raid0_run(struct mddev *mddev)
blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors);
- blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors);
+ blk_queue_max_discard_sectors(mddev->queue, UINT_MAX);
blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
blk_queue_io_opt(mddev->queue,
@@ -459,6 +459,95 @@ static inline int is_io_in_chunk_boundary(struct mddev *mddev,
}
}
+static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
+{
+ struct r0conf *conf = mddev->private;
+ struct strip_zone *zone;
+ sector_t start = bio->bi_iter.bi_sector;
+ sector_t end;
+ unsigned int stripe_size;
+ sector_t first_stripe_index, last_stripe_index;
+ sector_t start_disk_offset;
+ unsigned int start_disk_index;
+ sector_t end_disk_offset;
+ unsigned int end_disk_index;
+ unsigned int disk;
+
+ zone = find_zone(conf, &start);
+
+ if (bio_end_sector(bio) > zone->zone_end) {
+ struct bio *split = bio_split(bio,
+ zone->zone_end - bio->bi_iter.bi_sector, GFP_NOIO,
+ mddev->bio_set);
+ bio_chain(split, bio);
+ generic_make_request(bio);
+ bio = split;
+ end = zone->zone_end;
+ } else
+ end = bio_end_sector(bio);
+
+ if (zone != conf->strip_zone)
+ end = end - zone[-1].zone_end;
+
+ /* Now start and end is the offset in zone */
+ stripe_size = zone->nb_dev * mddev->chunk_sectors;
+
+ first_stripe_index = start;
+ sector_div(first_stripe_index, stripe_size);
+ last_stripe_index = end;
+ sector_div(last_stripe_index, stripe_size);
+
+ start_disk_index = (int)(start - first_stripe_index * stripe_size) /
+ mddev->chunk_sectors;
+ start_disk_offset = ((int)(start - first_stripe_index * stripe_size) %
+ mddev->chunk_sectors) +
+ first_stripe_index * mddev->chunk_sectors;
+ end_disk_index = (int)(end - last_stripe_index * stripe_size) /
+ mddev->chunk_sectors;
+ end_disk_offset = ((int)(end - last_stripe_index * stripe_size) %
+ mddev->chunk_sectors) +
+ last_stripe_index * mddev->chunk_sectors;
+
+ for (disk = 0; disk < zone->nb_dev; disk++) {
+ sector_t dev_start, dev_end;
+ struct bio *discard_bio = NULL;
+ struct md_rdev *rdev;
+
+ if (disk < start_disk_index)
+ dev_start = (first_stripe_index + 1) *
+ mddev->chunk_sectors;
+ else if (disk > start_disk_index)
+ dev_start = first_stripe_index * mddev->chunk_sectors;
+ else
+ dev_start = start_disk_offset;
+
+ if (disk < end_disk_index)
+ dev_end = (last_stripe_index + 1) * mddev->chunk_sectors;
+ else if (disk > end_disk_index)
+ dev_end = last_stripe_index * mddev->chunk_sectors;
+ else
+ dev_end = end_disk_offset;
+
+ if (dev_end <= dev_start)
+ continue;
+
+ rdev = conf->devlist[(zone - conf->strip_zone) *
+ conf->strip_zone[0].nb_dev + disk];
+ if (__blkdev_issue_discard(rdev->bdev,
+ dev_start + zone->dev_start + rdev->data_offset,
+ dev_end - dev_start, GFP_NOIO, 0, &discard_bio) ||
+ !discard_bio)
+ continue;
+ bio_chain(discard_bio, bio);
+ if (mddev->gendisk)
+ trace_block_bio_remap(bdev_get_queue(rdev->bdev),
+ discard_bio, disk_devt(mddev->gendisk),
+ bio->bi_iter.bi_sector);
+ generic_make_request(discard_bio);
+ }
+ bio_endio(bio);
+}
+
static void raid0_make_request(struct mddev *mddev, struct bio *bio)
{
struct strip_zone *zone;
@@ -473,6 +562,11 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
return;
}
+ if (unlikely((bio_op(bio) == REQ_OP_DISCARD))) {
+ raid0_handle_discard(mddev, bio);
+ return;
+ }
+
bio_sector = bio->bi_iter.bi_sector;
sector = bio_sector;
chunk_sects = mddev->chunk_sectors;
@@ -498,19 +592,13 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
bio->bi_iter.bi_sector = sector + zone->dev_start +
tmp_dev->data_offset;
- if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
- !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) {
- /* Just ignore it */
- bio_endio(bio);
- } else {
- if (mddev->gendisk)
- trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
- bio, disk_devt(mddev->gendisk),
- bio_sector);
- mddev_check_writesame(mddev, bio);
- mddev_check_write_zeroes(mddev, bio);
- generic_make_request(bio);
- }
+ if (mddev->gendisk)
+ trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
+ bio, disk_devt(mddev->gendisk),
+ bio_sector);
+ mddev_check_writesame(mddev, bio);
+ mddev_check_write_zeroes(mddev, bio);
+ generic_make_request(bio);
}
static void raid0_status(struct seq_file *seq, struct mddev *mddev)
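
raid0_handle_discard() above maps a single zone-relative discard range onto one trimmed range per member device. A worked example with made-up numbers may help: take a zone with nb_dev = 2 and chunk_sectors = 64 (so stripe_size = 128), and a discard covering zone offsets [100, 600).

	first_stripe_index = 100 / 128 = 0      last_stripe_index = 600 / 128 = 4
	start_disk_index   = (100 - 0*128) / 64 = 1
	start_disk_offset  = (100 - 0*128) % 64 + 0*64 = 36
	end_disk_index     = (600 - 4*128) / 64 = 1
	end_disk_offset    = (600 - 4*128) % 64 + 4*64 = 280

	disk 0 (below both boundary disks): [(0+1)*64, (4+1)*64) = [64, 320)
	disk 1 (the boundary disk itself):  [36, 280)

The two per-device ranges cover 256 + 244 = 500 sectors, exactly the length of the original request, so nothing is trimmed twice or missed.
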
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 7ed59351fe97..98ca2c1d3226 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -277,7 +277,7 @@ static void call_bio_endio(struct r1bio *r1_bio)
struct r1conf *conf = r1_bio->mddev->private;
if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
/*
@@ -335,7 +335,7 @@ static int find_bio_disk(struct r1bio *r1_bio, struct bio *bio)
static void raid1_end_read_request(struct bio *bio)
{
- int uptodate = !bio->bi_error;
+ int uptodate = !bio->bi_status;
struct r1bio *r1_bio = bio->bi_private;
struct r1conf *conf = r1_bio->mddev->private;
struct md_rdev *rdev = conf->mirrors[r1_bio->read_disk].rdev;
@@ -426,12 +426,12 @@ static void raid1_end_write_request(struct bio *bio)
struct md_rdev *rdev = conf->mirrors[mirror].rdev;
bool discard_error;
- discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD;
+ discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
/*
* 'one mirror IO has finished' event handler:
*/
- if (bio->bi_error && !discard_error) {
+ if (bio->bi_status && !discard_error) {
set_bit(WriteErrorSeen, &rdev->flags);
if (!test_and_set_bit(WantReplacement, &rdev->flags))
set_bit(MD_RECOVERY_NEEDED, &
@@ -666,8 +666,11 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
break;
}
continue;
- } else
+ } else {
+ if ((sectors > best_good_sectors) && (best_disk >= 0))
+ best_disk = -1;
best_good_sectors = sectors;
+ }
if (best_disk >= 0)
/* At least two disks to choose from so failfast is OK */
@@ -799,7 +802,7 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio)
bio->bi_next = NULL;
bio->bi_bdev = rdev->bdev;
if (test_bit(Faulty, &rdev->flags)) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1529,17 +1532,16 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
plug = container_of(cb, struct raid1_plug_cb, cb);
else
plug = NULL;
- spin_lock_irqsave(&conf->device_lock, flags);
if (plug) {
bio_list_add(&plug->pending, mbio);
plug->pending_cnt++;
} else {
+ spin_lock_irqsave(&conf->device_lock, flags);
bio_list_add(&conf->pending_bio_list, mbio);
conf->pending_count++;
- }
- spin_unlock_irqrestore(&conf->device_lock, flags);
- if (!plug)
+ spin_unlock_irqrestore(&conf->device_lock, flags);
md_wakeup_thread(mddev->thread);
+ }
}
r1_bio_write_done(r1_bio);
@@ -1854,7 +1856,7 @@ static void end_sync_read(struct bio *bio)
* or re-read if the read failed.
* We don't do much here, just schedule handling by raid1d
*/
- if (!bio->bi_error)
+ if (!bio->bi_status)
set_bit(R1BIO_Uptodate, &r1_bio->state);
if (atomic_dec_and_test(&r1_bio->remaining))
@@ -1863,7 +1865,7 @@ static void end_sync_read(struct bio *bio)
static void end_sync_write(struct bio *bio)
{
- int uptodate = !bio->bi_error;
+ int uptodate = !bio->bi_status;
struct r1bio *r1_bio = get_resync_r1bio(bio);
struct mddev *mddev = r1_bio->mddev;
struct r1conf *conf = mddev->private;
@@ -2056,7 +2058,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
idx ++;
}
set_bit(R1BIO_Uptodate, &r1_bio->state);
- bio->bi_error = 0;
+ bio->bi_status = 0;
return 1;
}
@@ -2080,16 +2082,16 @@ static void process_checks(struct r1bio *r1_bio)
for (i = 0; i < conf->raid_disks * 2; i++) {
int j;
int size;
- int error;
+ blk_status_t status;
struct bio_vec *bi;
struct bio *b = r1_bio->bios[i];
struct resync_pages *rp = get_resync_pages(b);
if (b->bi_end_io != end_sync_read)
continue;
/* fixup the bio for reuse, but preserve errno */
- error = b->bi_error;
+ status = b->bi_status;
bio_reset(b);
- b->bi_error = error;
+ b->bi_status = status;
b->bi_vcnt = vcnt;
b->bi_iter.bi_size = r1_bio->sectors << 9;
b->bi_iter.bi_sector = r1_bio->sector +
@@ -2111,7 +2113,7 @@ static void process_checks(struct r1bio *r1_bio)
}
for (primary = 0; primary < conf->raid_disks * 2; primary++)
if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
- !r1_bio->bios[primary]->bi_error) {
+ !r1_bio->bios[primary]->bi_status) {
r1_bio->bios[primary]->bi_end_io = NULL;
rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
break;
@@ -2121,7 +2123,7 @@ static void process_checks(struct r1bio *r1_bio)
int j;
struct bio *pbio = r1_bio->bios[primary];
struct bio *sbio = r1_bio->bios[i];
- int error = sbio->bi_error;
+ blk_status_t status = sbio->bi_status;
struct page **ppages = get_resync_pages(pbio)->pages;
struct page **spages = get_resync_pages(sbio)->pages;
struct bio_vec *bi;
@@ -2130,12 +2132,12 @@ static void process_checks(struct r1bio *r1_bio)
if (sbio->bi_end_io != end_sync_read)
continue;
/* Now we can 'fixup' the error value */
- sbio->bi_error = 0;
+ sbio->bi_status = 0;
bio_for_each_segment_all(bi, sbio, j)
page_len[j] = bi->bv_len;
- if (!error) {
+ if (!status) {
for (j = vcnt; j-- ; ) {
if (memcmp(page_address(ppages[j]),
page_address(spages[j]),
@@ -2147,7 +2149,7 @@ static void process_checks(struct r1bio *r1_bio)
if (j >= 0)
atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
- && !error)) {
+ && !status)) {
/* No need to write to this device. */
sbio->bi_end_io = NULL;
rdev_dec_pending(conf->mirrors[i].rdev, mddev);
@@ -2398,11 +2400,11 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
struct bio *bio = r1_bio->bios[m];
if (bio->bi_end_io == NULL)
continue;
- if (!bio->bi_error &&
+ if (!bio->bi_status &&
test_bit(R1BIO_MadeGood, &r1_bio->state)) {
rdev_clear_badblocks(rdev, r1_bio->sector, s, 0);
}
- if (bio->bi_error &&
+ if (bio->bi_status &&
test_bit(R1BIO_WriteError, &r1_bio->state)) {
if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0))
md_error(conf->mddev, rdev);
@@ -2953,7 +2955,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
if (!conf->r1bio_pool)
goto abort;
- conf->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+ conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
if (!conf->bio_split)
goto abort;
@@ -3061,6 +3063,8 @@ static int raid1_run(struct mddev *mddev)
mdname(mddev));
return -EIO;
}
+ if (mddev_init_writes_pending(mddev) < 0)
+ return -ENOMEM;
/*
* copy the already verified devices into our private RAID1
* bookkeeping area. [whatever we allocate in run(),
@@ -3197,7 +3201,7 @@ static int raid1_reshape(struct mddev *mddev)
struct r1conf *conf = mddev->private;
int cnt, raid_disks;
unsigned long flags;
- int d, d2, err;
+ int d, d2;
/* Cannot change chunk_size, layout, or level */
if (mddev->chunk_sectors != mddev->new_chunk_sectors ||
@@ -3209,11 +3213,8 @@ static int raid1_reshape(struct mddev *mddev)
return -EINVAL;
}
- if (!mddev_is_clustered(mddev)) {
- err = md_allow_write(mddev);
- if (err)
- return err;
- }
+ if (!mddev_is_clustered(mddev))
+ md_allow_write(mddev);
raid_disks = mddev->raid_disks + mddev->delta_disks;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 6b86a0032cf8..57a250fdbbcc 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -336,7 +336,7 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
struct r10conf *conf = r10_bio->mddev->private;
if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
/*
@@ -389,7 +389,7 @@ static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
static void raid10_end_read_request(struct bio *bio)
{
- int uptodate = !bio->bi_error;
+ int uptodate = !bio->bi_status;
struct r10bio *r10_bio = bio->bi_private;
int slot, dev;
struct md_rdev *rdev;
@@ -477,7 +477,7 @@ static void raid10_end_write_request(struct bio *bio)
struct bio *to_put = NULL;
bool discard_error;
- discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD;
+ discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
@@ -491,7 +491,7 @@ static void raid10_end_write_request(struct bio *bio)
/*
* this branch is our 'one mirror IO has finished' event handler:
*/
- if (bio->bi_error && !discard_error) {
+ if (bio->bi_status && !discard_error) {
if (repl)
/* Never record new bad blocks to replacement,
* just fail it.
@@ -913,7 +913,7 @@ static void flush_pending_writes(struct r10conf *conf)
bio->bi_next = NULL;
bio->bi_bdev = rdev->bdev;
if (test_bit(Faulty, &rdev->flags)) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1098,7 +1098,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
bio->bi_next = NULL;
bio->bi_bdev = rdev->bdev;
if (test_bit(Faulty, &rdev->flags)) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1282,17 +1282,16 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
plug = container_of(cb, struct raid10_plug_cb, cb);
else
plug = NULL;
- spin_lock_irqsave(&conf->device_lock, flags);
if (plug) {
bio_list_add(&plug->pending, mbio);
plug->pending_cnt++;
} else {
+ spin_lock_irqsave(&conf->device_lock, flags);
bio_list_add(&conf->pending_bio_list, mbio);
conf->pending_count++;
- }
- spin_unlock_irqrestore(&conf->device_lock, flags);
- if (!plug)
+ spin_unlock_irqrestore(&conf->device_lock, flags);
md_wakeup_thread(mddev->thread);
+ }
}
static void raid10_write_request(struct mddev *mddev, struct bio *bio,
@@ -1889,7 +1888,7 @@ static void __end_sync_read(struct r10bio *r10_bio, struct bio *bio, int d)
{
struct r10conf *conf = r10_bio->mddev->private;
- if (!bio->bi_error)
+ if (!bio->bi_status)
set_bit(R10BIO_Uptodate, &r10_bio->state);
else
/* The write handler will notice the lack of
@@ -1973,7 +1972,7 @@ static void end_sync_write(struct bio *bio)
else
rdev = conf->mirrors[d].rdev;
- if (bio->bi_error) {
+ if (bio->bi_status) {
if (repl)
md_error(mddev, rdev);
else {
@@ -2022,7 +2021,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
/* find the first device with a block */
for (i=0; i<conf->copies; i++)
- if (!r10_bio->devs[i].bio->bi_error)
+ if (!r10_bio->devs[i].bio->bi_status)
break;
if (i == conf->copies)
@@ -2051,7 +2050,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
tpages = get_resync_pages(tbio)->pages;
d = r10_bio->devs[i].devnum;
rdev = conf->mirrors[d].rdev;
- if (!r10_bio->devs[i].bio->bi_error) {
+ if (!r10_bio->devs[i].bio->bi_status) {
/* We know that the bi_io_vec layout is the same for
* both 'first' and 'i', so we just compare them.
* All vec entries are PAGE_SIZE;
@@ -2634,7 +2633,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
rdev = conf->mirrors[dev].rdev;
if (r10_bio->devs[m].bio == NULL)
continue;
- if (!r10_bio->devs[m].bio->bi_error) {
+ if (!r10_bio->devs[m].bio->bi_status) {
rdev_clear_badblocks(
rdev,
r10_bio->devs[m].addr,
@@ -2650,7 +2649,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
if (r10_bio->devs[m].repl_bio == NULL)
continue;
- if (!r10_bio->devs[m].repl_bio->bi_error) {
+ if (!r10_bio->devs[m].repl_bio->bi_status) {
rdev_clear_badblocks(
rdev,
r10_bio->devs[m].addr,
@@ -2676,7 +2675,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
r10_bio->devs[m].addr,
r10_bio->sectors, 0);
rdev_dec_pending(rdev, conf->mddev);
- } else if (bio != NULL && bio->bi_error) {
+ } else if (bio != NULL && bio->bi_status) {
fail = true;
if (!narrow_write_error(r10_bio, m)) {
md_error(conf->mddev, rdev);
@@ -3268,7 +3267,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
r10_bio->devs[i].repl_bio->bi_end_io = NULL;
bio = r10_bio->devs[i].bio;
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
rcu_read_lock();
rdev = rcu_dereference(conf->mirrors[d].rdev);
if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
@@ -3310,7 +3309,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
/* Need to set up for writing to the replacement */
bio = r10_bio->devs[i].repl_bio;
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
sector = r10_bio->devs[i].addr;
bio->bi_next = biolist;
@@ -3376,7 +3375,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
if (bio->bi_end_io == end_sync_read) {
md_sync_acct(bio->bi_bdev, nr_sectors);
- bio->bi_error = 0;
+ bio->bi_status = 0;
generic_make_request(bio);
}
}
@@ -3553,7 +3552,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
if (!conf->r10bio_pool)
goto out;
- conf->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+ conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
if (!conf->bio_split)
goto out;
@@ -3612,6 +3611,9 @@ static int raid10_run(struct mddev *mddev)
int first = 1;
bool discard_supported = false;
+ if (mddev_init_writes_pending(mddev) < 0)
+ return -ENOMEM;
+
if (mddev->private == NULL) {
conf = setup_conf(mddev);
if (IS_ERR(conf))
@@ -4395,7 +4397,7 @@ read_more:
read_bio->bi_end_io = end_reshape_read;
bio_set_op_attrs(read_bio, REQ_OP_READ, 0);
read_bio->bi_flags &= (~0UL << BIO_RESET_BITS);
- read_bio->bi_error = 0;
+ read_bio->bi_status = 0;
read_bio->bi_vcnt = 0;
read_bio->bi_iter.bi_size = 0;
r10_bio->master_bio = read_bio;
@@ -4639,7 +4641,7 @@ static void end_reshape_write(struct bio *bio)
rdev = conf->mirrors[d].rdev;
}
- if (bio->bi_error) {
+ if (bio->bi_status) {
/* FIXME should record badblock */
md_error(mddev, rdev);
}
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 26ba09282e7c..bfa1e907c472 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -24,6 +24,7 @@
#include "md.h"
#include "raid5.h"
#include "bitmap.h"
+#include "raid5-log.h"
/*
* metadata/data stored in disk with 4k size unit (a block) regardless
@@ -571,7 +572,7 @@ static void r5l_log_endio(struct bio *bio)
struct r5l_log *log = io->log;
unsigned long flags;
- if (bio->bi_error)
+ if (bio->bi_status)
md_error(log->rdev->mddev, log->rdev);
bio_put(bio);
@@ -622,20 +623,30 @@ static void r5l_do_submit_io(struct r5l_log *log, struct r5l_io_unit *io)
__r5l_set_io_unit_state(io, IO_UNIT_IO_START);
spin_unlock_irqrestore(&log->io_list_lock, flags);
+ /*
+ * In case of journal device failures, submit_bio will get an error
+ * and call endio, and the active stripes will then continue the
+ * write process. Therefore, it is not necessary to check the Faulty
+ * bit of the journal device here.
+ *
+ * We can't check io->split_bio after current_bio is submitted:
+ * once current_bio is submitted, its endio may already have run,
+ * completing and freeing the io_unit, so dereferencing io would be
+ * a use-after-free. Submitting split_bio first avoids the issue.
+ */
+ if (io->split_bio) {
+ if (io->has_flush)
+ io->split_bio->bi_opf |= REQ_PREFLUSH;
+ if (io->has_fua)
+ io->split_bio->bi_opf |= REQ_FUA;
+ submit_bio(io->split_bio);
+ }
+
if (io->has_flush)
io->current_bio->bi_opf |= REQ_PREFLUSH;
if (io->has_fua)
io->current_bio->bi_opf |= REQ_FUA;
submit_bio(io->current_bio);
-
- if (!io->split_bio)
- return;
-
- if (io->has_flush)
- io->split_bio->bi_opf |= REQ_PREFLUSH;
- if (io->has_fua)
- io->split_bio->bi_opf |= REQ_FUA;
- submit_bio(io->split_bio);
}
/* deferred io_unit will be dispatched here */
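
The reordering in r5l_do_submit_io() is a lifetime fix rather than a behavioural change: once current_bio is submitted its endio may free the io_unit, so io->split_bio has to be consumed first. The general rule, sketched with hypothetical names (struct example_unit is not from this patch):

#include <linux/bio.h>

/* Illustrative container for a primary bio plus an optional split. */
struct example_unit {
	struct bio *current_bio;
	struct bio *split_bio;	/* may be NULL */
};

static void example_submit(struct example_unit *unit)
{
	/* Consume everything owned by 'unit' before the submit whose
	 * completion can trigger its release. */
	if (unit->split_bio)
		submit_bio(unit->split_bio);
	/* After this call, 'unit' may already have been freed by endio. */
	submit_bio(unit->current_bio);
}
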
@@ -670,6 +681,11 @@ static void r5c_disable_writeback_async(struct work_struct *work)
return;
pr_info("md/raid:%s: Disabling writeback cache for degraded array.\n",
mdname(mddev));
+
+ /* wait for any pending superblock update before suspending */
+ wait_event(mddev->sb_wait,
+ !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
+
mddev_suspend(mddev);
log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
mddev_resume(mddev);
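
The added wait_event() keeps the worker from calling mddev_suspend() while a superblock update is still pending; the array is only suspended once MD_SB_CHANGE_PENDING clears. The underlying waiter/waker pattern, reduced to a self-contained sketch (all names here are illustrative):

#include <linux/wait.h>
#include <linux/bitops.h>

static DECLARE_WAIT_QUEUE_HEAD(example_wq);
static unsigned long example_flags;
#define EXAMPLE_PENDING 0

/* Waiter: sleeps until the pending bit clears. */
static void example_wait_idle(void)
{
	wait_event(example_wq, !test_bit(EXAMPLE_PENDING, &example_flags));
}

/* Completion side: clear the bit, then wake the waiters so the
 * wait_event() condition is re-evaluated. */
static void example_done(void)
{
	clear_bit(EXAMPLE_PENDING, &example_flags);
	wake_up(&example_wq);
}
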
@@ -1231,7 +1247,7 @@ static void r5l_log_flush_endio(struct bio *bio)
unsigned long flags;
struct r5l_io_unit *io;
- if (bio->bi_error)
+ if (bio->bi_status)
md_error(log->rdev->mddev, log->rdev);
spin_lock_irqsave(&log->io_list_lock, flags);
@@ -1766,7 +1782,7 @@ static int r5l_log_write_empty_meta_block(struct r5l_log *log, sector_t pos,
mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum,
mb, PAGE_SIZE));
if (!sync_page_io(log->rdev, pos, PAGE_SIZE, page, REQ_OP_WRITE,
- REQ_FUA, false)) {
+ REQ_SYNC | REQ_FUA, false)) {
__free_page(page);
return -EIO;
}
@@ -2372,7 +2388,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum,
mb, PAGE_SIZE));
sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page,
- REQ_OP_WRITE, REQ_FUA, false);
+ REQ_OP_WRITE, REQ_SYNC | REQ_FUA, false);
sh->log_start = ctx->pos;
list_add_tail(&sh->r5c, &log->stripe_in_journal_list);
atomic_inc(&log->stripe_in_journal_count);
@@ -2621,8 +2637,11 @@ int r5c_try_caching_write(struct r5conf *conf,
* When run in degraded mode, array is set to write-through mode.
* This check helps drain pending write safely in the transition to
* write-through mode.
+ *
+ * When a stripe is syncing, the write is also handled in
+ * write-through mode.
*/
- if (s->failed) {
+ if (s->failed || test_bit(STRIPE_SYNCING, &sh->state)) {
r5c_make_stripe_write_out(sh);
return -EAGAIN;
}
@@ -2825,6 +2844,9 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
}
r5l_append_flush_payload(log, sh->sector);
+ /* stripe is flushed to raid disks, we can do resync now */
+ if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state))
+ set_bit(STRIPE_HANDLE, &sh->state);
}
int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh)
@@ -2973,7 +2995,7 @@ ioerr:
return ret;
}
-void r5c_update_on_rdev_error(struct mddev *mddev)
+void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev)
{
struct r5conf *conf = mddev->private;
struct r5l_log *log = conf->log;
@@ -2981,7 +3003,8 @@ void r5c_update_on_rdev_error(struct mddev *mddev)
if (!log)
return;
- if (raid5_calc_degraded(conf) > 0 &&
+ if ((raid5_calc_degraded(conf) > 0 ||
+ test_bit(Journal, &rdev->flags)) &&
conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK)
schedule_work(&log->disable_writeback_work);
}
@@ -3040,7 +3063,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
if (!log->io_pool)
goto io_pool;
- log->bs = bioset_create(R5L_POOL_SIZE, 0);
+ log->bs = bioset_create(R5L_POOL_SIZE, 0, BIOSET_NEED_BVECS);
if (!log->bs)
goto io_bs;
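
bioset_create() gains a third flags argument in this series, which is why every caller in this diff passes an extra parameter. BIOSET_NEED_BVECS requests a bvec pool so bios allocated from the set can carry their own page vectors; pools used only to split or clone existing bios, like the bio_split sets here, pass 0. A sketch of both variants (error handling trimmed, names illustrative):

#include <linux/bio.h>

static struct bio_set *data_bs, *split_bs;

static int example_create_pools(void)
{
	/* Pool whose bios need their own bio_vec arrays attached. */
	data_bs = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
	/* Pool used only to split or clone existing bios. */
	split_bs = bioset_create(BIO_POOL_SIZE, 0, 0);
	if (!data_bs || !split_bs)
		return -ENOMEM;	/* real code would free the survivor */
	return 0;
}
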
diff --git a/drivers/md/raid5-log.h b/drivers/md/raid5-log.h
index 27097101ccca..328d67aedda4 100644
--- a/drivers/md/raid5-log.h
+++ b/drivers/md/raid5-log.h
@@ -28,7 +28,8 @@ extern void r5c_flush_cache(struct r5conf *conf, int num);
extern void r5c_check_stripe_cache_usage(struct r5conf *conf);
extern void r5c_check_cached_full_stripe(struct r5conf *conf);
extern struct md_sysfs_entry r5c_journal_mode;
-extern void r5c_update_on_rdev_error(struct mddev *mddev);
+extern void r5c_update_on_rdev_error(struct mddev *mddev,
+ struct md_rdev *rdev);
extern bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect);
extern struct dma_async_tx_descriptor *
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 5d25bebf3328..77cce3573aa8 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -397,7 +397,7 @@ static void ppl_log_endio(struct bio *bio)
pr_debug("%s: seq: %llu\n", __func__, io->seq);
- if (bio->bi_error)
+ if (bio->bi_status)
md_error(ppl_conf->mddev, log->rdev);
list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
@@ -907,8 +907,8 @@ static int ppl_write_empty_header(struct ppl_log *log)
pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PAGE_SIZE));
if (!sync_page_io(rdev, rdev->ppl.sector - rdev->data_offset,
- PPL_HEADER_SIZE, page, REQ_OP_WRITE | REQ_FUA, 0,
- false)) {
+ PPL_HEADER_SIZE, page, REQ_OP_WRITE | REQ_SYNC |
+ REQ_FUA, 0, false)) {
md_error(rdev->mddev, rdev);
ret = -EIO;
}
@@ -1150,7 +1150,7 @@ int ppl_init_log(struct r5conf *conf)
goto err;
}
- ppl_conf->bs = bioset_create(conf->raid_disks, 0);
+ ppl_conf->bs = bioset_create(conf->raid_disks, 0, 0);
if (!ppl_conf->bs) {
ret = -ENOMEM;
goto err;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2e38cfac5b1d..62c965be97e1 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -103,8 +103,7 @@ static inline void unlock_device_hash_lock(struct r5conf *conf, int hash)
static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
{
int i;
- local_irq_disable();
- spin_lock(conf->hash_locks);
+ spin_lock_irq(conf->hash_locks);
for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++)
spin_lock_nest_lock(conf->hash_locks + i, conf->hash_locks);
spin_lock(&conf->device_lock);
@@ -114,9 +113,9 @@ static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf)
{
int i;
spin_unlock(&conf->device_lock);
- for (i = NR_STRIPE_HASH_LOCKS; i; i--)
- spin_unlock(conf->hash_locks + i - 1);
- local_irq_enable();
+ for (i = NR_STRIPE_HASH_LOCKS - 1; i; i--)
+ spin_unlock(conf->hash_locks + i);
+ spin_unlock_irq(conf->hash_locks);
}
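
The lock helpers are rewritten so interrupt state is managed by the outermost acquisition itself: spin_lock_irq() replaces the open-coded local_irq_disable() + spin_lock() pair, and on the unlock side only hash lock 0 re-enables interrupts. In isolation, the idiom looks roughly like this (the two locks are placeholders):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_outer);
static DEFINE_SPINLOCK(example_inner);

static void example_lock_both(void)
{
	/* Disables IRQs and takes the first lock in one call. */
	spin_lock_irq(&example_outer);
	/* spin_lock_nested() documents the intended ordering for lockdep. */
	spin_lock_nested(&example_inner, SINGLE_DEPTH_NESTING);
}

static void example_unlock_both(void)
{
	spin_unlock(&example_inner);
	/* Releasing the outermost lock re-enables IRQs. */
	spin_unlock_irq(&example_outer);
}
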
/* Find first data disk in a raid6 stripe */
@@ -234,11 +233,15 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
if (test_bit(R5_InJournal, &sh->dev[i].flags))
injournal++;
/*
- * When quiesce in r5c write back, set STRIPE_HANDLE for stripes with
- * data in journal, so they are not released to cached lists
+ * In the following cases, the stripe cannot be released to cached
+ * lists. Therefore, we make the stripe write out and set
+ * STRIPE_HANDLE:
+ * 1. when quiescing in r5c write-back mode;
+ * 2. when resync is requested for the stripe.
*/
- if (conf->quiesce && r5c_is_writeback(conf->log) &&
- !test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0) {
+ if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) ||
+ (conf->quiesce && r5c_is_writeback(conf->log) &&
+ !test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0)) {
if (test_bit(STRIPE_R5C_CACHING, &sh->state))
r5c_make_stripe_write_out(sh);
set_bit(STRIPE_HANDLE, &sh->state);
@@ -714,12 +717,11 @@ static bool is_full_stripe_write(struct stripe_head *sh)
static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
{
- local_irq_disable();
if (sh1 > sh2) {
- spin_lock(&sh2->stripe_lock);
+ spin_lock_irq(&sh2->stripe_lock);
spin_lock_nested(&sh1->stripe_lock, 1);
} else {
- spin_lock(&sh1->stripe_lock);
+ spin_lock_irq(&sh1->stripe_lock);
spin_lock_nested(&sh2->stripe_lock, 1);
}
}
@@ -727,8 +729,7 @@ static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
{
spin_unlock(&sh1->stripe_lock);
- spin_unlock(&sh2->stripe_lock);
- local_irq_enable();
+ spin_unlock_irq(&sh2->stripe_lock);
}
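
lock_two_stripes()/unlock_two_stripes() keep their deadlock-avoidance rule, always taking the lower-addressed stripe's lock first, while folding the IRQ disable into that first acquisition. The ordering trick in generic form (struct example is illustrative):

#include <linux/spinlock.h>

struct example {
	spinlock_t lock;
};

/* Any two callers agree on which lock comes first, so an ABBA
 * deadlock between them is impossible. */
static void example_lock_pair(struct example *a, struct example *b)
{
	struct example *first = (a < b) ? a : b;
	struct example *second = (a < b) ? b : a;

	spin_lock_irq(&first->lock);
	spin_lock_nested(&second->lock, SINGLE_DEPTH_NESTING);
}
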
/* Only a freshly created, full-stripe normal write can be added to a batch list */
@@ -2312,14 +2313,12 @@ static int resize_stripes(struct r5conf *conf, int newsize)
struct stripe_head *osh, *nsh;
LIST_HEAD(newstripes);
struct disk_info *ndisks;
- int err;
+ int err = 0;
struct kmem_cache *sc;
int i;
int hash, cnt;
- err = md_allow_write(conf->mddev);
- if (err)
- return err;
+ md_allow_write(conf->mddev);
/* Step 1 */
sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
@@ -2477,7 +2476,7 @@ static void raid5_end_read_request(struct bio * bi)
pr_debug("end_read_request %llu/%d, count: %d, error %d.\n",
(unsigned long long)sh->sector, i, atomic_read(&sh->count),
- bi->bi_error);
+ bi->bi_status);
if (i == disks) {
bio_reset(bi);
BUG();
@@ -2497,7 +2496,7 @@ static void raid5_end_read_request(struct bio * bi)
s = sh->sector + rdev->new_data_offset;
else
s = sh->sector + rdev->data_offset;
- if (!bi->bi_error) {
+ if (!bi->bi_status) {
set_bit(R5_UPTODATE, &sh->dev[i].flags);
if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
/* Note that this cannot happen on a
@@ -2614,7 +2613,7 @@ static void raid5_end_write_request(struct bio *bi)
}
pr_debug("end_write_request %llu/%d, count %d, error: %d.\n",
(unsigned long long)sh->sector, i, atomic_read(&sh->count),
- bi->bi_error);
+ bi->bi_status);
if (i == disks) {
bio_reset(bi);
BUG();
@@ -2622,14 +2621,14 @@ static void raid5_end_write_request(struct bio *bi)
}
if (replacement) {
- if (bi->bi_error)
+ if (bi->bi_status)
md_error(conf->mddev, rdev);
else if (is_badblock(rdev, sh->sector,
STRIPE_SECTORS,
&first_bad, &bad_sectors))
set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
} else {
- if (bi->bi_error) {
+ if (bi->bi_status) {
set_bit(STRIPE_DEGRADED, &sh->state);
set_bit(WriteErrorSeen, &rdev->flags);
set_bit(R5_WriteError, &sh->dev[i].flags);
@@ -2650,7 +2649,7 @@ static void raid5_end_write_request(struct bio *bi)
}
rdev_dec_pending(rdev, conf->mddev);
- if (sh->batch_head && bi->bi_error && !replacement)
+ if (sh->batch_head && bi->bi_status && !replacement)
set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
bio_reset(bi);
@@ -2694,7 +2693,7 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
bdevname(rdev->bdev, b),
mdname(mddev),
conf->raid_disks - mddev->degraded);
- r5c_update_on_rdev_error(mddev);
+ r5c_update_on_rdev_error(mddev, rdev);
}
/*
@@ -3055,6 +3054,11 @@ sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous)
* When LOG_CRITICAL, stripes with injournal == 0 will be sent to
* no_space_stripes list.
*
+ * 3. during journal failure
+ * During journal failure, we try to flush all cached data to raid
+ * disks based on data in the stripe cache. The array is read-only
+ * to upper layers, so we skip all pending writes.
+ *
*/
static inline bool delay_towrite(struct r5conf *conf,
struct r5dev *dev,
@@ -3068,6 +3072,9 @@ static inline bool delay_towrite(struct r5conf *conf,
if (test_bit(R5C_LOG_CRITICAL, &conf->cache_state) &&
s->injournal > 0)
return true;
+ /* case 3 above */
+ if (s->log_failed && s->injournal)
+ return true;
return false;
}
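
With case 3 in place, delay_towrite() is a series of early-true exits keyed off journal state. A reduced model of the logic visible in this hunk, using a hypothetical flattened state instead of the real r5conf/r5dev structures:

#include <linux/types.h>

/* Hypothetical flattened view of the state consulted above. */
struct towrite_state {
	bool log_critical;	/* R5C_LOG_CRITICAL set in cache_state */
	bool log_failed;	/* journal device has failed */
	int injournal;		/* stripe pages still only in the journal */
};

/* Model of cases 2 and 3: hold back writes whose data still lives in
 * a journal that is either critically short on space or gone. */
static bool delay_towrite_model(const struct towrite_state *s)
{
	if (s->log_critical && s->injournal > 0)
		return true;	/* case 2 */
	if (s->log_failed && s->injournal)
		return true;	/* case 3 */
	return false;
}
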
@@ -3374,7 +3381,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
sh->dev[i].sector + STRIPE_SECTORS) {
struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
- bi->bi_error = -EIO;
+ bi->bi_status = BLK_STS_IOERR;
md_write_end(conf->mddev);
bio_endio(bi);
bi = nextbi;
@@ -3396,7 +3403,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
sh->dev[i].sector + STRIPE_SECTORS) {
struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
- bi->bi_error = -EIO;
+ bi->bi_status = BLK_STS_IOERR;
md_write_end(conf->mddev);
bio_endio(bi);
bi = bi2;
@@ -3422,7 +3429,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
struct bio *nextbi =
r5_next_bio(bi, sh->dev[i].sector);
- bi->bi_error = -EIO;
+ bi->bi_status = BLK_STS_IOERR;
bio_endio(bi);
bi = nextbi;
}
@@ -4078,10 +4085,15 @@ static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh,
set_bit(STRIPE_INSYNC, &sh->state);
else {
atomic64_add(STRIPE_SECTORS, &conf->mddev->resync_mismatches);
- if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
+ if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) {
/* don't try to repair!! */
set_bit(STRIPE_INSYNC, &sh->state);
- else {
+ pr_warn_ratelimited("%s: mismatch sector in range "
+ "%llu-%llu\n", mdname(conf->mddev),
+ (unsigned long long) sh->sector,
+ (unsigned long long) sh->sector +
+ STRIPE_SECTORS);
+ } else {
sh->check_state = check_state_compute_run;
set_bit(STRIPE_COMPUTE_RUN, &sh->state);
set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
@@ -4230,10 +4242,15 @@ static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh,
}
} else {
atomic64_add(STRIPE_SECTORS, &conf->mddev->resync_mismatches);
- if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
+ if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) {
/* don't try to repair!! */
set_bit(STRIPE_INSYNC, &sh->state);
- else {
+ pr_warn_ratelimited("%s: mismatch sector in range "
+ "%llu-%llu\n", mdname(conf->mddev),
+ (unsigned long long) sh->sector,
+ (unsigned long long) sh->sector +
+ STRIPE_SECTORS);
+ } else {
int *target = &sh->ops.target;
sh->ops.target = -1;
@@ -4653,8 +4670,13 @@ static void handle_stripe(struct stripe_head *sh)
if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) {
spin_lock(&sh->stripe_lock);
- /* Cannot process 'sync' concurrently with 'discard' */
- if (!test_bit(STRIPE_DISCARD, &sh->state) &&
+ /*
+ * Cannot process 'sync' concurrently with 'discard'.
+ * Flush data in r5cache before 'sync'.
+ */
+ if (!test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state) &&
+ !test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state) &&
+ !test_bit(STRIPE_DISCARD, &sh->state) &&
test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
set_bit(STRIPE_SYNCING, &sh->state);
clear_bit(STRIPE_INSYNC, &sh->state);
@@ -4701,10 +4723,15 @@ static void handle_stripe(struct stripe_head *sh)
" to_write=%d failed=%d failed_num=%d,%d\n",
s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
s.failed_num[0], s.failed_num[1]);
- /* check if the array has lost more than max_degraded devices and,
+ /*
+ * check if the array has lost more than max_degraded devices and,
* if so, some requests might need to be failed.
+ *
+ * When the journal device has failed (log_failed), we only process
+ * the stripe if there is data that needs writing to the raid disks.
*/
- if (s.failed > conf->max_degraded || s.log_failed) {
+ if (s.failed > conf->max_degraded ||
+ (s.log_failed && s.injournal == 0)) {
sh->check_state = 0;
sh->reconstruct_state = 0;
break_stripe_batch_list(sh, 0);
@@ -5127,7 +5154,7 @@ static void raid5_align_endio(struct bio *bi)
struct mddev *mddev;
struct r5conf *conf;
struct md_rdev *rdev;
- int error = bi->bi_error;
+ blk_status_t error = bi->bi_status;
bio_put(bi);
@@ -5277,8 +5304,10 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group)
struct stripe_head *sh, *tmp;
struct list_head *handle_list = NULL;
struct r5worker_group *wg;
- bool second_try = !r5c_is_writeback(conf->log);
- bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state);
+ bool second_try = !r5c_is_writeback(conf->log) &&
+ !r5l_log_disk_error(conf);
+ bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state) ||
+ r5l_log_disk_error(conf);
again:
wg = NULL;
@@ -5702,7 +5731,7 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi)
release_stripe_plug(mddev, sh);
} else {
/* cannot get stripe for read-ahead, just give-up */
- bi->bi_error = -EIO;
+ bi->bi_status = BLK_STS_IOERR;
break;
}
}
@@ -6313,7 +6342,6 @@ int
raid5_set_cache_size(struct mddev *mddev, int size)
{
struct r5conf *conf = mddev->private;
- int err;
if (size <= 16 || size > 32768)
return -EINVAL;
@@ -6325,10 +6353,7 @@ raid5_set_cache_size(struct mddev *mddev, int size)
;
mutex_unlock(&conf->cache_size_mutex);
-
- err = md_allow_write(mddev);
- if (err)
- return err;
+ md_allow_write(mddev);
mutex_lock(&conf->cache_size_mutex);
while (size > conf->max_nr_stripes)
@@ -6918,7 +6943,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
goto abort;
}
- conf->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+ conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
if (!conf->bio_split)
goto abort;
conf->mddev = mddev;
@@ -7093,6 +7118,9 @@ static int raid5_run(struct mddev *mddev)
long long min_offset_diff = 0;
int first = 1;
+ if (mddev_init_writes_pending(mddev) < 0)
+ return -ENOMEM;
+
if (mddev->recovery_cp != MaxSector)
pr_notice("md/raid:%s: not clean -- starting background reconstruction\n",
mdname(mddev));
@@ -7530,7 +7558,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
* neilb: there is no locking about new writes here,
* so this cannot be safe.
*/
- if (atomic_read(&conf->active_stripes)) {
+ if (atomic_read(&conf->active_stripes) ||
+ atomic_read(&conf->r5c_cached_full_stripes) ||
+ atomic_read(&conf->r5c_cached_partial_stripes)) {
return -EBUSY;
}
log_exit(conf);