Diffstat (limited to 'drivers/md')
38 files changed, 1410 insertions, 781 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index d6bf294f3907..05f82ff6f016 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -328,13 +328,6 @@ struct cached_dev { */ atomic_t has_dirty; - /* - * Set to zero by things that touch the backing volume-- except - * writeback. Incremented by writeback. Used to determine when to - * accelerate idle writeback. - */ - atomic_t backing_idle; - struct bch_ratelimit writeback_rate; struct delayed_work writeback_rate_update; @@ -423,9 +416,9 @@ struct cache { /* * When allocating new buckets, prio_write() gets first dibs - since we * may not be able to allocate at all without writing priorities and gens. - * prio_buckets[] contains the last buckets we wrote priorities to (so - * gc can mark them as metadata), prio_next[] contains the buckets - * allocated for the next prio write. + * prio_last_buckets[] contains the last buckets we wrote priorities to + * (so gc can mark them as metadata), prio_buckets[] contains the + * buckets allocated for the next prio write. */ uint64_t *prio_buckets; uint64_t *prio_last_buckets; @@ -474,6 +467,7 @@ struct cache { struct gc_stat { size_t nodes; + size_t nodes_pre; size_t key_bytes; size_t nkeys; @@ -514,6 +508,8 @@ struct cache_set { struct cache_accounting accounting; unsigned long flags; + atomic_t idle_counter; + atomic_t at_max_writeback_rate; struct cache_sb sb; @@ -523,8 +519,10 @@ struct cache_set { struct bcache_device **devices; unsigned devices_max_used; + atomic_t attached_dev_nr; struct list_head cached_devs; uint64_t cached_dev_sectors; + atomic_long_t flash_dev_dirty_sectors; struct closure caching; struct closure sb_write; @@ -603,6 +601,10 @@ struct cache_set { */ atomic_t rescale; /* + * used for GC; identifies whether any front-side I/O is in flight + */ + atomic_t search_inflight; + /* * When we invalidate buckets, we use both the priority and the amount * of good data to determine which buckets to reuse first - to weight * those together consistently we keep track of the smallest nonzero @@ -995,7 +997,7 @@ void bch_open_buckets_free(struct cache_set *); int bch_cache_allocator_start(struct cache *ca); void bch_debug_exit(void); -int bch_debug_init(struct kobject *); +void bch_debug_init(struct kobject *kobj); void bch_request_exit(void); int bch_request_init(void); diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index f3403b45bc28..596c93b44e9b 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -366,6 +366,10 @@ EXPORT_SYMBOL(bch_btree_keys_init); /* Binary tree stuff for auxiliary search trees */ +/* + * return the array index next to j when doing an in-order traversal + * of a binary tree which is stored in a linear array + */ static unsigned inorder_next(unsigned j, unsigned size) { if (j * 2 + 1 < size) { @@ -379,6 +383,10 @@ static unsigned inorder_next(unsigned j, unsigned size) return j; } +/* + * return the array index previous to j when doing an in-order traversal + * of a binary tree which is stored in a linear array + */ static unsigned inorder_prev(unsigned j, unsigned size) { if (j * 2 < size) { @@ -421,6 +429,10 @@ static unsigned __to_inorder(unsigned j, unsigned size, unsigned extra) return j; } +/* + * Return the cacheline index in bset_tree->data, where j is the index + * in the linear array which stores the auxiliary binary tree + */ static unsigned to_inorder(unsigned j, struct bset_tree *t) { return __to_inorder(j, t->size, t->extra);
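The in-order helpers commented above walk a complete binary tree stored in a 1-indexed linear array (node j has children 2*j and 2*j + 1). Below is a minimal userspace sketch of the successor step, modeled on the kernel's inorder_next(); ffz_u() stands in for the kernel's ffz(), and the tree size and driver are made up for illustration:

#include <assert.h>
#include <stdio.h>

static unsigned ffz_u(unsigned x)
{
	return (unsigned)__builtin_ctz(~x);	/* first zero bit, like the kernel's ffz() */
}

/*
 * In-order successor of node j in a binary tree stored as a 1-indexed
 * array of `size` nodes, where the children of j are 2*j and 2*j + 1.
 */
static unsigned inorder_next(unsigned j, unsigned size)
{
	if (j * 2 + 1 < size) {
		j = j * 2 + 1;		/* step to the right child... */
		while (j * 2 < size)
			j *= 2;		/* ...then descend to the leftmost node below it */
	} else {
		j >>= ffz_u(j) + 1;	/* climb until we arrive from a left subtree */
	}
	return j;
}

int main(void)
{
	unsigned size = 16, count = 0, j = 1;

	while (j * 2 < size)		/* leftmost node of the whole tree */
		j *= 2;
	for (; j; j = inorder_next(j, size))
		count++;
	assert(count == size - 1);	/* nodes 1..15 each visited exactly once */
	printf("visited %u nodes in-order\n", count);
	return 0;
}

Starting from the leftmost node and repeatedly taking the successor visits each of the size - 1 nodes exactly once, which is the property the auxiliary search tree code relies on.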
@@ -441,6 +453,10 @@ static unsigned __inorder_to_tree(unsigned j, unsigned size, unsigned extra) return j; } +/* + * Return the index in the linear array which stores the auxiliary binary + * tree; j is the cacheline index of t->data. + */ static unsigned inorder_to_tree(unsigned j, struct bset_tree *t) { return __inorder_to_tree(j, t->size, t->extra); @@ -546,6 +562,20 @@ static inline uint64_t shrd128(uint64_t high, uint64_t low, uint8_t shift) return low; } +/* + * Calculate the mantissa value for struct bkey_float. + * If the most significant bit of f->exponent is not set, then + * - f->exponent >> 6 is 0 + * - p[0] points to bkey->low + * - p[-1] borrows bits from KEY_INODE() of bkey->high + * If the most significant bit of f->exponent is set, then + * - f->exponent >> 6 is 1 + * - p[0] points to bits from KEY_INODE() of bkey->high + * - p[-1] points to other bits from KEY_INODE() of + * bkey->high too. + * See make_bfloat() to check when the most significant bit of f->exponent + * is set or not. + */ static inline unsigned bfloat_mantissa(const struct bkey *k, struct bkey_float *f) { @@ -570,6 +600,16 @@ static void make_bfloat(struct bset_tree *t, unsigned j) BUG_ON(m < l || m > r); BUG_ON(bkey_next(p) != m); + /* + * If l and r have different KEY_INODE values (different backing + * devices), f->exponent records how many least significant bits + * differ in the KEY_INODE values and sets the most significant + * bit to 1 (by +64). + * If l and r have the same KEY_INODE value, f->exponent records + * how many least significant bits of bkey->low differ. + * See bfloat_mantissa() for how the most significant bit of + * f->exponent is used to calculate the bfloat mantissa value. + */ if (KEY_INODE(l) != KEY_INODE(r)) f->exponent = fls64(KEY_INODE(r) ^ KEY_INODE(l)) + 64; else @@ -633,6 +673,15 @@ void bch_bset_init_next(struct btree_keys *b, struct bset *i, uint64_t magic) } EXPORT_SYMBOL(bch_bset_init_next); +/* + * Build the auxiliary binary tree 'struct bset_tree *t'; this tree is used to + * accelerate bkey search in a btree node (pointed to by bset_tree->data in + * memory). After searching the auxiliary tree by calling bset_search_tree(), + * a struct bset_search_iter is returned which indicates the range [l, r] of + * bset_tree->data where the bkey being searched for might be. Then a + * following linear comparison does the exact search; see __bch_bset_search() + * for how the auxiliary tree is used. + */ void bch_bset_build_written_tree(struct btree_keys *b) { struct bset_tree *t = bset_tree_last(b); @@ -898,6 +947,17 @@ static struct bset_search_iter bset_search_tree(struct bset_tree *t, unsigned inorder, j, n = 1; do { + /* + * A bit trick here. + * If p < t->size, (int)(p - t->size) is negative and its most + * significant bit is set, so right shifting by 31 bits yields + * an all-ones mask. If p >= t->size, the most significant bit + * is not set and the shift yields 0. + * So the following 2 lines are equivalent to + * if (p >= t->size) + * p = 0; + * but a branch instruction is avoided. + */ unsigned p = n << 4; p &= ((int) (p - t->size)) >> 31; @@ -907,6 +967,9 @@ static struct bset_search_iter bset_search_tree(struct bset_tree *t, f = &t->tree[j]; /* + * A similar bit trick: a subtraction is used to avoid a branch + * instruction. + * * n = (f->mantissa > bfloat_mantissa()) * ? j * 2 * : j * 2 + 1;
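The branchless wrap in bset_search_tree() can be checked in isolation. A standalone sketch, assuming a 32-bit int and an arithmetic right shift of negative values (which is what the kernel relies on here); the function name is illustrative:

#include <assert.h>
#include <stdio.h>

/* Branchless form of "if (p >= size) p = 0;" from bset_search_tree(). */
static unsigned wrap_to_zero(unsigned p, unsigned size)
{
	p &= ((int)(p - size)) >> 31;	/* mask is all ones when p < size, else 0 */
	return p;
}

int main(void)
{
	const unsigned size = 128;
	unsigned p;

	for (p = 0; p < 4 * size; p++)
		assert(wrap_to_zero(p, size) == (p >= size ? 0 : p));
	puts("branchless clamp agrees with the branchy version");
	return 0;
}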
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 547c9eedc2f4..c19f7716df88 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -90,6 +90,9 @@ #define MAX_NEED_GC 64 #define MAX_SAVE_PRIO 72 +#define MAX_GC_TIMES 100 +#define MIN_GC_NODES 100 +#define GC_SLEEP_MS 100 #define PTR_DIRTY_BIT (((uint64_t) 1 << 36)) @@ -1008,6 +1011,13 @@ retry: BUG_ON(b->level != level); } + if (btree_node_io_error(b)) { + rw_unlock(write, b); + return ERR_PTR(-EIO); + } + + BUG_ON(!b->written); + b->parent = parent; b->accessed = 1; @@ -1019,13 +1029,6 @@ retry: for (; i <= b->keys.nsets; i++) prefetch(b->keys.set[i].data); - if (btree_node_io_error(b)) { - rw_unlock(write, b); - return ERR_PTR(-EIO); - } - - BUG_ON(!b->written); - return b; } @@ -1520,6 +1523,32 @@ static unsigned btree_gc_count_keys(struct btree *b) return ret; } +static size_t btree_gc_min_nodes(struct cache_set *c) +{ + size_t min_nodes; + + /* + * Since incremental GC pauses for 100ms whenever front-side I/O + * arrives, GC would last a long time on a large btree if it only + * processed a constant number (100) of nodes each time, and the + * front-side I/Os could run out of buckets (no new bucket can be + * allocated during GC) and be blocked again. So GC should process + * a varied rather than constant number of nodes, realized by + * dividing GC into a constant number (100) of rounds: when there + * are many btree nodes, GC processes more nodes each round; + * otherwise it processes fewer (but no less than MIN_GC_NODES). + */ + min_nodes = c->gc_stats.nodes / MAX_GC_TIMES; + if (min_nodes < MIN_GC_NODES) + min_nodes = MIN_GC_NODES; + + return min_nodes; +} + + static int btree_gc_recurse(struct btree *b, struct btree_op *op, struct closure *writes, struct gc_stat *gc) { @@ -1585,6 +1614,13 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op, memmove(r + 1, r, sizeof(r[0]) * (GC_MERGE_NODES - 1)); r->b = NULL; + if (atomic_read(&b->c->search_inflight) && + gc->nodes >= gc->nodes_pre + btree_gc_min_nodes(b->c)) { + gc->nodes_pre = gc->nodes; + ret = -EAGAIN; + break; + } + if (need_resched()) { ret = -EAGAIN; break; } @@ -1753,7 +1789,10 @@ static void bch_btree_gc(struct cache_set *c) closure_sync(&writes); cond_resched(); - if (ret && ret != -EAGAIN) + if (ret == -EAGAIN) + schedule_timeout_interruptible(msecs_to_jiffies + (GC_SLEEP_MS)); + else if (ret) pr_warn("gc failed!"); } while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags)); @@ -1834,8 +1873,14 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op) do { k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad); - if (k) + if (k) { btree_node_prefetch(b, k); + /* + * initialize c->gc_stats.nodes + * for incremental GC + */ + b->c->gc_stats.nodes++; + } if (p) ret = btree(check_recurse, p, b, op);
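To see what btree_gc_min_nodes() works out to in practice, here is a small sketch using the same constants as the patch; the example node counts are arbitrary:

#include <stdio.h>
#include <stddef.h>

#define MAX_GC_TIMES	100
#define MIN_GC_NODES	100

static size_t btree_gc_min_nodes(size_t total_nodes)
{
	size_t min_nodes = total_nodes / MAX_GC_TIMES;

	return min_nodes < MIN_GC_NODES ? MIN_GC_NODES : min_nodes;
}

int main(void)
{
	size_t totals[] = { 500, 20000, 3000000 };
	size_t i;

	for (i = 0; i < sizeof(totals) / sizeof(totals[0]); i++)
		printf("%zu btree nodes -> at least %zu nodes per GC round\n",
		       totals[i], btree_gc_min_nodes(totals[i]));
	return 0;
}

Small btrees get the MIN_GC_NODES floor, large ones scale up, so a full GC pass pauses at most about MAX_GC_TIMES times either way, which is the point of the comment above.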
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index d211e2c25b6b..68e9d926134d 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -152,7 +152,7 @@ static inline bool btree_node_ ## flag(struct btree *b) \ { return test_bit(BTREE_NODE_ ## flag, &b->flags); } \ \ static inline void set_btree_node_ ## flag(struct btree *b) \ -{ set_bit(BTREE_NODE_ ## flag, &b->flags); } \ +{ set_bit(BTREE_NODE_ ## flag, &b->flags); } enum btree_flags { BTREE_NODE_io_error, diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c index 0e14969182c6..618253683d40 100644 --- a/drivers/md/bcache/closure.c +++ b/drivers/md/bcache/closure.c @@ -199,11 +199,16 @@ static const struct file_operations debug_ops = { .release = single_release }; -int __init closure_debug_init(void) +void __init closure_debug_init(void) { - closure_debug = debugfs_create_file("closures", - 0400, bcache_debug, NULL, &debug_ops); - return IS_ERR_OR_NULL(closure_debug); + if (!IS_ERR_OR_NULL(bcache_debug)) + /* + * it is unnecessary to check the return value of + * debugfs_create_file(); nothing here cares + * whether it succeeds. + */ + closure_debug = debugfs_create_file( + "closures", 0400, bcache_debug, NULL, &debug_ops); } #endif diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h index 71427eb5fdae..7c2c5bc7c88b 100644 --- a/drivers/md/bcache/closure.h +++ b/drivers/md/bcache/closure.h @@ -186,13 +186,13 @@ static inline void closure_sync(struct closure *cl) #ifdef CONFIG_BCACHE_CLOSURES_DEBUG -int closure_debug_init(void); +void closure_debug_init(void); void closure_debug_create(struct closure *cl); void closure_debug_destroy(struct closure *cl); #else -static inline int closure_debug_init(void) { return 0; } +static inline void closure_debug_init(void) {} static inline void closure_debug_create(struct closure *cl) {} static inline void closure_debug_destroy(struct closure *cl) {} diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index d030ce3025a6..12034c07257b 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -110,11 +110,15 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio) struct bio_vec bv, cbv; struct bvec_iter iter, citer = { 0 }; - check = bio_clone_kmalloc(bio, GFP_NOIO); + check = bio_kmalloc(GFP_NOIO, bio_segments(bio)); if (!check) return; + check->bi_disk = bio->bi_disk; check->bi_opf = REQ_OP_READ; + check->bi_iter.bi_sector = bio->bi_iter.bi_sector; + check->bi_iter.bi_size = bio->bi_iter.bi_size; + bch_bio_map(check, NULL); if (bch_bio_alloc_pages(check, GFP_NOIO)) goto out_put; @@ -248,11 +252,12 @@ void bch_debug_exit(void) debugfs_remove_recursive(bcache_debug); } -int __init bch_debug_init(struct kobject *kobj) +void __init bch_debug_init(struct kobject *kobj) { - if (!IS_ENABLED(CONFIG_DEBUG_FS)) - return 0; - + /* + * it is unnecessary to check the return value of + * debugfs_create_dir(); nothing here cares + * whether it succeeds. + */ bcache_debug = debugfs_create_dir("bcache", NULL); - return IS_ERR_OR_NULL(bcache_debug); } diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 18f1b5239620..10748c626a1d 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -828,6 +828,7 @@ void bch_journal_free(struct cache_set *c) free_pages((unsigned long) c->journal.w[1].data, JSET_BITS); free_pages((unsigned long) c->journal.w[0].data, JSET_BITS); free_fifo(&c->journal.pin); + free_heap(&c->flush_btree); } int bch_journal_alloc(struct cache_set *c) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index ae67f5fa8047..7dbe8b6316a0 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -107,7 +107,7 @@ static int bch_keylist_realloc(struct keylist *l, unsigned u64s, /* * The journalling code doesn't handle the case where the keys to insert * are bigger than an empty write: If we just return -ENOMEM here, - * bio_insert() and bio_invalidate() will insert the keys created so far + * bch_data_insert_keys() will insert the keys created so far * and finish the rest when the keylist is empty.
*/ if (newsize * sizeof(uint64_t) > block_bytes(c) - sizeof(struct jset)) @@ -667,8 +667,7 @@ static void backing_request_endio(struct bio *bio) static void bio_complete(struct search *s) { if (s->orig_bio) { - generic_end_io_acct(s->d->disk->queue, - bio_data_dir(s->orig_bio), + generic_end_io_acct(s->d->disk->queue, bio_op(s->orig_bio), &s->d->disk->part0, s->start_time); trace_bcache_request_end(s->d, s->orig_bio); @@ -702,6 +701,8 @@ static void search_free(struct closure *cl) { struct search *s = container_of(cl, struct search, cl); + atomic_dec(&s->d->c->search_inflight); + if (s->iop.bio) bio_put(s->iop.bio); @@ -719,6 +720,7 @@ static inline struct search *search_alloc(struct bio *bio, closure_init(&s->cl, NULL); do_bio_hook(s, bio, request_endio); + atomic_inc(&d->c->search_inflight); s->orig_bio = bio; s->cache_miss = NULL; @@ -1062,8 +1064,7 @@ static void detached_dev_end_io(struct bio *bio) bio->bi_end_io = ddip->bi_end_io; bio->bi_private = ddip->bi_private; - generic_end_io_acct(ddip->d->disk->queue, - bio_data_dir(bio), + generic_end_io_acct(ddip->d->disk->queue, bio_op(bio), &ddip->d->disk->part0, ddip->start_time); if (bio->bi_status) { @@ -1102,6 +1103,44 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio) generic_make_request(bio); } +static void quit_max_writeback_rate(struct cache_set *c, + struct cached_dev *this_dc) +{ + int i; + struct bcache_device *d; + struct cached_dev *dc; + + /* + * The mutex bch_register_lock may be contended by other parallel + * requesters, or by attach/detach operations on another backing + * device. Waiting for the mutex lock may increase I/O request + * latency for seconds or more. To avoid such a situation, if + * mutex_trylock() fails, only the writeback rate of the current + * cached device is set to 1, and __update_writeback_rate() will + * decide the writeback rate of the other cached devices (remember + * that c->idle_counter is 0 already). + */ + if (mutex_trylock(&bch_register_lock)) { + for (i = 0; i < c->devices_max_used; i++) { + if (!c->devices[i]) + continue; + + if (UUID_FLASH_ONLY(&c->uuids[i])) + continue; + + d = c->devices[i]; + dc = container_of(d, struct cached_dev, disk); + /* + * set the writeback rate to the default minimum value, + * then let update_writeback_rate() decide the + * upcoming rate. + */ + atomic_long_set(&dc->writeback_rate.rate, 1); + } + mutex_unlock(&bch_register_lock); + } else + atomic_long_set(&this_dc->writeback_rate.rate, 1); +} +
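The locking strategy in quit_max_writeback_rate() — try the global registration lock, and fall back to touching only the current device when it is contended — can be sketched in userspace with pthreads; the device array, NDEV, and the function name here are hypothetical:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NDEV 4

static pthread_mutex_t register_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_long rate[NDEV];

/*
 * Reset every device's writeback rate if the registry lock is free;
 * otherwise reset only our own device instead of stalling this I/O
 * while someone else holds the lock.
 */
static void quit_max_rate(int self)
{
	if (pthread_mutex_trylock(&register_lock) == 0) {
		int i;

		for (i = 0; i < NDEV; i++)
			atomic_store(&rate[i], 1);
		pthread_mutex_unlock(&register_lock);
	} else {
		atomic_store(&rate[self], 1);
	}
}

int main(void)
{
	quit_max_rate(2);
	printf("rate[2] = %ld\n", atomic_load(&rate[2]));
	return 0;
}

Either branch leaves the caller's own device at the minimum rate, so the hot I/O path never blocks on bch_register_lock.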
/* Cached devices - read & write stuff */ static blk_qc_t cached_dev_make_request(struct request_queue *q, @@ -1119,8 +1158,25 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q, return BLK_QC_T_NONE; } - atomic_set(&dc->backing_idle, 0); - generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0); + if (likely(d->c)) { + if (atomic_read(&d->c->idle_counter)) + atomic_set(&d->c->idle_counter, 0); + /* + * If at_max_writeback_rate of the cache set is true and new I/O + * comes in, quit the max writeback rate for all cached devices + * attached to this cache set, and set at_max_writeback_rate + * to false. + */ + if (unlikely(atomic_read(&d->c->at_max_writeback_rate) == 1)) { + atomic_set(&d->c->at_max_writeback_rate, 0); + quit_max_writeback_rate(d->c, dc); + } + } + + generic_start_io_acct(q, + bio_op(bio), + bio_sectors(bio), + &d->disk->part0); bio_set_dev(bio, dc->bdev); bio->bi_iter.bi_sector += dc->sb.data_offset; @@ -1229,7 +1285,6 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q, struct search *s; struct closure *cl; struct bcache_device *d = bio->bi_disk->private_data; - int rw = bio_data_dir(bio); if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) { bio->bi_status = BLK_STS_IOERR; @@ -1237,7 +1292,7 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q, return BLK_QC_T_NONE; } - generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0); + generic_start_io_acct(q, bio_op(bio), bio_sectors(bio), &d->disk->part0); s = search_alloc(bio, d); cl = &s->cl; @@ -1254,7 +1309,7 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q, flash_dev_nodata, bcache_wq); return BLK_QC_T_NONE; - } else if (rw) { + } else if (bio_data_dir(bio)) { bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys, &KEY(d->id, bio->bi_iter.bi_sector, 0), &KEY(d->id, bio_end_sector(bio), 0)); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index fa4058e43202..55a37641aa95 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -181,7 +181,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev, goto err; } - sb->last_mount = get_seconds(); + sb->last_mount = (u32)ktime_get_real_seconds(); err = NULL; get_page(bh->b_page); @@ -696,12 +696,14 @@ static void bcache_device_detach(struct bcache_device *d) { lockdep_assert_held(&bch_register_lock); + atomic_dec(&d->c->attached_dev_nr); + if (test_bit(BCACHE_DEV_DETACHING, &d->flags)) { struct uuid_entry *u = d->c->uuids + d->id; SET_UUID_FLASH_ONLY(u, 0); memcpy(u->uuid, invalid_uuid, 16); - u->invalidated = cpu_to_le32(get_seconds()); + u->invalidated = cpu_to_le32((u32)ktime_get_real_seconds()); bch_uuid_write(d->c); } @@ -796,11 +798,12 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, return idx; if (bioset_init(&d->bio_split, 4, offsetof(struct bbio, bio), - BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER) || - !(d->disk = alloc_disk(BCACHE_MINORS))) { - ida_simple_remove(&bcache_device_idx, idx); - return -ENOMEM; - } + BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER)) + goto err; + + d->disk = alloc_disk(BCACHE_MINORS); + if (!d->disk) + goto err; set_capacity(d->disk, sectors); snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", idx); @@ -834,6 +837,11 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, blk_queue_write_cache(q, true, true); return 0; + +err: + ida_simple_remove(&bcache_device_idx, idx); + return -ENOMEM; + } /* Cached device */ @@ -1027,7 +1035,7 @@ void bch_cached_dev_detach(struct cached_dev *dc) int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, uint8_t *set_uuid) { - uint32_t rtime = cpu_to_le32(get_seconds()); + uint32_t rtime = cpu_to_le32((u32)ktime_get_real_seconds()); struct uuid_entry *u; struct cached_dev *exist_dc, *t; @@ -1070,7 +1078,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, (BDEV_STATE(&dc->sb) == BDEV_STATE_STALE || BDEV_STATE(&dc->sb) == BDEV_STATE_NONE)) { memcpy(u->uuid, invalid_uuid, 16); - u->invalidated = cpu_to_le32((u32)ktime_get_real_seconds()); u = NULL; } @@ 
-1138,6 +1146,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, bch_cached_dev_run(dc); bcache_device_link(&dc->disk, c, "bdev"); + atomic_inc(&c->attached_dev_nr); /* Allow the writeback thread to proceed */ up_write(&dc->writeback_lock); @@ -1285,6 +1294,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page, pr_info("registered backing device %s", dc->backing_dev_name); list_add(&dc->list, &uncached_devices); + /* attach to a matched cache set if it exists */ list_for_each_entry(c, &bch_cache_sets, list) bch_cached_dev_attach(dc, c, NULL); @@ -1311,6 +1321,8 @@ static void flash_dev_free(struct closure *cl) { struct bcache_device *d = container_of(cl, struct bcache_device, cl); mutex_lock(&bch_register_lock); + atomic_long_sub(bcache_dev_sectors_dirty(d), + &d->c->flash_dev_dirty_sectors); bcache_device_free(d); mutex_unlock(&bch_register_lock); kobject_put(&d->kobj); @@ -1390,7 +1402,7 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size) get_random_bytes(u->uuid, 16); memset(u->label, 0, 32); - u->first_reg = u->last_reg = cpu_to_le32(get_seconds()); + u->first_reg = u->last_reg = cpu_to_le32((u32)ktime_get_real_seconds()); SET_UUID_FLASH_ONLY(u, 1); u->sectors = size >> 9; @@ -1687,6 +1699,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) c->block_bits = ilog2(sb->block_size); c->nr_uuids = bucket_bytes(c) / sizeof(struct uuid_entry); c->devices_max_used = 0; + atomic_set(&c->attached_dev_nr, 0); c->btree_pages = bucket_pages(c); if (c->btree_pages > BTREE_MAX_PAGES) c->btree_pages = max_t(int, c->btree_pages / 4, @@ -1894,7 +1907,7 @@ static void run_cache_set(struct cache_set *c) goto err; closure_sync(&cl); - c->sb.last_mount = get_seconds(); + c->sb.last_mount = (u32)ktime_get_real_seconds(); bcache_write_super(c); list_for_each_entry_safe(dc, t, &uncached_devices, list) @@ -2163,8 +2176,12 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, if (!try_module_get(THIS_MODULE)) return -EBUSY; - if (!(path = kstrndup(buffer, size, GFP_KERNEL)) || - !(sb = kmalloc(sizeof(struct cache_sb), GFP_KERNEL))) + path = kstrndup(buffer, size, GFP_KERNEL); + if (!path) + goto err; + + sb = kmalloc(sizeof(struct cache_sb), GFP_KERNEL); + if (!sb) goto err; err = "failed to open device"; @@ -2324,13 +2341,21 @@ static int __init bcache_init(void) return bcache_major; } - if (!(bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0)) || - !(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) || - bch_request_init() || - bch_debug_init(bcache_kobj) || closure_debug_init() || + bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0); + if (!bcache_wq) + goto err; + + bcache_kobj = kobject_create_and_add("bcache", fs_kobj); + if (!bcache_kobj) + goto err; + + if (bch_request_init() || sysfs_create_files(bcache_kobj, files)) goto err; + bch_debug_init(bcache_kobj); + closure_debug_init(); + return 0; err: bcache_exit(); diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 225b15aa0340..81d3520b0702 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -149,6 +149,7 @@ SHOW(__bch_cached_dev) struct cached_dev *dc = container_of(kobj, struct cached_dev, disk.kobj); const char *states[] = { "no cache", "clean", "dirty", "inconsistent" }; + int wb = dc->writeback_running; #define var(stat) (dc->stat) @@ -170,7 +171,8 @@ SHOW(__bch_cached_dev) var_printf(writeback_running, "%i"); var_print(writeback_delay); var_print(writeback_percent); - 
sysfs_hprint(writeback_rate, dc->writeback_rate.rate << 9); + sysfs_hprint(writeback_rate, + wb ? atomic_long_read(&dc->writeback_rate.rate) << 9 : 0); sysfs_hprint(io_errors, atomic_read(&dc->io_errors)); sysfs_printf(io_error_limit, "%i", dc->error_limit); sysfs_printf(io_disable, "%i", dc->io_disable); @@ -188,15 +190,22 @@ SHOW(__bch_cached_dev) char change[20]; s64 next_io; - bch_hprint(rate, dc->writeback_rate.rate << 9); - bch_hprint(dirty, bcache_dev_sectors_dirty(&dc->disk) << 9); - bch_hprint(target, dc->writeback_rate_target << 9); - bch_hprint(proportional,dc->writeback_rate_proportional << 9); - bch_hprint(integral, dc->writeback_rate_integral_scaled << 9); - bch_hprint(change, dc->writeback_rate_change << 9); - - next_io = div64_s64(dc->writeback_rate.next - local_clock(), - NSEC_PER_MSEC); + /* + * Except for dirty and target, other values should + * be 0 if writeback is not running. + */ + bch_hprint(rate, + wb ? atomic_long_read(&dc->writeback_rate.rate) << 9 + : 0); + bch_hprint(dirty, bcache_dev_sectors_dirty(&dc->disk) << 9); + bch_hprint(target, dc->writeback_rate_target << 9); + bch_hprint(proportional, + wb ? dc->writeback_rate_proportional << 9 : 0); + bch_hprint(integral, + wb ? dc->writeback_rate_integral_scaled << 9 : 0); + bch_hprint(change, wb ? dc->writeback_rate_change << 9 : 0); + next_io = wb ? div64_s64(dc->writeback_rate.next - local_clock(), + NSEC_PER_MSEC) : 0; return sprintf(buf, "rate:\t\t%s/sec\n" @@ -255,8 +264,19 @@ STORE(__cached_dev) sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40); - sysfs_strtoul_clamp(writeback_rate, - dc->writeback_rate.rate, 1, INT_MAX); + if (attr == &sysfs_writeback_rate) { + ssize_t ret; + long int v = atomic_long_read(&dc->writeback_rate.rate); + + ret = strtoul_safe_clamp(buf, v, 1, INT_MAX); + + if (!ret) { + atomic_long_set(&dc->writeback_rate.rate, v); + ret = size; + } + + return ret; + } sysfs_strtoul_clamp(writeback_rate_update_seconds, dc->writeback_rate_update_seconds, @@ -338,8 +358,8 @@ STORE(__cached_dev) if (!v) return size; } - - pr_err("Can't attach %s: cache set not found", buf); + if (v == -ENOENT) + pr_err("Can't attach %s: cache set not found", buf); return v; }
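The new writeback_rate store hook above replaces sysfs_strtoul_clamp() because the rate now lives in an atomic_long_t and must be published atomically. A simplified userspace model of the same parse-clamp-publish sequence (the function name and error handling are illustrative, not the kernel sysfs API; 1024 is the default rate set in bch_cached_dev_writeback_init()):

#include <errno.h>
#include <limits.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

static atomic_long writeback_rate = 1024;	/* sectors per second */

/* Parse buf, clamp to [1, INT_MAX], then publish atomically. */
static int store_writeback_rate(const char *buf)
{
	char *end;
	long v;

	errno = 0;
	v = strtol(buf, &end, 10);	/* assumes 64-bit long for the demo input */
	if (errno || end == buf)
		return -EINVAL;
	if (v < 1)
		v = 1;
	if (v > INT_MAX)
		v = INT_MAX;
	atomic_store(&writeback_rate, v);
	return 0;
}

int main(void)
{
	store_writeback_rate("999999999999");
	printf("rate clamped to %ld\n", atomic_load(&writeback_rate));
	return 0;
}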
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index fc479b026d6d..b15256bcf0e7 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -200,7 +200,7 @@ uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done) { uint64_t now = local_clock(); - d->next += div_u64(done * NSEC_PER_SEC, d->rate); + d->next += div_u64(done * NSEC_PER_SEC, atomic_long_read(&d->rate)); /* Bound the time. Don't let us fall further than 2 seconds behind * (this prevents unnecessary backlog that would make it impossible diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index cced87f8eb27..f7b0133c9d2f 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -442,7 +442,7 @@ struct bch_ratelimit { * Rate at which we want to do work, in units per second * The units here correspond to the units passed to bch_next_delay() */ - uint32_t rate; + atomic_long_t rate; }; static inline void bch_ratelimit_reset(struct bch_ratelimit *d) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index ad45ebe1a74b..481d4cf38ac0 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -27,7 +27,7 @@ static uint64_t __calc_target_rate(struct cached_dev *dc) * flash-only devices */ uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size - - bcache_flash_devs_sectors_dirty(c); + atomic_long_read(&c->flash_dev_dirty_sectors); /* * Unfortunately there is no control of global dirty data. If the @@ -104,11 +104,56 @@ static void __update_writeback_rate(struct cached_dev *dc) dc->writeback_rate_proportional = proportional_scaled; dc->writeback_rate_integral_scaled = integral_scaled; - dc->writeback_rate_change = new_rate - dc->writeback_rate.rate; - dc->writeback_rate.rate = new_rate; + dc->writeback_rate_change = new_rate - + atomic_long_read(&dc->writeback_rate.rate); + atomic_long_set(&dc->writeback_rate.rate, new_rate); dc->writeback_rate_target = target; } +static bool set_at_max_writeback_rate(struct cache_set *c, + struct cached_dev *dc) +{ + /* + * idle_counter is increased every time update_writeback_rate() is + * called. If all backing devices attached to the same cache set have + * identical dc->writeback_rate_update_seconds values, it is about 6 + * rounds of update_writeback_rate() on each backing device before + * c->at_max_writeback_rate is set to 1, and then the max writeback + * rate is set for each dc->writeback_rate.rate. + * In order to avoid the extra locking cost of counting the exact + * number of dirty cached devices, c->attached_dev_nr is used to + * calculate the idle threshold. It might be bigger if not all cached + * devices are in writeback mode, but it still works well with a + * limited number of extra rounds of update_writeback_rate(). + */ + if (atomic_inc_return(&c->idle_counter) < + atomic_read(&c->attached_dev_nr) * 6) + return false; + + if (atomic_read(&c->at_max_writeback_rate) != 1) + atomic_set(&c->at_max_writeback_rate, 1); + + atomic_long_set(&dc->writeback_rate.rate, INT_MAX); + + /* keep writeback_rate_target as existing value */ + dc->writeback_rate_proportional = 0; + dc->writeback_rate_integral_scaled = 0; + dc->writeback_rate_change = 0; + + /* + * Check c->idle_counter and c->at_max_writeback_rate again in case + * new I/O arrives before set_at_max_writeback_rate() returns. + * Then the writeback rate is set to 1, and its new value should be + * decided via __update_writeback_rate(). + */ + if ((atomic_read(&c->idle_counter) < + atomic_read(&c->attached_dev_nr) * 6) || + !atomic_read(&c->at_max_writeback_rate)) + return false; + + return true; +} +
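A back-of-the-envelope reading of the "6 rounds" heuristic in set_at_max_writeback_rate(); the device count is made up, and 5 seconds is assumed as the default writeback_rate_update_seconds:

#include <stdio.h>

int main(void)
{
	unsigned attached_dev_nr = 3;	/* hypothetical cache set */
	unsigned update_secs = 5;	/* assumed writeback_rate_update_seconds */
	unsigned threshold = attached_dev_nr * 6;

	/*
	 * Each attached device increments idle_counter once per update
	 * period, so the counter reaches the threshold after roughly six
	 * periods of zero front-side I/O, independent of device count.
	 */
	printf("idle_counter threshold: %u (~%u seconds of idle time)\n",
	       threshold, 6 * update_secs);
	return 0;
}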
static void update_writeback_rate(struct work_struct *work) { struct cached_dev *dc = container_of(to_delayed_work(work), @@ -136,13 +181,20 @@ return; } - down_read(&dc->writeback_lock); - - if (atomic_read(&dc->has_dirty) && - dc->writeback_percent) - __update_writeback_rate(dc); + if (atomic_read(&dc->has_dirty) && dc->writeback_percent) { + /* + * If the whole cache set is idle, set_at_max_writeback_rate() + * will set the writeback rate to a max number. Then it is + * unnecessary to update the writeback rate for an idle cache + * set that is already at the maximum writeback rate. + */ + if (!set_at_max_writeback_rate(c, dc)) { + down_read(&dc->writeback_lock); + __update_writeback_rate(dc); + up_read(&dc->writeback_lock); + } + } - up_read(&dc->writeback_lock); /* * CACHE_SET_IO_DISABLE might be set via sysfs interface, @@ -422,27 +474,6 @@ static void read_dirty(struct cached_dev *dc) delay = writeback_delay(dc, size); - /* If the control system would wait for at least half a - * second, and there's been no reqs hitting the backing disk - * for awhile: use an alternate mode where we have at most - * one contiguous set of writebacks in flight at a time. If - * someone wants to do IO it will be quick, as it will only - * have to contend with one operation in flight, and we'll - * be round-tripping data to the backing disk as quickly as - * it can accept it. - */ - if (delay >= HZ / 2) { - /* 3 means at least 1.5 seconds, up to 7.5 if we - * have slowed way down. - */ - if (atomic_inc_return(&dc->backing_idle) >= 3) { - /* Wait for current I/Os to finish */ - closure_sync(&cl); - /* And immediately launch a new set. 
*/ - delay = 0; - } - } - while (!kthread_should_stop() && !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) && delay) { @@ -476,6 +507,9 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode, if (!d) return; + if (UUID_FLASH_ONLY(&c->uuids[inode])) + atomic_long_add(nr_sectors, &c->flash_dev_dirty_sectors); + stripe = offset_to_stripe(d, offset); stripe_offset = offset & (d->stripe_size - 1); @@ -673,10 +707,14 @@ static int bch_writeback_thread(void *arg) } /* Init */ +#define INIT_KEYS_EACH_TIME 500000 +#define INIT_KEYS_SLEEP_MS 100 struct sectors_dirty_init { struct btree_op op; unsigned inode; + size_t count; + struct bkey start; }; static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b, @@ -691,18 +729,37 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b, bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k), KEY_START(k), KEY_SIZE(k)); + op->count++; + if (atomic_read(&b->c->search_inflight) && + !(op->count % INIT_KEYS_EACH_TIME)) { + bkey_copy_key(&op->start, k); + return -EAGAIN; + } + return MAP_CONTINUE; } void bch_sectors_dirty_init(struct bcache_device *d) { struct sectors_dirty_init op; + int ret; bch_btree_op_init(&op.op, -1); op.inode = d->id; - - bch_btree_map_keys(&op.op, d->c, &KEY(op.inode, 0, 0), - sectors_dirty_init_fn, 0); + op.count = 0; + op.start = KEY(op.inode, 0, 0); + + do { + ret = bch_btree_map_keys(&op.op, d->c, &op.start, + sectors_dirty_init_fn, 0); + if (ret == -EAGAIN) + schedule_timeout_interruptible( + msecs_to_jiffies(INIT_KEYS_SLEEP_MS)); + else if (ret < 0) { + pr_warn("sectors dirty init failed, ret=%d!", ret); + break; + } + } while (ret == -EAGAIN); } void bch_cached_dev_writeback_init(struct cached_dev *dc) @@ -715,7 +772,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc) dc->writeback_running = true; dc->writeback_percent = 10; dc->writeback_delay = 30; - dc->writeback_rate.rate = 1024; + atomic_long_set(&dc->writeback_rate.rate, 1024); dc->writeback_rate_minimum = 8; dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT; diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 610fb01de629..3745d7004c47 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -28,25 +28,6 @@ static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d) return ret; } -static inline uint64_t bcache_flash_devs_sectors_dirty(struct cache_set *c) -{ - uint64_t i, ret = 0; - - mutex_lock(&bch_register_lock); - - for (i = 0; i < c->devices_max_used; i++) { - struct bcache_device *d = c->devices[i]; - - if (!d || !UUID_FLASH_ONLY(&c->uuids[i])) - continue; - ret += bcache_dev_sectors_dirty(d); - } - - mutex_unlock(&bch_register_lock); - - return ret; -} - static inline unsigned offset_to_stripe(struct bcache_device *d, uint64_t offset) { diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 0d7212410e21..69dddeab124c 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -363,7 +363,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) disk_super->version = cpu_to_le32(cmd->version); memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name)); memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version)); - disk_super->policy_hint_size = 0; + disk_super->policy_hint_size = cpu_to_le32(0); __copy_sm_root(cmd, disk_super); @@ -701,6 +701,7 @@ static int __commit_transaction(struct dm_cache_metadata *cmd, 
disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]); disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]); disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]); + disk_super->policy_hint_size = cpu_to_le32(cmd->policy_hint_size); disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits); disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses); @@ -1322,6 +1323,7 @@ static int __load_mapping_v1(struct dm_cache_metadata *cmd, dm_oblock_t oblock; unsigned flags; + bool dirty = true; dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le); memcpy(&mapping, mapping_value_le, sizeof(mapping)); @@ -1332,8 +1334,10 @@ static int __load_mapping_v1(struct dm_cache_metadata *cmd, dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le); memcpy(&hint, hint_value_le, sizeof(hint)); } + if (cmd->clean_when_opened) + dirty = flags & M_DIRTY; - r = fn(context, oblock, to_cblock(cb), flags & M_DIRTY, + r = fn(context, oblock, to_cblock(cb), dirty, le32_to_cpu(hint), hints_valid); if (r) { DMERR("policy couldn't load cache block %llu", @@ -1361,7 +1365,7 @@ static int __load_mapping_v2(struct dm_cache_metadata *cmd, dm_oblock_t oblock; unsigned flags; - bool dirty; + bool dirty = true; dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le); memcpy(&mapping, mapping_value_le, sizeof(mapping)); @@ -1372,8 +1376,9 @@ static int __load_mapping_v2(struct dm_cache_metadata *cmd, dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le); memcpy(&hint, hint_value_le, sizeof(hint)); } + if (cmd->clean_when_opened) + dirty = dm_bitset_cursor_get_value(dirty_cursor); - dirty = dm_bitset_cursor_get_value(dirty_cursor); r = fn(context, oblock, to_cblock(cb), dirty, le32_to_cpu(hint), hints_valid); if (r) { diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index ce14a3d1f609..a53413371725 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1188,9 +1188,8 @@ static void copy_complete(int read_err, unsigned long write_err, void *context) queue_continuation(mg->cache->wq, &mg->k); } -static int copy(struct dm_cache_migration *mg, bool promote) +static void copy(struct dm_cache_migration *mg, bool promote) { - int r; struct dm_io_region o_region, c_region; struct cache *cache = mg->cache; @@ -1203,11 +1202,9 @@ static int copy(struct dm_cache_migration *mg, bool promote) c_region.count = cache->sectors_per_block; if (promote) - r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, &mg->k); + dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, &mg->k); else - r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, &mg->k); - - return r; + dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, &mg->k); } static void bio_drop_shared_lock(struct cache *cache, struct bio *bio) @@ -1449,12 +1446,7 @@ static void mg_full_copy(struct work_struct *ws) } init_continuation(&mg->k, mg_upgrade_lock); - - if (copy(mg, is_policy_promote)) { - DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache)); - mg->k.input = BLK_STS_IOERR; - mg_complete(mg, false); - } + copy(mg, is_policy_promote); } static void mg_copy(struct work_struct *ws) @@ -2250,7 +2242,7 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as, {0, 2, "Invalid number of cache feature arguments"}, }; - int r; + int r, mode_ctr = 0; unsigned argc; const char *arg; struct cache_features *cf = &ca->features; @@ -2264,14 
+2256,20 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as, while (argc--) { arg = dm_shift_arg(as); - if (!strcasecmp(arg, "writeback")) + if (!strcasecmp(arg, "writeback")) { cf->io_mode = CM_IO_WRITEBACK; + mode_ctr++; + } - else if (!strcasecmp(arg, "writethrough")) + else if (!strcasecmp(arg, "writethrough")) { cf->io_mode = CM_IO_WRITETHROUGH; + mode_ctr++; + } - else if (!strcasecmp(arg, "passthrough")) + else if (!strcasecmp(arg, "passthrough")) { cf->io_mode = CM_IO_PASSTHROUGH; + mode_ctr++; + } else if (!strcasecmp(arg, "metadata2")) cf->metadata_version = 2; @@ -2282,6 +2280,11 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as, } } + if (mode_ctr > 1) { + *error = "Duplicate cache io_mode features requested"; + return -EINVAL; + } + return 0; } diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index b61b069c33af..f266c81f396f 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -99,7 +99,7 @@ struct crypt_iv_operations { }; struct iv_essiv_private { - struct crypto_ahash *hash_tfm; + struct crypto_shash *hash_tfm; u8 *salt; }; @@ -144,7 +144,7 @@ struct crypt_config { struct workqueue_struct *io_queue; struct workqueue_struct *crypt_queue; - wait_queue_head_t write_thread_wait; + spinlock_t write_thread_lock; struct task_struct *write_thread; struct rb_root write_tree; @@ -327,25 +327,22 @@ static int crypt_iv_plain64be_gen(struct crypt_config *cc, u8 *iv, static int crypt_iv_essiv_init(struct crypt_config *cc) { struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; - AHASH_REQUEST_ON_STACK(req, essiv->hash_tfm); - struct scatterlist sg; + SHASH_DESC_ON_STACK(desc, essiv->hash_tfm); struct crypto_cipher *essiv_tfm; int err; - sg_init_one(&sg, cc->key, cc->key_size); - ahash_request_set_tfm(req, essiv->hash_tfm); - ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); - ahash_request_set_crypt(req, &sg, essiv->salt, cc->key_size); + desc->tfm = essiv->hash_tfm; + desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; - err = crypto_ahash_digest(req); - ahash_request_zero(req); + err = crypto_shash_digest(desc, cc->key, cc->key_size, essiv->salt); + shash_desc_zero(desc); if (err) return err; essiv_tfm = cc->iv_private; err = crypto_cipher_setkey(essiv_tfm, essiv->salt, - crypto_ahash_digestsize(essiv->hash_tfm)); + crypto_shash_digestsize(essiv->hash_tfm)); if (err) return err; @@ -356,7 +353,7 @@ static int crypt_iv_essiv_init(struct crypt_config *cc) static int crypt_iv_essiv_wipe(struct crypt_config *cc) { struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; - unsigned salt_size = crypto_ahash_digestsize(essiv->hash_tfm); + unsigned salt_size = crypto_shash_digestsize(essiv->hash_tfm); struct crypto_cipher *essiv_tfm; int r, err = 0; @@ -408,7 +405,7 @@ static void crypt_iv_essiv_dtr(struct crypt_config *cc) struct crypto_cipher *essiv_tfm; struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; - crypto_free_ahash(essiv->hash_tfm); + crypto_free_shash(essiv->hash_tfm); essiv->hash_tfm = NULL; kzfree(essiv->salt); @@ -426,7 +423,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, const char *opts) { struct crypto_cipher *essiv_tfm = NULL; - struct crypto_ahash *hash_tfm = NULL; + struct crypto_shash *hash_tfm = NULL; u8 *salt = NULL; int err; @@ -436,14 +433,14 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, } /* Allocate hash algorithm */ - hash_tfm = crypto_alloc_ahash(opts, 0, CRYPTO_ALG_ASYNC); + hash_tfm = 
crypto_alloc_shash(opts, 0, 0); if (IS_ERR(hash_tfm)) { ti->error = "Error initializing ESSIV hash"; err = PTR_ERR(hash_tfm); goto bad; } - salt = kzalloc(crypto_ahash_digestsize(hash_tfm), GFP_KERNEL); + salt = kzalloc(crypto_shash_digestsize(hash_tfm), GFP_KERNEL); if (!salt) { ti->error = "Error kmallocing salt storage in ESSIV"; err = -ENOMEM; @@ -454,7 +451,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, cc->iv_gen_private.essiv.hash_tfm = hash_tfm; essiv_tfm = alloc_essiv_cipher(cc, ti, salt, - crypto_ahash_digestsize(hash_tfm)); + crypto_shash_digestsize(hash_tfm)); if (IS_ERR(essiv_tfm)) { crypt_iv_essiv_dtr(cc); return PTR_ERR(essiv_tfm); @@ -465,7 +462,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, bad: if (hash_tfm && !IS_ERR(hash_tfm)) - crypto_free_ahash(hash_tfm); + crypto_free_shash(hash_tfm); kfree(salt); return err; } @@ -1620,36 +1617,31 @@ static int dmcrypt_write(void *data) struct rb_root write_tree; struct blk_plug plug; - DECLARE_WAITQUEUE(wait, current); - - spin_lock_irq(&cc->write_thread_wait.lock); + spin_lock_irq(&cc->write_thread_lock); continue_locked: if (!RB_EMPTY_ROOT(&cc->write_tree)) goto pop_from_list; set_current_state(TASK_INTERRUPTIBLE); - __add_wait_queue(&cc->write_thread_wait, &wait); - spin_unlock_irq(&cc->write_thread_wait.lock); + spin_unlock_irq(&cc->write_thread_lock); if (unlikely(kthread_should_stop())) { set_current_state(TASK_RUNNING); - remove_wait_queue(&cc->write_thread_wait, &wait); break; } schedule(); set_current_state(TASK_RUNNING); - spin_lock_irq(&cc->write_thread_wait.lock); - __remove_wait_queue(&cc->write_thread_wait, &wait); + spin_lock_irq(&cc->write_thread_lock); goto continue_locked; pop_from_list: write_tree = cc->write_tree; cc->write_tree = RB_ROOT; - spin_unlock_irq(&cc->write_thread_wait.lock); + spin_unlock_irq(&cc->write_thread_lock); BUG_ON(rb_parent(write_tree.rb_node)); @@ -1693,7 +1685,9 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) return; } - spin_lock_irqsave(&cc->write_thread_wait.lock, flags); + spin_lock_irqsave(&cc->write_thread_lock, flags); + if (RB_EMPTY_ROOT(&cc->write_tree)) + wake_up_process(cc->write_thread); rbp = &cc->write_tree.rb_node; parent = NULL; sector = io->sector; @@ -1706,9 +1700,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) } rb_link_node(&io->rb_node, parent, rbp); rb_insert_color(&io->rb_node, &cc->write_tree); - - wake_up_locked(&cc->write_thread_wait); - spin_unlock_irqrestore(&cc->write_thread_wait.lock, flags); + spin_unlock_irqrestore(&cc->write_thread_lock, flags); } static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) @@ -2831,7 +2823,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - init_waitqueue_head(&cc->write_thread_wait); + spin_lock_init(&cc->write_thread_lock); cc->write_tree = RB_ROOT; cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write"); @@ -3069,11 +3061,11 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) */ limits->max_segment_size = PAGE_SIZE; - if (cc->sector_size != (1 << SECTOR_SHIFT)) { - limits->logical_block_size = cc->sector_size; - limits->physical_block_size = cc->sector_size; - blk_limits_io_min(limits, cc->sector_size); - } + limits->logical_block_size = + max_t(unsigned short, limits->logical_block_size, cc->sector_size); + limits->physical_block_size = + max_t(unsigned, limits->physical_block_size, 
cc->sector_size); + limits->io_min = max_t(unsigned, limits->io_min, cc->sector_size); } static struct target_type crypt_target = { diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 1783d80c9cad..2fb7bb4304ad 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -17,6 +17,13 @@ #define DM_MSG_PREFIX "delay" +struct delay_class { + struct dm_dev *dev; + sector_t start; + unsigned delay; + unsigned ops; +}; + struct delay_c { struct timer_list delay_timer; struct mutex timer_lock; @@ -25,19 +32,16 @@ struct delay_c { struct list_head delayed_bios; atomic_t may_delay; - struct dm_dev *dev_read; - sector_t start_read; - unsigned read_delay; - unsigned reads; + struct delay_class read; + struct delay_class write; + struct delay_class flush; - struct dm_dev *dev_write; - sector_t start_write; - unsigned write_delay; - unsigned writes; + int argc; }; struct dm_delay_info { struct delay_c *context; + struct delay_class *class; struct list_head list; unsigned long expires; }; @@ -77,7 +81,7 @@ static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all) { struct dm_delay_info *delayed, *next; unsigned long next_expires = 0; - int start_timer = 0; + unsigned long start_timer = 0; struct bio_list flush_bios = { }; mutex_lock(&delayed_bios_lock); @@ -87,10 +91,7 @@ static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all) sizeof(struct dm_delay_info)); list_del(&delayed->list); bio_list_add(&flush_bios, bio); - if ((bio_data_dir(bio) == WRITE)) - delayed->context->writes--; - else - delayed->context->reads--; + delayed->class->ops--; continue; } @@ -100,7 +101,6 @@ static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all) } else next_expires = min(next_expires, delayed->expires); } - mutex_unlock(&delayed_bios_lock); if (start_timer) @@ -117,6 +117,50 @@ static void flush_expired_bios(struct work_struct *work) flush_bios(flush_delayed_bios(dc, 0)); } +static void delay_dtr(struct dm_target *ti) +{ + struct delay_c *dc = ti->private; + + destroy_workqueue(dc->kdelayd_wq); + + if (dc->read.dev) + dm_put_device(ti, dc->read.dev); + if (dc->write.dev) + dm_put_device(ti, dc->write.dev); + if (dc->flush.dev) + dm_put_device(ti, dc->flush.dev); + + mutex_destroy(&dc->timer_lock); + + kfree(dc); +} + +static int delay_class_ctr(struct dm_target *ti, struct delay_class *c, char **argv) +{ + int ret; + unsigned long long tmpll; + char dummy; + + if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1) { + ti->error = "Invalid device sector"; + return -EINVAL; + } + c->start = tmpll; + + if (sscanf(argv[2], "%u%c", &c->delay, &dummy) != 1) { + ti->error = "Invalid delay"; + return -EINVAL; + } + + ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &c->dev); + if (ret) { + ti->error = "Device lookup failed"; + return ret; + } + + return 0; +} + /* * Mapping parameters: * <device> <offset> <delay> [<write_device> <write_offset> <write_delay>] @@ -128,134 +172,89 @@ static void flush_expired_bios(struct work_struct *work) static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct delay_c *dc; - unsigned long long tmpll; - char dummy; int ret; - if (argc != 3 && argc != 6) { - ti->error = "Requires exactly 3 or 6 arguments"; + if (argc != 3 && argc != 6 && argc != 9) { + ti->error = "Requires exactly 3, 6 or 9 arguments"; return -EINVAL; } - dc = kmalloc(sizeof(*dc), GFP_KERNEL); + dc = kzalloc(sizeof(*dc), GFP_KERNEL); if (!dc) { ti->error = "Cannot allocate context"; return -ENOMEM; } - dc->reads = dc->writes = 0; 
+ ti->private = dc; + timer_setup(&dc->delay_timer, handle_delayed_timer, 0); + INIT_WORK(&dc->flush_expired_bios, flush_expired_bios); + INIT_LIST_HEAD(&dc->delayed_bios); + mutex_init(&dc->timer_lock); + atomic_set(&dc->may_delay, 1); + dc->argc = argc; - ret = -EINVAL; - if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1) { - ti->error = "Invalid device sector"; + ret = delay_class_ctr(ti, &dc->read, argv); + if (ret) goto bad; - } - dc->start_read = tmpll; - if (sscanf(argv[2], "%u%c", &dc->read_delay, &dummy) != 1) { - ti->error = "Invalid delay"; - goto bad; + if (argc == 3) { + ret = delay_class_ctr(ti, &dc->write, argv); + if (ret) + goto bad; + ret = delay_class_ctr(ti, &dc->flush, argv); + if (ret) + goto bad; + goto out; } - ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), - &dc->dev_read); - if (ret) { - ti->error = "Device lookup failed"; + ret = delay_class_ctr(ti, &dc->write, argv + 3); + if (ret) goto bad; - } - - ret = -EINVAL; - dc->dev_write = NULL; - if (argc == 3) + if (argc == 6) { + ret = delay_class_ctr(ti, &dc->flush, argv + 3); + if (ret) + goto bad; goto out; - - if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1) { - ti->error = "Invalid write device sector"; - goto bad_dev_read; } - dc->start_write = tmpll; - if (sscanf(argv[5], "%u%c", &dc->write_delay, &dummy) != 1) { - ti->error = "Invalid write delay"; - goto bad_dev_read; - } - - ret = dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), - &dc->dev_write); - if (ret) { - ti->error = "Write device lookup failed"; - goto bad_dev_read; - } + ret = delay_class_ctr(ti, &dc->flush, argv + 6); + if (ret) + goto bad; out: - ret = -EINVAL; dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0); if (!dc->kdelayd_wq) { + ret = -EINVAL; DMERR("Couldn't start kdelayd"); - goto bad_queue; + goto bad; } - timer_setup(&dc->delay_timer, handle_delayed_timer, 0); - - INIT_WORK(&dc->flush_expired_bios, flush_expired_bios); - INIT_LIST_HEAD(&dc->delayed_bios); - mutex_init(&dc->timer_lock); - atomic_set(&dc->may_delay, 1); - ti->num_flush_bios = 1; ti->num_discard_bios = 1; ti->per_io_data_size = sizeof(struct dm_delay_info); - ti->private = dc; return 0; -bad_queue: - if (dc->dev_write) - dm_put_device(ti, dc->dev_write); -bad_dev_read: - dm_put_device(ti, dc->dev_read); bad: - kfree(dc); + delay_dtr(ti); return ret; } -static void delay_dtr(struct dm_target *ti) -{ - struct delay_c *dc = ti->private; - - destroy_workqueue(dc->kdelayd_wq); - - dm_put_device(ti, dc->dev_read); - - if (dc->dev_write) - dm_put_device(ti, dc->dev_write); - - mutex_destroy(&dc->timer_lock); - - kfree(dc); -} - -static int delay_bio(struct delay_c *dc, int delay, struct bio *bio) +static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio) { struct dm_delay_info *delayed; unsigned long expires = 0; - if (!delay || !atomic_read(&dc->may_delay)) + if (!c->delay || !atomic_read(&dc->may_delay)) return DM_MAPIO_REMAPPED; delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info)); delayed->context = dc; - delayed->expires = expires = jiffies + msecs_to_jiffies(delay); + delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay); mutex_lock(&delayed_bios_lock); - - if (bio_data_dir(bio) == WRITE) - dc->writes++; - else - dc->reads++; - + c->ops++; list_add_tail(&delayed->list, &dc->delayed_bios); - mutex_unlock(&delayed_bios_lock); queue_timeout(dc, expires); @@ -282,23 +281,28 @@ static void delay_resume(struct dm_target *ti) static int delay_map(struct dm_target *ti, struct bio *bio) { struct 
delay_c *dc = ti->private; - - if ((bio_data_dir(bio) == WRITE) && (dc->dev_write)) { - bio_set_dev(bio, dc->dev_write->bdev); - if (bio_sectors(bio)) - bio->bi_iter.bi_sector = dc->start_write + - dm_target_offset(ti, bio->bi_iter.bi_sector); - - return delay_bio(dc, dc->write_delay, bio); + struct delay_class *c; + struct dm_delay_info *delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info)); + + if (bio_data_dir(bio) == WRITE) { + if (unlikely(bio->bi_opf & REQ_PREFLUSH)) + c = &dc->flush; + else + c = &dc->write; + } else { + c = &dc->read; } + delayed->class = c; + bio_set_dev(bio, c->dev->bdev); + if (bio_sectors(bio)) + bio->bi_iter.bi_sector = c->start + dm_target_offset(ti, bio->bi_iter.bi_sector); - bio_set_dev(bio, dc->dev_read->bdev); - bio->bi_iter.bi_sector = dc->start_read + - dm_target_offset(ti, bio->bi_iter.bi_sector); - - return delay_bio(dc, dc->read_delay, bio); + return delay_bio(dc, c, bio); } +#define DMEMIT_DELAY_CLASS(c) \ + DMEMIT("%s %llu %u", (c)->dev->name, (unsigned long long)(c)->start, (c)->delay) + static void delay_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) { @@ -307,17 +311,19 @@ static void delay_status(struct dm_target *ti, status_type_t type, switch (type) { case STATUSTYPE_INFO: - DMEMIT("%u %u", dc->reads, dc->writes); + DMEMIT("%u %u %u", dc->read.ops, dc->write.ops, dc->flush.ops); break; case STATUSTYPE_TABLE: - DMEMIT("%s %llu %u", dc->dev_read->name, - (unsigned long long) dc->start_read, - dc->read_delay); - if (dc->dev_write) - DMEMIT(" %s %llu %u", dc->dev_write->name, - (unsigned long long) dc->start_write, - dc->write_delay); + DMEMIT_DELAY_CLASS(&dc->read); + if (dc->argc >= 6) { + DMEMIT(" "); + DMEMIT_DELAY_CLASS(&dc->write); + } + if (dc->argc >= 9) { + DMEMIT(" "); + DMEMIT_DELAY_CLASS(&dc->flush); + } break; } } @@ -328,12 +334,15 @@ static int delay_iterate_devices(struct dm_target *ti, struct delay_c *dc = ti->private; int ret = 0; - ret = fn(ti, dc->dev_read, dc->start_read, ti->len, data); + ret = fn(ti, dc->read.dev, dc->read.start, ti->len, data); + if (ret) + goto out; + ret = fn(ti, dc->write.dev, dc->write.start, ti->len, data); + if (ret) + goto out; + ret = fn(ti, dc->flush.dev, dc->flush.start, ti->len, data); if (ret) goto out; - - if (dc->dev_write) - ret = fn(ti, dc->dev_write, dc->start_write, ti->len, data); out: return ret; diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 86438b2f10dd..378878599466 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -31,6 +31,8 @@ #define MIN_LOG2_INTERLEAVE_SECTORS 3 #define MAX_LOG2_INTERLEAVE_SECTORS 31 #define METADATA_WORKQUEUE_MAX_ACTIVE 16 +#define RECALC_SECTORS 8192 +#define RECALC_WRITE_SUPER 16 /* * Warning - DEBUG_PRINT prints security-sensitive data to the log, @@ -44,7 +46,8 @@ */ #define SB_MAGIC "integrt" -#define SB_VERSION 1 +#define SB_VERSION_1 1 +#define SB_VERSION_2 2 #define SB_SECTORS 8 #define MAX_SECTORS_PER_BLOCK 8 @@ -57,9 +60,12 @@ struct superblock { __u64 provided_data_sectors; /* userspace uses this value */ __u32 flags; __u8 log2_sectors_per_block; + __u8 pad[3]; + __u64 recalc_sector; }; #define SB_FLAG_HAVE_JOURNAL_MAC 0x1 +#define SB_FLAG_RECALCULATING 0x2 #define JOURNAL_ENTRY_ROUNDUP 8 @@ -139,6 +145,7 @@ struct alg_spec { struct dm_integrity_c { struct dm_dev *dev; + struct dm_dev *meta_dev; unsigned tag_size; __s8 log2_tag_size; sector_t start; @@ -170,7 +177,8 @@ struct dm_integrity_c { unsigned short journal_section_sectors; 
unsigned journal_sections; unsigned journal_entries; - sector_t device_sectors; + sector_t data_device_sectors; + sector_t meta_device_sectors; unsigned initial_sectors; unsigned metadata_run; __s8 log2_metadata_run; @@ -178,7 +186,7 @@ struct dm_integrity_c { __u8 sectors_per_block; unsigned char mode; - bool suspending; + int suspending; int failed; @@ -186,6 +194,7 @@ struct dm_integrity_c { /* these variables are locked with endio_wait.lock */ struct rb_root in_progress; + struct list_head wait_list; wait_queue_head_t endio_wait; struct workqueue_struct *wait_wq; @@ -210,6 +219,11 @@ struct dm_integrity_c { struct workqueue_struct *writer_wq; struct work_struct writer_work; + struct workqueue_struct *recalc_wq; + struct work_struct recalc_work; + u8 *recalc_buffer; + u8 *recalc_tags; + struct bio_list flush_bio_list; unsigned long autocommit_jiffies; @@ -233,7 +247,14 @@ struct dm_integrity_c { struct dm_integrity_range { sector_t logical_sector; unsigned n_sectors; - struct rb_node node; + bool waiting; + union { + struct rb_node node; + struct { + struct task_struct *task; + struct list_head wait_entry; + }; + }; }; struct dm_integrity_io { @@ -337,10 +358,14 @@ static commit_id_t dm_integrity_commit_id(struct dm_integrity_c *ic, unsigned i, static void get_area_and_offset(struct dm_integrity_c *ic, sector_t data_sector, sector_t *area, sector_t *offset) { - __u8 log2_interleave_sectors = ic->sb->log2_interleave_sectors; - - *area = data_sector >> log2_interleave_sectors; - *offset = (unsigned)data_sector & ((1U << log2_interleave_sectors) - 1); + if (!ic->meta_dev) { + __u8 log2_interleave_sectors = ic->sb->log2_interleave_sectors; + *area = data_sector >> log2_interleave_sectors; + *offset = (unsigned)data_sector & ((1U << log2_interleave_sectors) - 1); + } else { + *area = 0; + *offset = data_sector; + } } #define sector_to_block(ic, n) \ @@ -379,6 +404,9 @@ static sector_t get_data_sector(struct dm_integrity_c *ic, sector_t area, sector { sector_t result; + if (ic->meta_dev) + return offset; + result = area << ic->sb->log2_interleave_sectors; if (likely(ic->log2_metadata_run >= 0)) result += (area + 1) << ic->log2_metadata_run; @@ -386,6 +414,8 @@ static sector_t get_data_sector(struct dm_integrity_c *ic, sector_t area, sector result += (area + 1) * ic->metadata_run; result += (sector_t)ic->initial_sectors + offset; + result += ic->start; + return result; } @@ -395,6 +425,14 @@ static void wraparound_section(struct dm_integrity_c *ic, unsigned *sec_ptr) *sec_ptr -= ic->journal_sections; } +static void sb_set_version(struct dm_integrity_c *ic) +{ + if (ic->meta_dev || ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) + ic->sb->version = SB_VERSION_2; + else + ic->sb->version = SB_VERSION_1; +} + static int sync_rw_sb(struct dm_integrity_c *ic, int op, int op_flags) { struct dm_io_request io_req; @@ -406,7 +444,7 @@ static int sync_rw_sb(struct dm_integrity_c *ic, int op, int op_flags) io_req.mem.ptr.addr = ic->sb; io_req.notify.fn = NULL; io_req.client = ic->io; - io_loc.bdev = ic->dev->bdev; + io_loc.bdev = ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev; io_loc.sector = ic->start; io_loc.count = SB_SECTORS; @@ -753,7 +791,7 @@ static void rw_journal(struct dm_integrity_c *ic, int op, int op_flags, unsigned io_req.notify.fn = NULL; } io_req.client = ic->io; - io_loc.bdev = ic->dev->bdev; + io_loc.bdev = ic->meta_dev ? 
ic->meta_dev->bdev : ic->dev->bdev; io_loc.sector = ic->start + SB_SECTORS + sector; io_loc.count = n_sectors; @@ -857,7 +895,7 @@ static void copy_from_journal(struct dm_integrity_c *ic, unsigned section, unsig io_req.notify.context = data; io_req.client = ic->io; io_loc.bdev = ic->dev->bdev; - io_loc.sector = ic->start + target; + io_loc.sector = target; io_loc.count = n_sectors; r = dm_io(&io_req, 1, &io_loc, NULL); @@ -867,13 +905,27 @@ static void copy_from_journal(struct dm_integrity_c *ic, unsigned section, unsig } } -static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range) +static bool ranges_overlap(struct dm_integrity_range *range1, struct dm_integrity_range *range2) +{ + return range1->logical_sector < range2->logical_sector + range2->n_sectors && + range2->logical_sector < range1->logical_sector + range1->n_sectors; +} + +static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range, bool check_waiting) { struct rb_node **n = &ic->in_progress.rb_node; struct rb_node *parent; BUG_ON((new_range->logical_sector | new_range->n_sectors) & (unsigned)(ic->sectors_per_block - 1)); + if (likely(check_waiting)) { + struct dm_integrity_range *range; + list_for_each_entry(range, &ic->wait_list, wait_entry) { + if (unlikely(ranges_overlap(range, new_range))) + return false; + } + } + parent = NULL; while (*n) { @@ -898,7 +950,22 @@ static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range * static void remove_range_unlocked(struct dm_integrity_c *ic, struct dm_integrity_range *range) { rb_erase(&range->node, &ic->in_progress); - wake_up_locked(&ic->endio_wait); + while (unlikely(!list_empty(&ic->wait_list))) { + struct dm_integrity_range *last_range = + list_first_entry(&ic->wait_list, struct dm_integrity_range, wait_entry); + struct task_struct *last_range_task; + if (!ranges_overlap(range, last_range)) + break; + last_range_task = last_range->task; + list_del(&last_range->wait_entry); + if (!add_new_range(ic, last_range, false)) { + last_range->task = last_range_task; + list_add(&last_range->wait_entry, &ic->wait_list); + break; + } + last_range->waiting = false; + wake_up_process(last_range_task); + } } static void remove_range(struct dm_integrity_c *ic, struct dm_integrity_range *range) @@ -910,6 +977,19 @@ static void remove_range(struct dm_integrity_c *ic, struct dm_integrity_range *r spin_unlock_irqrestore(&ic->endio_wait.lock, flags); } +static void wait_and_add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range) +{ + new_range->waiting = true; + list_add_tail(&new_range->wait_entry, &ic->wait_list); + new_range->task = current; + do { + __set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock_irq(&ic->endio_wait.lock); + io_schedule(); + spin_lock_irq(&ic->endio_wait.lock); + } while (unlikely(new_range->waiting)); +} + static void init_journal_node(struct journal_node *node) { RB_CLEAR_NODE(&node->node); @@ -1599,8 +1679,12 @@ retry: dio->range.n_sectors = min(dio->range.n_sectors, ic->free_sectors << ic->sb->log2_sectors_per_block); - if (unlikely(!dio->range.n_sectors)) - goto sleep; + if (unlikely(!dio->range.n_sectors)) { + if (from_map) + goto offload_to_thread; + sleep_on_endio_wait(ic); + goto retry; + } range_sectors = dio->range.n_sectors >> ic->sb->log2_sectors_per_block; ic->free_sectors -= range_sectors; journal_section = ic->free_section; @@ -1654,22 +1738,20 @@ retry: } } } - if (unlikely(!add_new_range(ic, &dio->range))) { + if (unlikely(!add_new_range(ic,
&dio->range, true))) { /* * We must not sleep in the request routine because it could * stall bios on current->bio_list. * So, we offload the bio to a workqueue if we have to sleep. */ -sleep: if (from_map) { +offload_to_thread: spin_unlock_irq(&ic->endio_wait.lock); INIT_WORK(&dio->work, integrity_bio_wait); queue_work(ic->wait_wq, &dio->work); return; - } else { - sleep_on_endio_wait(ic); - goto retry; } + wait_and_add_new_range(ic, &dio->range); } spin_unlock_irq(&ic->endio_wait.lock); @@ -1701,14 +1783,18 @@ sleep: bio->bi_end_io = integrity_end_io; bio->bi_iter.bi_size = dio->range.n_sectors << SECTOR_SHIFT; - bio->bi_iter.bi_sector += ic->start; generic_make_request(bio); if (need_sync_io) { wait_for_completion_io(&read_comp); + if (unlikely(ic->recalc_wq != NULL) && + ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) && + dio->range.logical_sector + dio->range.n_sectors > le64_to_cpu(ic->sb->recalc_sector)) + goto skip_check; if (likely(!bio->bi_status)) integrity_metadata(&dio->work); else +skip_check: dec_in_flight(dio); } else { @@ -1892,8 +1978,8 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, io->range.n_sectors = (k - j) << ic->sb->log2_sectors_per_block; spin_lock_irq(&ic->endio_wait.lock); - while (unlikely(!add_new_range(ic, &io->range))) - sleep_on_endio_wait(ic); + if (unlikely(!add_new_range(ic, &io->range, true))) + wait_and_add_new_range(ic, &io->range); if (likely(!from_replay)) { struct journal_node *section_node = &ic->journal_tree[i * ic->journal_section_entries]; @@ -1981,7 +2067,7 @@ static void integrity_writer(struct work_struct *w) unsigned prev_free_sectors; /* the following test is not needed, but it tests the replay code */ - if (READ_ONCE(ic->suspending)) + if (READ_ONCE(ic->suspending) && !ic->meta_dev) return; spin_lock_irq(&ic->endio_wait.lock); @@ -2008,6 +2094,108 @@ static void integrity_writer(struct work_struct *w) spin_unlock_irq(&ic->endio_wait.lock); } +static void recalc_write_super(struct dm_integrity_c *ic) +{ + int r; + + dm_integrity_flush_buffers(ic); + if (dm_integrity_failed(ic)) + return; + + sb_set_version(ic); + r = sync_rw_sb(ic, REQ_OP_WRITE, 0); + if (unlikely(r)) + dm_integrity_io_error(ic, "writing superblock", r); +} + +static void integrity_recalc(struct work_struct *w) +{ + struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, recalc_work); + struct dm_integrity_range range; + struct dm_io_request io_req; + struct dm_io_region io_loc; + sector_t area, offset; + sector_t metadata_block; + unsigned metadata_offset; + __u8 *t; + unsigned i; + int r; + unsigned super_counter = 0; + + spin_lock_irq(&ic->endio_wait.lock); + +next_chunk: + + if (unlikely(READ_ONCE(ic->suspending))) + goto unlock_ret; + + range.logical_sector = le64_to_cpu(ic->sb->recalc_sector); + if (unlikely(range.logical_sector >= ic->provided_data_sectors)) + goto unlock_ret; + + get_area_and_offset(ic, range.logical_sector, &area, &offset); + range.n_sectors = min((sector_t)RECALC_SECTORS, ic->provided_data_sectors - range.logical_sector); + if (!ic->meta_dev) + range.n_sectors = min(range.n_sectors, (1U << ic->sb->log2_interleave_sectors) - (unsigned)offset); + + if (unlikely(!add_new_range(ic, &range, true))) + wait_and_add_new_range(ic, &range); + + spin_unlock_irq(&ic->endio_wait.lock); + + if (unlikely(++super_counter == RECALC_WRITE_SUPER)) { + recalc_write_super(ic); + super_counter = 0; + } + + if (unlikely(dm_integrity_failed(ic))) + goto err; + + io_req.bi_op = REQ_OP_READ; + io_req.bi_op_flags = 0; + 
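The wait_list machinery added above (add_new_range() with check_waiting, wait_and_add_new_range(), and the head-first wake-ups in remove_range_unlocked()) amounts to a fair range lock: a newcomer may not jump the queue past an overlapping waiter. A single-threaded, user-space model of just that ordering rule, with invented names and fixed-size arrays standing in for the rb-tree and list_head:

    #include <stdio.h>

    struct range { unsigned long long start, n; };

    /* same test as ranges_overlap() */
    static int overlap(struct range a, struct range b)
    {
            return a.start < b.start + b.n && b.start < a.start + a.n;
    }

    #define MAX 4
    static struct range held[MAX], waiting[MAX];
    static int n_held, n_waiting;

    /* mirrors add_new_range(): with check_waiting set, an overlapping
     * entry anywhere in the wait FIFO also blocks the new locker */
    static int try_lock(struct range r, int check_waiting)
    {
            int i;

            if (check_waiting)
                    for (i = 0; i < n_waiting; i++)
                            if (overlap(waiting[i], r))
                                    return 0;
            for (i = 0; i < n_held; i++)
                    if (overlap(held[i], r))
                            return 0;
            held[n_held++] = r;
            return 1;
    }

    int main(void)
    {
            struct range a = { 0, 100 }, b = { 50, 100 }, c = { 140, 5 };

            try_lock(a, 1);                 /* granted, sectors 0..99 */
            if (!try_lock(b, 1))
                    waiting[n_waiting++] = b; /* overlaps a: parks in FIFO */
            if (!try_lock(c, 1))            /* free of a, but overlaps waiter b */
                    printf("c queues behind b: no starvation\n");
            return 0;
    }

On unlock, remove_range_unlocked() walks the wait FIFO from the head, granting each waiter whose range has become free and stopping at the first one that still conflicts, so overlapping lockers are served in arrival order.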
io_req.mem.type = DM_IO_VMA; + io_req.mem.ptr.addr = ic->recalc_buffer; + io_req.notify.fn = NULL; + io_req.client = ic->io; + io_loc.bdev = ic->dev->bdev; + io_loc.sector = get_data_sector(ic, area, offset); + io_loc.count = range.n_sectors; + + r = dm_io(&io_req, 1, &io_loc, NULL); + if (unlikely(r)) { + dm_integrity_io_error(ic, "reading data", r); + goto err; + } + + t = ic->recalc_tags; + for (i = 0; i < range.n_sectors; i += ic->sectors_per_block) { + integrity_sector_checksum(ic, range.logical_sector + i, ic->recalc_buffer + (i << SECTOR_SHIFT), t); + t += ic->tag_size; + } + + metadata_block = get_metadata_sector_and_offset(ic, area, offset, &metadata_offset); + + r = dm_integrity_rw_tag(ic, ic->recalc_tags, &metadata_block, &metadata_offset, t - ic->recalc_tags, TAG_WRITE); + if (unlikely(r)) { + dm_integrity_io_error(ic, "writing tags", r); + goto err; + } + + spin_lock_irq(&ic->endio_wait.lock); + remove_range_unlocked(ic, &range); + ic->sb->recalc_sector = cpu_to_le64(range.logical_sector + range.n_sectors); + goto next_chunk; + +err: + remove_range(ic, &range); + return; + +unlock_ret: + spin_unlock_irq(&ic->endio_wait.lock); + + recalc_write_super(ic); +} + static void init_journal(struct dm_integrity_c *ic, unsigned start_section, unsigned n_sections, unsigned char commit_seq) { @@ -2210,17 +2398,22 @@ static void dm_integrity_postsuspend(struct dm_target *ti) del_timer_sync(&ic->autocommit_timer); - ic->suspending = true; + WRITE_ONCE(ic->suspending, 1); + + if (ic->recalc_wq) + drain_workqueue(ic->recalc_wq); queue_work(ic->commit_wq, &ic->commit_work); drain_workqueue(ic->commit_wq); if (ic->mode == 'J') { + if (ic->meta_dev) + queue_work(ic->writer_wq, &ic->writer_work); drain_workqueue(ic->writer_wq); dm_integrity_flush_buffers(ic); } - ic->suspending = false; + WRITE_ONCE(ic->suspending, 0); BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); @@ -2232,6 +2425,16 @@ static void dm_integrity_resume(struct dm_target *ti) struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private; replay_journal(ic); + + if (ic->recalc_wq && ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { + __u64 recalc_pos = le64_to_cpu(ic->sb->recalc_sector); + if (recalc_pos < ic->provided_data_sectors) { + queue_work(ic->recalc_wq, &ic->recalc_work); + } else if (recalc_pos > ic->provided_data_sectors) { + ic->sb->recalc_sector = cpu_to_le64(ic->provided_data_sectors); + recalc_write_super(ic); + } + } } static void dm_integrity_status(struct dm_target *ti, status_type_t type, @@ -2243,7 +2446,13 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, switch (type) { case STATUSTYPE_INFO: - DMEMIT("%llu", (unsigned long long)atomic64_read(&ic->number_of_mismatches)); + DMEMIT("%llu %llu", + (unsigned long long)atomic64_read(&ic->number_of_mismatches), + (unsigned long long)ic->provided_data_sectors); + if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) + DMEMIT(" %llu", (unsigned long long)le64_to_cpu(ic->sb->recalc_sector)); + else + DMEMIT(" -"); break; case STATUSTYPE_TABLE: { @@ -2251,19 +2460,25 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, watermark_percentage += ic->journal_entries / 2; do_div(watermark_percentage, ic->journal_entries); arg_count = 5; + arg_count += !!ic->meta_dev; arg_count += ic->sectors_per_block != 1; + arg_count += !!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)); arg_count += !!ic->internal_hash_alg.alg_string; arg_count += !!ic->journal_crypt_alg.alg_string; arg_count += !!ic->journal_mac_alg.alg_string; 
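Stripped of the I/O and checksumming, integrity_recalc() above is a resumable scan: take a range lock, process at most RECALC_SECTORS at a time, and every RECALC_WRITE_SUPER chunks persist the current position in the superblock so an interrupted run resumes rather than restarts. A stand-alone sketch of that control flow (recalc_sector stands in for ic->sb->recalc_sector; everything else is elided):

    #include <stdio.h>

    #define RECALC_SECTORS     8192 /* sectors recalculated per chunk */
    #define RECALC_WRITE_SUPER 16   /* chunks between superblock writes */

    static unsigned long long recalc_sector; /* persisted progress */

    static void write_super(void)
    {
            /* stands in for recalc_write_super(): flush, then store position */
            printf("checkpoint at sector %llu\n", recalc_sector);
    }

    static void recalc(unsigned long long provided_data_sectors)
    {
            unsigned super_counter = 0;

            while (recalc_sector < provided_data_sectors) {
                    unsigned long long n = provided_data_sectors - recalc_sector;

                    if (n > RECALC_SECTORS)
                            n = RECALC_SECTORS;
                    if (++super_counter == RECALC_WRITE_SUPER) {
                            write_super();
                            super_counter = 0;
                    }
                    /* lock the range, read data, compute tags, write tags */
                    recalc_sector += n;
            }
            write_super(); /* final position */
    }

    int main(void)
    {
            recalc(1000000);
            return 0;
    }

This is also why dm_integrity_resume() above re-queues recalc_work whenever the saved recalc_sector is still below provided_data_sectors.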
DMEMIT("%s %llu %u %c %u", ic->dev->name, (unsigned long long)ic->start, ic->tag_size, ic->mode, arg_count); + if (ic->meta_dev) + DMEMIT(" meta_device:%s", ic->meta_dev->name); + if (ic->sectors_per_block != 1) + DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT); + if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) + DMEMIT(" recalculate"); DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS); DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors); DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors); DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage); DMEMIT(" commit_time:%u", ic->autocommit_msec); - if (ic->sectors_per_block != 1) - DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT); #define EMIT_ALG(a, n) \ do { \ @@ -2286,7 +2501,10 @@ static int dm_integrity_iterate_devices(struct dm_target *ti, { struct dm_integrity_c *ic = ti->private; - return fn(ti, ic->dev, ic->start + ic->initial_sectors + ic->metadata_run, ti->len, data); + if (!ic->meta_dev) + return fn(ti, ic->dev, ic->start + ic->initial_sectors + ic->metadata_run, ti->len, data); + else + return fn(ti, ic->dev, 0, ti->len, data); } static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *limits) @@ -2319,26 +2537,38 @@ static void calculate_journal_section_size(struct dm_integrity_c *ic) static int calculate_device_limits(struct dm_integrity_c *ic) { __u64 initial_sectors; - sector_t last_sector, last_area, last_offset; calculate_journal_section_size(ic); initial_sectors = SB_SECTORS + (__u64)ic->journal_section_sectors * ic->journal_sections; - if (initial_sectors + METADATA_PADDING_SECTORS >= ic->device_sectors || initial_sectors > UINT_MAX) + if (initial_sectors + METADATA_PADDING_SECTORS >= ic->meta_device_sectors || initial_sectors > UINT_MAX) return -EINVAL; ic->initial_sectors = initial_sectors; - ic->metadata_run = roundup((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block), - (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS)) >> SECTOR_SHIFT; - if (!(ic->metadata_run & (ic->metadata_run - 1))) - ic->log2_metadata_run = __ffs(ic->metadata_run); - else - ic->log2_metadata_run = -1; + if (!ic->meta_dev) { + sector_t last_sector, last_area, last_offset; - get_area_and_offset(ic, ic->provided_data_sectors - 1, &last_area, &last_offset); - last_sector = get_data_sector(ic, last_area, last_offset); + ic->metadata_run = roundup((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block), + (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS)) >> SECTOR_SHIFT; + if (!(ic->metadata_run & (ic->metadata_run - 1))) + ic->log2_metadata_run = __ffs(ic->metadata_run); + else + ic->log2_metadata_run = -1; - if (ic->start + last_sector < last_sector || ic->start + last_sector >= ic->device_sectors) - return -EINVAL; + get_area_and_offset(ic, ic->provided_data_sectors - 1, &last_area, &last_offset); + last_sector = get_data_sector(ic, last_area, last_offset); + if (last_sector < ic->start || last_sector >= ic->meta_device_sectors) + return -EINVAL; + } else { + __u64 meta_size = ic->provided_data_sectors * ic->tag_size; + meta_size = (meta_size + ((1U << (ic->log2_buffer_sectors + SECTOR_SHIFT)) - 1)) + >> (ic->log2_buffer_sectors + SECTOR_SHIFT); + meta_size <<= ic->log2_buffer_sectors; + if (ic->initial_sectors + meta_size < ic->initial_sectors || + ic->initial_sectors + meta_size > ic->meta_device_sectors) + return -EINVAL; + ic->metadata_run = 1; + 
ic->log2_metadata_run = 0; + } return 0; } @@ -2350,7 +2580,6 @@ static int initialize_superblock(struct dm_integrity_c *ic, unsigned journal_sec memset(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT); memcpy(ic->sb->magic, SB_MAGIC, 8); - ic->sb->version = SB_VERSION; ic->sb->integrity_tag_size = cpu_to_le16(ic->tag_size); ic->sb->log2_sectors_per_block = __ffs(ic->sectors_per_block); if (ic->journal_mac_alg.alg_string) @@ -2360,28 +2589,55 @@ static int initialize_superblock(struct dm_integrity_c *ic, unsigned journal_sec journal_sections = journal_sectors / ic->journal_section_sectors; if (!journal_sections) journal_sections = 1; - ic->sb->journal_sections = cpu_to_le32(journal_sections); - if (!interleave_sectors) - interleave_sectors = DEFAULT_INTERLEAVE_SECTORS; - ic->sb->log2_interleave_sectors = __fls(interleave_sectors); - ic->sb->log2_interleave_sectors = max((__u8)MIN_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); - ic->sb->log2_interleave_sectors = min((__u8)MAX_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); - - ic->provided_data_sectors = 0; - for (test_bit = fls64(ic->device_sectors) - 1; test_bit >= 3; test_bit--) { - __u64 prev_data_sectors = ic->provided_data_sectors; + if (!ic->meta_dev) { + ic->sb->journal_sections = cpu_to_le32(journal_sections); + if (!interleave_sectors) + interleave_sectors = DEFAULT_INTERLEAVE_SECTORS; + ic->sb->log2_interleave_sectors = __fls(interleave_sectors); + ic->sb->log2_interleave_sectors = max((__u8)MIN_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); + ic->sb->log2_interleave_sectors = min((__u8)MAX_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); + + ic->provided_data_sectors = 0; + for (test_bit = fls64(ic->meta_device_sectors) - 1; test_bit >= 3; test_bit--) { + __u64 prev_data_sectors = ic->provided_data_sectors; + + ic->provided_data_sectors |= (sector_t)1 << test_bit; + if (calculate_device_limits(ic)) + ic->provided_data_sectors = prev_data_sectors; + } + if (!ic->provided_data_sectors) + return -EINVAL; + } else { + ic->sb->log2_interleave_sectors = 0; + ic->provided_data_sectors = ic->data_device_sectors; + ic->provided_data_sectors &= ~(sector_t)(ic->sectors_per_block - 1); + +try_smaller_buffer: + ic->sb->journal_sections = cpu_to_le32(0); + for (test_bit = fls(journal_sections) - 1; test_bit >= 0; test_bit--) { + __u32 prev_journal_sections = le32_to_cpu(ic->sb->journal_sections); + __u32 test_journal_sections = prev_journal_sections | (1U << test_bit); + if (test_journal_sections > journal_sections) + continue; + ic->sb->journal_sections = cpu_to_le32(test_journal_sections); + if (calculate_device_limits(ic)) + ic->sb->journal_sections = cpu_to_le32(prev_journal_sections); - ic->provided_data_sectors |= (sector_t)1 << test_bit; - if (calculate_device_limits(ic)) - ic->provided_data_sectors = prev_data_sectors; + } + if (!le32_to_cpu(ic->sb->journal_sections)) { + if (ic->log2_buffer_sectors > 3) { + ic->log2_buffer_sectors--; + goto try_smaller_buffer; + } + return -EINVAL; + } } - if (!ic->provided_data_sectors) - return -EINVAL; - ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors); + sb_set_version(ic); + return 0; } @@ -2828,6 +3084,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) {0, 9, "Invalid number of feature args"}, }; unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec; + bool recalculate; bool should_write_sb; __u64 threshold; unsigned long long start; @@ -2848,6 +3105,7 @@ static int 
dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->per_io_data_size = sizeof(struct dm_integrity_io); ic->in_progress = RB_ROOT; + INIT_LIST_HEAD(&ic->wait_list); init_waitqueue_head(&ic->endio_wait); bio_list_init(&ic->flush_bio_list); init_waitqueue_head(&ic->copy_to_journal_wait); @@ -2883,13 +3141,12 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } - ic->device_sectors = i_size_read(ic->dev->bdev->bd_inode) >> SECTOR_SHIFT; - journal_sectors = min((sector_t)DEFAULT_MAX_JOURNAL_SECTORS, - ic->device_sectors >> DEFAULT_JOURNAL_SIZE_FACTOR); + journal_sectors = 0; interleave_sectors = DEFAULT_INTERLEAVE_SECTORS; buffer_sectors = DEFAULT_BUFFER_SECTORS; journal_watermark = DEFAULT_JOURNAL_WATERMARK; sync_msec = DEFAULT_SYNC_MSEC; + recalculate = false; ic->sectors_per_block = 1; as.argc = argc - DIRECT_ARGUMENTS; @@ -2908,7 +3165,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } if (sscanf(opt_string, "journal_sectors:%u%c", &val, &dummy) == 1) - journal_sectors = val; + journal_sectors = val ? val : 1; else if (sscanf(opt_string, "interleave_sectors:%u%c", &val, &dummy) == 1) interleave_sectors = val; else if (sscanf(opt_string, "buffer_sectors:%u%c", &val, &dummy) == 1) @@ -2917,7 +3174,17 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) journal_watermark = val; else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1) sync_msec = val; - else if (sscanf(opt_string, "block_size:%u%c", &val, &dummy) == 1) { + else if (!memcmp(opt_string, "meta_device:", strlen("meta_device:"))) { + if (ic->meta_dev) { + dm_put_device(ti, ic->meta_dev); + ic->meta_dev = NULL; + } + r = dm_get_device(ti, strchr(opt_string, ':') + 1, dm_table_get_mode(ti->table), &ic->meta_dev); + if (r) { + ti->error = "Device lookup failed"; + goto bad; + } + } else if (sscanf(opt_string, "block_size:%u%c", &val, &dummy) == 1) { if (val < 1 << SECTOR_SHIFT || val > MAX_SECTORS_PER_BLOCK << SECTOR_SHIFT || (val & (val -1))) { @@ -2941,6 +3208,8 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) "Invalid journal_mac argument"); if (r) goto bad; + } else if (!strcmp(opt_string, "recalculate")) { + recalculate = true; } else { r = -EINVAL; ti->error = "Invalid argument"; @@ -2948,6 +3217,21 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) } } + ic->data_device_sectors = i_size_read(ic->dev->bdev->bd_inode) >> SECTOR_SHIFT; + if (!ic->meta_dev) + ic->meta_device_sectors = ic->data_device_sectors; + else + ic->meta_device_sectors = i_size_read(ic->meta_dev->bdev->bd_inode) >> SECTOR_SHIFT; + + if (!journal_sectors) { + journal_sectors = min((sector_t)DEFAULT_MAX_JOURNAL_SECTORS, + ic->data_device_sectors >> DEFAULT_JOURNAL_SIZE_FACTOR); + } + + if (!buffer_sectors) + buffer_sectors = 1; + ic->log2_buffer_sectors = min((int)__fls(buffer_sectors), 31 - SECTOR_SHIFT); + r = get_mac(&ic->internal_hash, &ic->internal_hash_alg, &ti->error, "Invalid internal hash", "Error setting internal hash key"); if (r) @@ -3062,7 +3346,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) should_write_sb = true; } - if (ic->sb->version != SB_VERSION) { + if (!ic->sb->version || ic->sb->version > SB_VERSION_2) { r = -EINVAL; ti->error = "Unknown version"; goto bad; @@ -3083,11 +3367,19 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } /* make sure that ti->max_io_len 
doesn't overflow */ - if (ic->sb->log2_interleave_sectors < MIN_LOG2_INTERLEAVE_SECTORS || - ic->sb->log2_interleave_sectors > MAX_LOG2_INTERLEAVE_SECTORS) { - r = -EINVAL; - ti->error = "Invalid interleave_sectors in the superblock"; - goto bad; + if (!ic->meta_dev) { + if (ic->sb->log2_interleave_sectors < MIN_LOG2_INTERLEAVE_SECTORS || + ic->sb->log2_interleave_sectors > MAX_LOG2_INTERLEAVE_SECTORS) { + r = -EINVAL; + ti->error = "Invalid interleave_sectors in the superblock"; + goto bad; + } + } else { + if (ic->sb->log2_interleave_sectors) { + r = -EINVAL; + ti->error = "Invalid interleave_sectors in the superblock"; + goto bad; + } } ic->provided_data_sectors = le64_to_cpu(ic->sb->provided_data_sectors); if (ic->provided_data_sectors != le64_to_cpu(ic->sb->provided_data_sectors)) { @@ -3101,20 +3393,28 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->error = "Journal mac mismatch"; goto bad; } + +try_smaller_buffer: r = calculate_device_limits(ic); if (r) { + if (ic->meta_dev) { + if (ic->log2_buffer_sectors > 3) { + ic->log2_buffer_sectors--; + goto try_smaller_buffer; + } + } ti->error = "The device is too small"; goto bad; } + if (!ic->meta_dev) + ic->log2_buffer_sectors = min(ic->log2_buffer_sectors, (__u8)__ffs(ic->metadata_run)); + if (ti->len > ic->provided_data_sectors) { r = -EINVAL; ti->error = "Not enough provided sectors for requested mapping size"; goto bad; } - if (!buffer_sectors) - buffer_sectors = 1; - ic->log2_buffer_sectors = min3((int)__fls(buffer_sectors), (int)__ffs(ic->metadata_run), 31 - SECTOR_SHIFT); threshold = (__u64)ic->journal_entries * (100 - journal_watermark); threshold += 50; @@ -3138,8 +3438,40 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) (unsigned long long)ic->provided_data_sectors); DEBUG_print(" log2_buffer_sectors %u\n", ic->log2_buffer_sectors); - ic->bufio = dm_bufio_client_create(ic->dev->bdev, 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), - 1, 0, NULL, NULL); + if (recalculate && !(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))) { + ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); + ic->sb->recalc_sector = cpu_to_le64(0); + } + + if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { + if (!ic->internal_hash) { + r = -EINVAL; + ti->error = "Recalculate is only valid with internal hash"; + goto bad; + } + ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1); + if (!ic->recalc_wq) { + ti->error = "Cannot allocate workqueue"; + r = -ENOMEM; + goto bad; + } + INIT_WORK(&ic->recalc_work, integrity_recalc); + ic->recalc_buffer = vmalloc(RECALC_SECTORS << SECTOR_SHIFT); + if (!ic->recalc_buffer) { + ti->error = "Cannot allocate buffer for recalculating"; + r = -ENOMEM; + goto bad; + } + ic->recalc_tags = kvmalloc((RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size, GFP_KERNEL); + if (!ic->recalc_tags) { + ti->error = "Cannot allocate tags for recalculating"; + r = -ENOMEM; + goto bad; + } + } + + ic->bufio = dm_bufio_client_create(ic->meta_dev ?
ic->meta_dev->bdev : ic->dev->bdev, + 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL); if (IS_ERR(ic->bufio)) { r = PTR_ERR(ic->bufio); ti->error = "Cannot initialize dm-bufio"; @@ -3171,9 +3503,11 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) ic->just_formatted = true; } - r = dm_set_target_max_io_len(ti, 1U << ic->sb->log2_interleave_sectors); - if (r) - goto bad; + if (!ic->meta_dev) { + r = dm_set_target_max_io_len(ti, 1U << ic->sb->log2_interleave_sectors); + if (r) + goto bad; + } if (!ic->internal_hash) dm_integrity_set(ti, ic); @@ -3192,6 +3526,7 @@ static void dm_integrity_dtr(struct dm_target *ti) struct dm_integrity_c *ic = ti->private; BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); + BUG_ON(!list_empty(&ic->wait_list)); if (ic->metadata_wq) destroy_workqueue(ic->metadata_wq); @@ -3201,6 +3536,12 @@ static void dm_integrity_dtr(struct dm_target *ti) destroy_workqueue(ic->commit_wq); if (ic->writer_wq) destroy_workqueue(ic->writer_wq); + if (ic->recalc_wq) + destroy_workqueue(ic->recalc_wq); + if (ic->recalc_buffer) + vfree(ic->recalc_buffer); + if (ic->recalc_tags) + kvfree(ic->recalc_tags); if (ic->bufio) dm_bufio_client_destroy(ic->bufio); mempool_exit(&ic->journal_io_mempool); @@ -3208,6 +3549,8 @@ static void dm_integrity_dtr(struct dm_target *ti) dm_io_client_destroy(ic->io); if (ic->dev) dm_put_device(ti, ic->dev); + if (ic->meta_dev) + dm_put_device(ti, ic->meta_dev); dm_integrity_free_page_list(ic, ic->journal); dm_integrity_free_page_list(ic, ic->journal_io); dm_integrity_free_page_list(ic, ic->journal_xor); @@ -3248,7 +3591,7 @@ static void dm_integrity_dtr(struct dm_target *ti) static struct target_type integrity_target = { .name = "integrity", - .version = {1, 1, 0}, + .version = {1, 2, 0}, .module = THIS_MODULE, .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY, .ctr = dm_integrity_ctr, diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 3c7547a3c371..2fc4213e02b5 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -487,6 +487,8 @@ static int run_complete_job(struct kcopyd_job *job) if (atomic_dec_and_test(&kc->nr_jobs)) wake_up(&kc->destroyq); + cond_resched(); + return 0; } @@ -741,9 +743,9 @@ static void split_job(struct kcopyd_job *master_job) } } -int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, - unsigned int num_dests, struct dm_io_region *dests, - unsigned int flags, dm_kcopyd_notify_fn fn, void *context) +void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, + unsigned int num_dests, struct dm_io_region *dests, + unsigned int flags, dm_kcopyd_notify_fn fn, void *context) { struct kcopyd_job *job; int i; @@ -818,16 +820,14 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, job->progress = 0; split_job(job); } - - return 0; } EXPORT_SYMBOL(dm_kcopyd_copy); -int dm_kcopyd_zero(struct dm_kcopyd_client *kc, - unsigned num_dests, struct dm_io_region *dests, - unsigned flags, dm_kcopyd_notify_fn fn, void *context) +void dm_kcopyd_zero(struct dm_kcopyd_client *kc, + unsigned num_dests, struct dm_io_region *dests, + unsigned flags, dm_kcopyd_notify_fn fn, void *context) { - return dm_kcopyd_copy(kc, NULL, num_dests, dests, flags, fn, context); + dm_kcopyd_copy(kc, NULL, num_dests, dests, flags, fn, context); } EXPORT_SYMBOL(dm_kcopyd_zero); diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 75df4c9d8b54..cae689de75fd 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3859,7 +3859,7 @@ 
static int __load_dirty_region_bitmap(struct raid_set *rs) /* Try loading the bitmap unless "raid0", which does not have one */ if (!rs_is_raid0(rs) && !test_and_set_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags)) { - r = bitmap_load(&rs->md); + r = md_bitmap_load(&rs->md); if (r) DMERR("Failed to load bitmap"); } @@ -3987,8 +3987,8 @@ static int raid_preresume(struct dm_target *ti) /* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) */ if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap && mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)) { - r = bitmap_resize(mddev->bitmap, mddev->dev_sectors, - to_bytes(rs->requested_bitmap_chunk_sectors), 0); + r = md_bitmap_resize(mddev->bitmap, mddev->dev_sectors, + to_bytes(rs->requested_bitmap_chunk_sectors), 0); if (r) DMERR("Failed to resize bitmap"); } diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 5903e492bb34..79eab1071ec2 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -326,9 +326,8 @@ static void recovery_complete(int read_err, unsigned long write_err, dm_rh_recovery_end(reg, !(read_err || write_err)); } -static int recover(struct mirror_set *ms, struct dm_region *reg) +static void recover(struct mirror_set *ms, struct dm_region *reg) { - int r; unsigned i; struct dm_io_region from, to[DM_KCOPYD_MAX_REGIONS], *dest; struct mirror *m; @@ -367,10 +366,8 @@ static int recover(struct mirror_set *ms, struct dm_region *reg) if (!errors_handled(ms)) set_bit(DM_KCOPYD_IGNORE_ERROR, &flags); - r = dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to, - flags, recovery_complete, reg); - - return r; + dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to, + flags, recovery_complete, reg); } static void reset_ms_flags(struct mirror_set *ms) @@ -388,7 +385,6 @@ static void do_recovery(struct mirror_set *ms) { struct dm_region *reg; struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); - int r; /* * Start quiescing some regions. @@ -398,11 +394,8 @@ static void do_recovery(struct mirror_set *ms) /* * Copy any already quiesced regions. */ - while ((reg = dm_rh_recovery_start(ms->rh))) { - r = recover(ms, reg); - if (r) - dm_rh_recovery_end(reg, 0); - } + while ((reg = dm_rh_recovery_start(ms->rh))) + recover(ms, reg); /* * Update the in sync flag. diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 97de7a7334d4..ae4b33d10924 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -85,7 +85,7 @@ struct dm_snapshot { * A list of pending exceptions that completed out of order. * Protected by kcopyd single-threaded callback. */ - struct list_head out_of_order_list; + struct rb_root out_of_order_tree; mempool_t pending_pool; @@ -200,7 +200,7 @@ struct dm_snap_pending_exception { /* A sequence number, it is used for in-order completion. */ sector_t exception_sequence; - struct list_head out_of_order_entry; + struct rb_node out_of_order_node; /* * For writing a complete chunk, bypassing the copy. 
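The dm-snap conversion above swaps the out-of-order list for an rb-tree keyed by exception_sequence, but the retirement rule is unchanged: whenever a completion matches exception_complete_sequence, keep retiring parked entries in sequence order. A user-space model of that rule (a sorted singly-linked list stands in for the rb-tree; it has the same ordering contract, just O(n) insertion):

    #include <stdio.h>
    #include <stdlib.h>

    struct pending {
            unsigned long long seq;
            struct pending *next;
    };

    static struct pending *out_of_order;         /* sorted, smallest seq first */
    static unsigned long long complete_sequence; /* next seq expected to finish */

    static void insert_sorted(struct pending *pe)
    {
            struct pending **p = &out_of_order;

            while (*p && (*p)->seq < pe->seq)
                    p = &(*p)->next;
            pe->next = *p;
            *p = pe;
    }

    /* mirrors copy_callback(): retire in order, park the rest */
    static void copy_done(unsigned long long seq)
    {
            if (seq == complete_sequence) {
                    complete_sequence++;
                    printf("retire %llu\n", seq);
                    while (out_of_order && out_of_order->seq == complete_sequence) {
                            struct pending *pe = out_of_order;

                            out_of_order = pe->next;
                            complete_sequence++;
                            printf("retire %llu\n", pe->seq);
                            free(pe);
                    }
            } else {
                    struct pending *pe = malloc(sizeof(*pe));

                    pe->seq = seq;
                    insert_sorted(pe);
            }
    }

    int main(void)
    {
            copy_done(1); /* parks */
            copy_done(2); /* parks */
            copy_done(0); /* retires 0, then drains 1 and 2 */
            return 0;
    }

The rb-tree matters when many kcopyd completions land far out of order: each completion drops from an O(n) list walk to an O(log n) insertion.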
@@ -1173,7 +1173,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) atomic_set(&s->pending_exceptions_count, 0); s->exception_start_sequence = 0; s->exception_complete_sequence = 0; - INIT_LIST_HEAD(&s->out_of_order_list); + s->out_of_order_tree = RB_ROOT; mutex_init(&s->lock); INIT_LIST_HEAD(&s->list); spin_lock_init(&s->pe_lock); @@ -1539,28 +1539,41 @@ static void copy_callback(int read_err, unsigned long write_err, void *context) pe->copy_error = read_err || write_err; if (pe->exception_sequence == s->exception_complete_sequence) { + struct rb_node *next; + s->exception_complete_sequence++; complete_exception(pe); - while (!list_empty(&s->out_of_order_list)) { - pe = list_entry(s->out_of_order_list.next, - struct dm_snap_pending_exception, out_of_order_entry); + next = rb_first(&s->out_of_order_tree); + while (next) { + pe = rb_entry(next, struct dm_snap_pending_exception, + out_of_order_node); if (pe->exception_sequence != s->exception_complete_sequence) break; + next = rb_next(next); s->exception_complete_sequence++; - list_del(&pe->out_of_order_entry); + rb_erase(&pe->out_of_order_node, &s->out_of_order_tree); complete_exception(pe); + cond_resched(); } } else { - struct list_head *lh; + struct rb_node *parent = NULL; + struct rb_node **p = &s->out_of_order_tree.rb_node; struct dm_snap_pending_exception *pe2; - list_for_each_prev(lh, &s->out_of_order_list) { - pe2 = list_entry(lh, struct dm_snap_pending_exception, out_of_order_entry); - if (pe2->exception_sequence < pe->exception_sequence) - break; + while (*p) { + pe2 = rb_entry(*p, struct dm_snap_pending_exception, out_of_order_node); + parent = *p; + + BUG_ON(pe->exception_sequence == pe2->exception_sequence); + if (pe->exception_sequence < pe2->exception_sequence) + p = &((*p)->rb_left); + else + p = &((*p)->rb_right); } - list_add(&pe->out_of_order_entry, lh); + + rb_link_node(&pe->out_of_order_node, parent, p); + rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree); } } @@ -1694,8 +1707,6 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) if (!s->valid) return DM_MAPIO_KILL; - /* FIXME: should only take write lock if we need - * to copy an exception */ mutex_lock(&s->lock); if (!s->valid || (unlikely(s->snapshot_overflowed) && diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index b900723bbd0f..7bd60a150f8f 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1220,18 +1220,13 @@ static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool) static void ll_zero(struct thin_c *tc, struct dm_thin_new_mapping *m, sector_t begin, sector_t end) { - int r; struct dm_io_region to; to.bdev = tc->pool_dev->bdev; to.sector = begin; to.count = end - begin; - r = dm_kcopyd_zero(tc->pool->copier, 1, &to, 0, copy_complete, m); - if (r < 0) { - DMERR_LIMIT("dm_kcopyd_zero() failed"); - copy_complete(1, 1, m); - } + dm_kcopyd_zero(tc->pool->copier, 1, &to, 0, copy_complete, m); } static void remap_and_issue_overwrite(struct thin_c *tc, struct bio *bio, @@ -1257,7 +1252,6 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, struct dm_bio_prison_cell *cell, struct bio *bio, sector_t len) { - int r; struct pool *pool = tc->pool; struct dm_thin_new_mapping *m = get_next_mapping(pool); @@ -1296,19 +1290,8 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, to.sector = data_dest * pool->sectors_per_block; to.count = len; - r = dm_kcopyd_copy(pool->copier, &from, 1, &to, - 0, copy_complete, m); - if (r < 0) { - 
DMERR_LIMIT("dm_kcopyd_copy() failed"); - copy_complete(1, 1, m); - - /* - * We allow the zero to be issued, to simplify the - * error path. Otherwise we'd need to start - * worrying about decrementing the prepare_actions - * counter. - */ - } + dm_kcopyd_copy(pool->copier, &from, 1, &to, + 0, copy_complete, m); /* * Do we need to zero a tail region? @@ -2520,6 +2503,8 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) case PM_WRITE: if (old_mode != new_mode) notify_of_pool_mode_change(pool, "write"); + if (old_mode == PM_OUT_OF_DATA_SPACE) + cancel_delayed_work_sync(&pool->no_space_timeout); pool->out_of_data_space = false; pool->pf.error_if_no_space = pt->requested_pf.error_if_no_space; dm_pool_metadata_read_write(pool->pmd); @@ -3890,6 +3875,8 @@ static void pool_status(struct dm_target *ti, status_type_t type, else DMEMIT("- "); + DMEMIT("%llu ", (unsigned long long)calc_metadata_threshold(pt)); + break; case STATUSTYPE_TABLE: @@ -3979,7 +3966,7 @@ static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 19, 0}, + .version = {1, 20, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -4353,7 +4340,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type thin_target = { .name = "thin", - .version = {1, 19, 0}, + .version = {1, 20, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 87107c995cb5..3a28a68f184c 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -457,7 +457,7 @@ static void ssd_commit_flushed(struct dm_writecache *wc) COMPLETION_INITIALIZER_ONSTACK(endio.c), ATOMIC_INIT(1), }; - unsigned bitmap_bits = wc->dirty_bitmap_size * BITS_PER_LONG; + unsigned bitmap_bits = wc->dirty_bitmap_size * 8; unsigned i = 0; while (1) { @@ -2240,6 +2240,8 @@ static void writecache_status(struct dm_target *ti, status_type_t type, DMEMIT("%c %s %s %u ", WC_MODE_PMEM(wc) ? 
'p' : 's', wc->dev->name, wc->ssd_dev->name, wc->block_size); extra_args = 0; + if (wc->start_sector) + extra_args += 2; if (wc->high_wm_percent_set) extra_args += 2; if (wc->low_wm_percent_set) @@ -2254,6 +2256,8 @@ static void writecache_status(struct dm_target *ti, status_type_t type, extra_args++; DMEMIT("%u", extra_args); + if (wc->start_sector) + DMEMIT(" start_sector %llu", (unsigned long long)wc->start_sector); if (wc->high_wm_percent_set) { x = (uint64_t)wc->freelist_high_watermark * 100; x += wc->n_blocks / 2; @@ -2280,7 +2284,7 @@ static void writecache_status(struct dm_target *ti, status_type_t type, static struct target_type writecache_target = { .name = "writecache", - .version = {1, 1, 0}, + .version = {1, 1, 1}, .module = THIS_MODULE, .ctr = writecache_ctr, .dtr = writecache_dtr, diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c index 44a119e12f1a..edf4b95eb075 100644 --- a/drivers/md/dm-zoned-reclaim.c +++ b/drivers/md/dm-zoned-reclaim.c @@ -161,10 +161,8 @@ static int dmz_reclaim_copy(struct dmz_reclaim *zrc, /* Copy the valid region */ set_bit(DMZ_RECLAIM_KCOPY, &zrc->flags); - ret = dm_kcopyd_copy(zrc->kc, &src, 1, &dst, flags, - dmz_reclaim_kcopy_end, zrc); - if (ret) - return ret; + dm_kcopyd_copy(zrc->kc, &src, 1, &dst, flags, + dmz_reclaim_kcopy_end, zrc); /* Wait for copy to complete */ wait_on_bit_io(&zrc->flags, DMZ_RECLAIM_KCOPY, diff --git a/drivers/md/dm.c b/drivers/md/dm.c index b0dd7027848b..20f7e4ef5342 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -609,7 +609,8 @@ static void start_io_acct(struct dm_io *io) io->start_time = jiffies; - generic_start_io_acct(md->queue, rw, bio_sectors(bio), &dm_disk(md)->part0); + generic_start_io_acct(md->queue, bio_op(bio), bio_sectors(bio), + &dm_disk(md)->part0); atomic_set(&dm_disk(md)->part0.in_flight[rw], atomic_inc_return(&md->pending[rw])); @@ -628,7 +629,8 @@ static void end_io_acct(struct dm_io *io) int pending; int rw = bio_data_dir(bio); - generic_end_io_acct(md->queue, rw, &dm_disk(md)->part0, io->start_time); + generic_end_io_acct(md->queue, bio_op(bio), &dm_disk(md)->part0, + io->start_time); if (unlikely(dm_stats_used(&md->stats))) dm_stats_account_io(&md->stats, bio_data_dir(bio), diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index f983c3fdf204..2fc8c113977f 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -46,8 +46,8 @@ static inline char *bmname(struct bitmap *bitmap) * if we find our page, we increment the page's refcount so that it stays * allocated while we're using it */ -static int bitmap_checkpage(struct bitmap_counts *bitmap, - unsigned long page, int create, int no_hijack) +static int md_bitmap_checkpage(struct bitmap_counts *bitmap, + unsigned long page, int create, int no_hijack) __releases(bitmap->lock) __acquires(bitmap->lock) { @@ -115,7 +115,7 @@ __acquires(bitmap->lock) /* if page is completely empty, put it back on the free list, or dealloc it */ /* if page was hijacked, unmark the flag so it might get alloced next time */ /* Note: lock should be held when calling this */ -static void bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page) +static void md_bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page) { char *ptr; @@ -280,7 +280,7 @@ restart: return -EINVAL; } -static void bitmap_file_kick(struct bitmap *bitmap); +static void md_bitmap_file_kick(struct bitmap *bitmap); /* * write out a page to a file */ @@ -310,7 +310,7 @@ static void write_page(struct bitmap *bitmap, struct page *page, int 
wait) atomic_read(&bitmap->pending_writes)==0); } if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) - bitmap_file_kick(bitmap); + md_bitmap_file_kick(bitmap); } static void end_bitmap_write(struct buffer_head *bh, int uptodate) @@ -421,11 +421,11 @@ out: */ /* - * bitmap_wait_writes() should be called before writing any bitmap + * md_bitmap_wait_writes() should be called before writing any bitmap * blocks, to ensure previous writes, particularly from - * bitmap_daemon_work(), have completed. + * md_bitmap_daemon_work(), have completed. */ -static void bitmap_wait_writes(struct bitmap *bitmap) +static void md_bitmap_wait_writes(struct bitmap *bitmap) { if (bitmap->storage.file) wait_event(bitmap->write_wait, @@ -443,7 +443,7 @@ static void bitmap_wait_writes(struct bitmap *bitmap) /* update the event counter and sync the superblock to disk */ -void bitmap_update_sb(struct bitmap *bitmap) +void md_bitmap_update_sb(struct bitmap *bitmap) { bitmap_super_t *sb; @@ -476,10 +476,10 @@ void bitmap_update_sb(struct bitmap *bitmap) kunmap_atomic(sb); write_page(bitmap, bitmap->storage.sb_page, 1); } -EXPORT_SYMBOL(bitmap_update_sb); +EXPORT_SYMBOL(md_bitmap_update_sb); /* print out the bitmap file superblock */ -void bitmap_print_sb(struct bitmap *bitmap) +void md_bitmap_print_sb(struct bitmap *bitmap) { bitmap_super_t *sb; @@ -518,7 +518,7 @@ void bitmap_print_sb(struct bitmap *bitmap) * * Returns: 0 on success, -Exxx on error */ -static int bitmap_new_disk_sb(struct bitmap *bitmap) +static int md_bitmap_new_disk_sb(struct bitmap *bitmap) { bitmap_super_t *sb; unsigned long chunksize, daemon_sleep, write_behind; @@ -577,7 +577,7 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap) } /* read the superblock from the bitmap file and initialize some bitmap fields */ -static int bitmap_read_sb(struct bitmap *bitmap) +static int md_bitmap_read_sb(struct bitmap *bitmap) { char *reason = NULL; bitmap_super_t *sb; @@ -727,7 +727,7 @@ out_no_sb: bitmap->mddev->bitmap_info.space > sectors_reserved) bitmap->mddev->bitmap_info.space = sectors_reserved; if (err) { - bitmap_print_sb(bitmap); + md_bitmap_print_sb(bitmap); if (bitmap->cluster_slot < 0) md_cluster_stop(bitmap->mddev); } @@ -774,9 +774,9 @@ static inline struct page *filemap_get_page(struct bitmap_storage *store, return store->filemap[file_page_index(store, chunk)]; } -static int bitmap_storage_alloc(struct bitmap_storage *store, - unsigned long chunks, int with_super, - int slot_number) +static int md_bitmap_storage_alloc(struct bitmap_storage *store, + unsigned long chunks, int with_super, + int slot_number) { int pnum, offset = 0; unsigned long num_pages; @@ -830,7 +830,7 @@ static int bitmap_storage_alloc(struct bitmap_storage *store, return 0; } -static void bitmap_file_unmap(struct bitmap_storage *store) +static void md_bitmap_file_unmap(struct bitmap_storage *store) { struct page **map, *sb_page; int pages; @@ -862,12 +862,12 @@ static void bitmap_file_unmap(struct bitmap_storage *store) * then it is no longer reliable, so we stop using it and we mark the file * as failed in the superblock */ -static void bitmap_file_kick(struct bitmap *bitmap) +static void md_bitmap_file_kick(struct bitmap *bitmap) { char *path, *ptr = NULL; if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) { - bitmap_update_sb(bitmap); + md_bitmap_update_sb(bitmap); if (bitmap->storage.file) { path = kmalloc(PAGE_SIZE, GFP_KERNEL); @@ -923,7 +923,7 @@ static inline int test_and_clear_page_attr(struct bitmap *bitmap, int pnum, * we set the bit immediately, then we 
record the page number so that * when an unplug occurs, we can flush the dirty pages out to disk */ -static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) +static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) { unsigned long bit; struct page *page; @@ -952,7 +952,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) set_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_DIRTY); } -static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block) +static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block) { unsigned long bit; struct page *page; @@ -980,7 +980,7 @@ static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block) } } -static int bitmap_file_test_bit(struct bitmap *bitmap, sector_t block) +static int md_bitmap_file_test_bit(struct bitmap *bitmap, sector_t block) { unsigned long bit; struct page *page; @@ -1005,7 +1005,7 @@ static int bitmap_file_test_bit(struct bitmap *bitmap, sector_t block) /* this gets called when the md device is ready to unplug its underlying * (slave) device queues -- before we let any writes go down, we need to * sync the dirty pages of the bitmap file to disk */ -void bitmap_unplug(struct bitmap *bitmap) +void md_bitmap_unplug(struct bitmap *bitmap) { unsigned long i; int dirty, need_write; @@ -1025,7 +1025,7 @@ void bitmap_unplug(struct bitmap *bitmap) BITMAP_PAGE_NEEDWRITE); if (dirty || need_write) { if (!writing) { - bitmap_wait_writes(bitmap); + md_bitmap_wait_writes(bitmap); if (bitmap->mddev->queue) blk_add_trace_msg(bitmap->mddev->queue, "md bitmap_unplug"); @@ -1036,14 +1036,14 @@ void bitmap_unplug(struct bitmap *bitmap) } } if (writing) - bitmap_wait_writes(bitmap); + md_bitmap_wait_writes(bitmap); if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) - bitmap_file_kick(bitmap); + md_bitmap_file_kick(bitmap); } -EXPORT_SYMBOL(bitmap_unplug); +EXPORT_SYMBOL(md_bitmap_unplug); -static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed); +static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed); /* * bitmap_init_from_disk -- called at bitmap_create time to initialize * the in-memory bitmap from the on-disk bitmap -- also, sets up the * memory mapping of the bitmap file @@ -1055,7 +1055,7 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int n * We ignore all bits for sectors that end earlier than 'start'. * This is used when reading an out-of-date bitmap... 
*/ -static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) +static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) { unsigned long i, chunks, index, oldindex, bit, node_offset = 0; struct page *page = NULL; @@ -1078,9 +1078,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) /* if the disk bit is set, set the memory bit */ int needed = ((sector_t)(i+1) << (bitmap->counts.chunkshift) >= start); - bitmap_set_memory_bits(bitmap, - (sector_t)i << bitmap->counts.chunkshift, - needed); + md_bitmap_set_memory_bits(bitmap, + (sector_t)i << bitmap->counts.chunkshift, + needed); } return 0; } @@ -1159,9 +1159,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) /* if the disk bit is set, set the memory bit */ int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift >= start); - bitmap_set_memory_bits(bitmap, - (sector_t)i << bitmap->counts.chunkshift, - needed); + md_bitmap_set_memory_bits(bitmap, + (sector_t)i << bitmap->counts.chunkshift, + needed); bit_cnt++; } offset = 0; @@ -1179,7 +1179,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) return ret; } -void bitmap_write_all(struct bitmap *bitmap) +void md_bitmap_write_all(struct bitmap *bitmap) { /* We don't actually write all bitmap blocks here, * just flag them as needing to be written @@ -1198,16 +1198,16 @@ void bitmap_write_all(struct bitmap *bitmap) bitmap->allclean = 0; } -static void bitmap_count_page(struct bitmap_counts *bitmap, - sector_t offset, int inc) +static void md_bitmap_count_page(struct bitmap_counts *bitmap, + sector_t offset, int inc) { sector_t chunk = offset >> bitmap->chunkshift; unsigned long page = chunk >> PAGE_COUNTER_SHIFT; bitmap->bp[page].count += inc; - bitmap_checkfree(bitmap, page); + md_bitmap_checkfree(bitmap, page); } -static void bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset) +static void md_bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset) { sector_t chunk = offset >> bitmap->chunkshift; unsigned long page = chunk >> PAGE_COUNTER_SHIFT; @@ -1217,16 +1217,16 @@ static void bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset) bp->pending = 1; } -static bitmap_counter_t *bitmap_get_counter(struct bitmap_counts *bitmap, - sector_t offset, sector_t *blocks, - int create); +static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap, + sector_t offset, sector_t *blocks, + int create); /* * bitmap daemon -- periodically wakes up to clean bits and flush pages * out to disk */ -void bitmap_daemon_work(struct mddev *mddev) +void md_bitmap_daemon_work(struct mddev *mddev) { struct bitmap *bitmap; unsigned long j; @@ -1301,10 +1301,8 @@ void bitmap_daemon_work(struct mddev *mddev) } counts->bp[j >> PAGE_COUNTER_SHIFT].pending = 0; } - bmc = bitmap_get_counter(counts, - block, - &blocks, 0); + bmc = md_bitmap_get_counter(counts, block, &blocks, 0); if (!bmc) { j |= PAGE_COUNTER_MASK; continue; @@ -1312,17 +1310,17 @@ void bitmap_daemon_work(struct mddev *mddev) if (*bmc == 1 && !bitmap->need_sync) { /* We can clear the bit */ *bmc = 0; - bitmap_count_page(counts, block, -1); - bitmap_file_clear_bit(bitmap, block); + md_bitmap_count_page(counts, block, -1); + md_bitmap_file_clear_bit(bitmap, block); } else if (*bmc && *bmc <= 2) { *bmc = 1; - bitmap_set_pending(counts, block); + md_bitmap_set_pending(counts, block); bitmap->allclean = 0; } } spin_unlock_irq(&counts->lock); - bitmap_wait_writes(bitmap); + md_bitmap_wait_writes(bitmap); /* Now 
start writeout on any page in NEEDWRITE that isn't DIRTY. * DIRTY pages need to be written by bitmap_unplug so it can wait * for them. @@ -1352,9 +1350,9 @@ void bitmap_daemon_work(struct mddev *mddev) mutex_unlock(&mddev->bitmap_info.mutex); } -static bitmap_counter_t *bitmap_get_counter(struct bitmap_counts *bitmap, - sector_t offset, sector_t *blocks, - int create) +static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap, + sector_t offset, sector_t *blocks, + int create) __releases(bitmap->lock) __acquires(bitmap->lock) { @@ -1368,7 +1366,7 @@ __acquires(bitmap->lock) sector_t csize; int err; - err = bitmap_checkpage(bitmap, page, create, 0); + err = md_bitmap_checkpage(bitmap, page, create, 0); if (bitmap->bp[page].hijacked || bitmap->bp[page].map == NULL) @@ -1394,7 +1392,7 @@ __acquires(bitmap->lock) &(bitmap->bp[page].map[pageoff]); } -int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind) +int md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind) { if (!bitmap) return 0; @@ -1415,7 +1413,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect bitmap_counter_t *bmc; spin_lock_irq(&bitmap->counts.lock); - bmc = bitmap_get_counter(&bitmap->counts, offset, &blocks, 1); + bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 1); if (!bmc) { spin_unlock_irq(&bitmap->counts.lock); return 0; @@ -1437,8 +1435,8 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect switch (*bmc) { case 0: - bitmap_file_set_bit(bitmap, offset); - bitmap_count_page(&bitmap->counts, offset, 1); + md_bitmap_file_set_bit(bitmap, offset); + md_bitmap_count_page(&bitmap->counts, offset, 1); /* fall through */ case 1: *bmc = 2; @@ -1456,10 +1454,10 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect } return 0; } -EXPORT_SYMBOL(bitmap_startwrite); +EXPORT_SYMBOL(md_bitmap_startwrite); -void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, - int success, int behind) +void md_bitmap_endwrite(struct bitmap *bitmap, sector_t offset, + unsigned long sectors, int success, int behind) { if (!bitmap) return; @@ -1477,7 +1475,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto bitmap_counter_t *bmc; spin_lock_irqsave(&bitmap->counts.lock, flags); - bmc = bitmap_get_counter(&bitmap->counts, offset, &blocks, 0); + bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 0); if (!bmc) { spin_unlock_irqrestore(&bitmap->counts.lock, flags); return; @@ -1498,7 +1496,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto (*bmc)--; if (*bmc <= 2) { - bitmap_set_pending(&bitmap->counts, offset); + md_bitmap_set_pending(&bitmap->counts, offset); bitmap->allclean = 0; } spin_unlock_irqrestore(&bitmap->counts.lock, flags); @@ -1509,7 +1507,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto sectors = 0; } } -EXPORT_SYMBOL(bitmap_endwrite); +EXPORT_SYMBOL(md_bitmap_endwrite); static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded) @@ -1521,7 +1519,7 @@ static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t return 1; /* always resync if no bitmap */ } spin_lock_irq(&bitmap->counts.lock); - bmc = bitmap_get_counter(&bitmap->counts, offset, blocks, 0); + bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0); rv = 0; if (bmc) { 
/* locked */ @@ -1539,8 +1537,8 @@ static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t return rv; } -int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, - int degraded) +int md_bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, + int degraded) { /* bitmap_start_sync must always report on multiples of whole * pages, otherwise resync (which is very PAGE_SIZE based) will @@ -1561,9 +1559,9 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, } return rv; } -EXPORT_SYMBOL(bitmap_start_sync); +EXPORT_SYMBOL(md_bitmap_start_sync); -void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted) +void md_bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted) { bitmap_counter_t *bmc; unsigned long flags; @@ -1573,7 +1571,7 @@ void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, i return; } spin_lock_irqsave(&bitmap->counts.lock, flags); - bmc = bitmap_get_counter(&bitmap->counts, offset, blocks, 0); + bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0); if (bmc == NULL) goto unlock; /* locked */ @@ -1584,7 +1582,7 @@ void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, i *bmc |= NEEDED_MASK; else { if (*bmc <= 2) { - bitmap_set_pending(&bitmap->counts, offset); + md_bitmap_set_pending(&bitmap->counts, offset); bitmap->allclean = 0; } } @@ -1592,9 +1590,9 @@ void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, i unlock: spin_unlock_irqrestore(&bitmap->counts.lock, flags); } -EXPORT_SYMBOL(bitmap_end_sync); +EXPORT_SYMBOL(md_bitmap_end_sync); -void bitmap_close_sync(struct bitmap *bitmap) +void md_bitmap_close_sync(struct bitmap *bitmap) { /* Sync has finished, and any bitmap chunks that weren't synced * properly have been aborted. 
It remains to us to clear the @@ -1605,13 +1603,13 @@ void bitmap_close_sync(struct bitmap *bitmap) if (!bitmap) return; while (sector < bitmap->mddev->resync_max_sectors) { - bitmap_end_sync(bitmap, sector, &blocks, 0); + md_bitmap_end_sync(bitmap, sector, &blocks, 0); sector += blocks; } } -EXPORT_SYMBOL(bitmap_close_sync); +EXPORT_SYMBOL(md_bitmap_close_sync); -void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force) +void md_bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force) { sector_t s = 0; sector_t blocks; @@ -1633,15 +1631,15 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force) sector &= ~((1ULL << bitmap->counts.chunkshift) - 1); s = 0; while (s < sector && s < bitmap->mddev->resync_max_sectors) { - bitmap_end_sync(bitmap, s, &blocks, 0); + md_bitmap_end_sync(bitmap, s, &blocks, 0); s += blocks; } bitmap->last_end_sync = jiffies; sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed"); } -EXPORT_SYMBOL(bitmap_cond_end_sync); +EXPORT_SYMBOL(md_bitmap_cond_end_sync); -void bitmap_sync_with_cluster(struct mddev *mddev, +void md_bitmap_sync_with_cluster(struct mddev *mddev, sector_t old_lo, sector_t old_hi, sector_t new_lo, sector_t new_hi) { @@ -1649,20 +1647,20 @@ void bitmap_sync_with_cluster(struct mddev *mddev, sector_t sector, blocks = 0; for (sector = old_lo; sector < new_lo; ) { - bitmap_end_sync(bitmap, sector, &blocks, 0); + md_bitmap_end_sync(bitmap, sector, &blocks, 0); sector += blocks; } WARN((blocks > new_lo) && old_lo, "alignment is not correct for lo\n"); for (sector = old_hi; sector < new_hi; ) { - bitmap_start_sync(bitmap, sector, &blocks, 0); + md_bitmap_start_sync(bitmap, sector, &blocks, 0); sector += blocks; } WARN((blocks > new_hi) && old_hi, "alignment is not correct for hi\n"); } -EXPORT_SYMBOL(bitmap_sync_with_cluster); +EXPORT_SYMBOL(md_bitmap_sync_with_cluster); -static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) +static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) { /* For each chunk covered by any of these sectors, set the * counter to 2 and possibly set resync_needed. 
They should all @@ -1672,15 +1670,15 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int n sector_t secs; bitmap_counter_t *bmc; spin_lock_irq(&bitmap->counts.lock); - bmc = bitmap_get_counter(&bitmap->counts, offset, &secs, 1); + bmc = md_bitmap_get_counter(&bitmap->counts, offset, &secs, 1); if (!bmc) { spin_unlock_irq(&bitmap->counts.lock); return; } if (!*bmc) { *bmc = 2; - bitmap_count_page(&bitmap->counts, offset, 1); - bitmap_set_pending(&bitmap->counts, offset); + md_bitmap_count_page(&bitmap->counts, offset, 1); + md_bitmap_set_pending(&bitmap->counts, offset); bitmap->allclean = 0; } if (needed) @@ -1689,14 +1687,14 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int n } /* dirty the memory and file bits for bitmap chunks "s" to "e" */ -void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e) +void md_bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e) { unsigned long chunk; for (chunk = s; chunk <= e; chunk++) { sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift; - bitmap_set_memory_bits(bitmap, sec, 1); - bitmap_file_set_bit(bitmap, sec); + md_bitmap_set_memory_bits(bitmap, sec, 1); + md_bitmap_file_set_bit(bitmap, sec); if (sec < bitmap->mddev->recovery_cp) /* We are asserting that the array is dirty, * so move the recovery_cp address back so @@ -1709,7 +1707,7 @@ void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e) /* * flush out any pending updates */ -void bitmap_flush(struct mddev *mddev) +void md_bitmap_flush(struct mddev *mddev) { struct bitmap *bitmap = mddev->bitmap; long sleep; @@ -1722,18 +1720,18 @@ void bitmap_flush(struct mddev *mddev) */ sleep = mddev->bitmap_info.daemon_sleep * 2; bitmap->daemon_lastrun -= sleep; - bitmap_daemon_work(mddev); + md_bitmap_daemon_work(mddev); bitmap->daemon_lastrun -= sleep; - bitmap_daemon_work(mddev); + md_bitmap_daemon_work(mddev); bitmap->daemon_lastrun -= sleep; - bitmap_daemon_work(mddev); - bitmap_update_sb(bitmap); + md_bitmap_daemon_work(mddev); + md_bitmap_update_sb(bitmap); } /* * free memory that was allocated */ -void bitmap_free(struct bitmap *bitmap) +void md_bitmap_free(struct bitmap *bitmap) { unsigned long k, pages; struct bitmap_page *bp; @@ -1753,7 +1751,7 @@ void bitmap_free(struct bitmap *bitmap) atomic_read(&bitmap->pending_writes) == 0); /* release the bitmap file */ - bitmap_file_unmap(&bitmap->storage); + md_bitmap_file_unmap(&bitmap->storage); bp = bitmap->counts.bp; pages = bitmap->counts.pages; @@ -1767,9 +1765,9 @@ void bitmap_free(struct bitmap *bitmap) kfree(bp); kfree(bitmap); } -EXPORT_SYMBOL(bitmap_free); +EXPORT_SYMBOL(md_bitmap_free); -void bitmap_wait_behind_writes(struct mddev *mddev) +void md_bitmap_wait_behind_writes(struct mddev *mddev) { struct bitmap *bitmap = mddev->bitmap; @@ -1783,14 +1781,14 @@ void bitmap_wait_behind_writes(struct mddev *mddev) } } -void bitmap_destroy(struct mddev *mddev) +void md_bitmap_destroy(struct mddev *mddev) { struct bitmap *bitmap = mddev->bitmap; if (!bitmap) /* there was no bitmap */ return; - bitmap_wait_behind_writes(mddev); + md_bitmap_wait_behind_writes(mddev); mutex_lock(&mddev->bitmap_info.mutex); spin_lock(&mddev->lock); @@ -1800,7 +1798,7 @@ void bitmap_destroy(struct mddev *mddev) if (mddev->thread) mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; - bitmap_free(bitmap); + md_bitmap_free(bitmap); } /* @@ -1808,7 +1806,7 @@ void bitmap_destroy(struct mddev *mddev) * if this returns an error, bitmap_destroy must 
be called to do clean up * once mddev->bitmap is set */ -struct bitmap *bitmap_create(struct mddev *mddev, int slot) +struct bitmap *md_bitmap_create(struct mddev *mddev, int slot) { struct bitmap *bitmap; sector_t blocks = mddev->resync_max_sectors; @@ -1863,9 +1861,9 @@ struct bitmap *bitmap_create(struct mddev *mddev, int slot) * instructing us to create a new on-disk bitmap instance. */ if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags)) - err = bitmap_new_disk_sb(bitmap); + err = md_bitmap_new_disk_sb(bitmap); else - err = bitmap_read_sb(bitmap); + err = md_bitmap_read_sb(bitmap); } else { err = 0; if (mddev->bitmap_info.chunksize == 0 || @@ -1878,7 +1876,7 @@ struct bitmap *bitmap_create(struct mddev *mddev, int slot) goto error; bitmap->daemon_lastrun = jiffies; - err = bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, 1); + err = md_bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, 1); if (err) goto error; @@ -1891,11 +1889,11 @@ struct bitmap *bitmap_create(struct mddev *mddev, int slot) return bitmap; error: - bitmap_free(bitmap); + md_bitmap_free(bitmap); return ERR_PTR(err); } -int bitmap_load(struct mddev *mddev) +int md_bitmap_load(struct mddev *mddev) { int err = 0; sector_t start = 0; @@ -1915,10 +1913,10 @@ int bitmap_load(struct mddev *mddev) */ while (sector < mddev->resync_max_sectors) { sector_t blocks; - bitmap_start_sync(bitmap, sector, &blocks, 0); + md_bitmap_start_sync(bitmap, sector, &blocks, 0); sector += blocks; } - bitmap_close_sync(bitmap); + md_bitmap_close_sync(bitmap); if (mddev->degraded == 0 || bitmap->events_cleared == mddev->events) @@ -1927,7 +1925,7 @@ int bitmap_load(struct mddev *mddev) start = mddev->recovery_cp; mutex_lock(&mddev->bitmap_info.mutex); - err = bitmap_init_from_disk(bitmap, start); + err = md_bitmap_init_from_disk(bitmap, start); mutex_unlock(&mddev->bitmap_info.mutex); if (err) @@ -1940,29 +1938,29 @@ int bitmap_load(struct mddev *mddev) mddev->thread->timeout = mddev->bitmap_info.daemon_sleep; md_wakeup_thread(mddev->thread); - bitmap_update_sb(bitmap); + md_bitmap_update_sb(bitmap); if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) err = -EIO; out: return err; } -EXPORT_SYMBOL_GPL(bitmap_load); +EXPORT_SYMBOL_GPL(md_bitmap_load); struct bitmap *get_bitmap_from_slot(struct mddev *mddev, int slot) { int rv = 0; struct bitmap *bitmap; - bitmap = bitmap_create(mddev, slot); + bitmap = md_bitmap_create(mddev, slot); if (IS_ERR(bitmap)) { rv = PTR_ERR(bitmap); return ERR_PTR(rv); } - rv = bitmap_init_from_disk(bitmap, 0); + rv = md_bitmap_init_from_disk(bitmap, 0); if (rv) { - bitmap_free(bitmap); + md_bitmap_free(bitmap); return ERR_PTR(rv); } @@ -1973,7 +1971,7 @@ EXPORT_SYMBOL(get_bitmap_from_slot); /* Loads the bitmap associated with slot and copies the resync information * to our bitmap */ -int bitmap_copy_from_slot(struct mddev *mddev, int slot, +int md_bitmap_copy_from_slot(struct mddev *mddev, int slot, sector_t *low, sector_t *high, bool clear_bits) { int rv = 0, i, j; @@ -1990,35 +1988,35 @@ int bitmap_copy_from_slot(struct mddev *mddev, int slot, counts = &bitmap->counts; for (j = 0; j < counts->chunks; j++) { block = (sector_t)j << counts->chunkshift; - if (bitmap_file_test_bit(bitmap, block)) { + if (md_bitmap_file_test_bit(bitmap, block)) { if (!lo) lo = block; hi = block; - bitmap_file_clear_bit(bitmap, block); - bitmap_set_memory_bits(mddev->bitmap, block, 1); - bitmap_file_set_bit(mddev->bitmap, block); + md_bitmap_file_clear_bit(bitmap, block); + md_bitmap_set_memory_bits(mddev->bitmap, 
block, 1); + md_bitmap_file_set_bit(mddev->bitmap, block); } } if (clear_bits) { - bitmap_update_sb(bitmap); + md_bitmap_update_sb(bitmap); /* BITMAP_PAGE_PENDING is set, but bitmap_unplug needs * BITMAP_PAGE_DIRTY or _NEEDWRITE to write ... */ for (i = 0; i < bitmap->storage.file_pages; i++) if (test_page_attr(bitmap, i, BITMAP_PAGE_PENDING)) set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE); - bitmap_unplug(bitmap); + md_bitmap_unplug(bitmap); } - bitmap_unplug(mddev->bitmap); + md_bitmap_unplug(mddev->bitmap); *low = lo; *high = hi; return rv; } -EXPORT_SYMBOL_GPL(bitmap_copy_from_slot); +EXPORT_SYMBOL_GPL(md_bitmap_copy_from_slot); -void bitmap_status(struct seq_file *seq, struct bitmap *bitmap) +void md_bitmap_status(struct seq_file *seq, struct bitmap *bitmap) { unsigned long chunk_kb; struct bitmap_counts *counts; @@ -2045,7 +2043,7 @@ void bitmap_status(struct seq_file *seq, struct bitmap *bitmap) seq_printf(seq, "\n"); } -int bitmap_resize(struct bitmap *bitmap, sector_t blocks, +int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, int chunksize, int init) { /* If chunk_size is 0, choose an appropriate chunk size. @@ -2106,12 +2104,12 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift); memset(&store, 0, sizeof(store)); if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file) - ret = bitmap_storage_alloc(&store, chunks, - !bitmap->mddev->bitmap_info.external, - mddev_is_clustered(bitmap->mddev) - ? bitmap->cluster_slot : 0); + ret = md_bitmap_storage_alloc(&store, chunks, + !bitmap->mddev->bitmap_info.external, + mddev_is_clustered(bitmap->mddev) + ? bitmap->cluster_slot : 0); if (ret) { - bitmap_file_unmap(&store); + md_bitmap_file_unmap(&store); goto err; } @@ -2120,7 +2118,7 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, new_bp = kcalloc(pages, sizeof(*new_bp), GFP_KERNEL); ret = -ENOMEM; if (!new_bp) { - bitmap_file_unmap(&store); + md_bitmap_file_unmap(&store); goto err; } @@ -2134,7 +2132,7 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, memcpy(page_address(store.sb_page), page_address(bitmap->storage.sb_page), sizeof(bitmap_super_t)); - bitmap_file_unmap(&bitmap->storage); + md_bitmap_file_unmap(&bitmap->storage); bitmap->storage = store; old_counts = bitmap->counts; @@ -2154,7 +2152,7 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, if (mddev_is_clustered(bitmap->mddev)) { unsigned long page; for (page = 0; page < pages; page++) { - ret = bitmap_checkpage(&bitmap->counts, page, 1, 1); + ret = md_bitmap_checkpage(&bitmap->counts, page, 1, 1); if (ret) { unsigned long k; @@ -2184,27 +2182,23 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, bitmap_counter_t *bmc_old, *bmc_new; int set; - bmc_old = bitmap_get_counter(&old_counts, block, - &old_blocks, 0); + bmc_old = md_bitmap_get_counter(&old_counts, block, &old_blocks, 0); set = bmc_old && NEEDED(*bmc_old); if (set) { - bmc_new = bitmap_get_counter(&bitmap->counts, block, - &new_blocks, 1); + bmc_new = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1); if (*bmc_new == 0) { /* need to set on-disk bits too. 
*/ sector_t end = block + new_blocks; sector_t start = block >> chunkshift; start <<= chunkshift; while (start < end) { - bitmap_file_set_bit(bitmap, block); + md_bitmap_file_set_bit(bitmap, block); start += 1 << chunkshift; } *bmc_new = 2; - bitmap_count_page(&bitmap->counts, - block, 1); - bitmap_set_pending(&bitmap->counts, - block); + md_bitmap_count_page(&bitmap->counts, block, 1); + md_bitmap_set_pending(&bitmap->counts, block); } *bmc_new |= NEEDED_MASK; if (new_blocks < old_blocks) @@ -2225,18 +2219,15 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, int i; while (block < (chunks << chunkshift)) { bitmap_counter_t *bmc; - bmc = bitmap_get_counter(&bitmap->counts, block, - &new_blocks, 1); + bmc = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1); if (bmc) { /* new space. It needs to be resynced, so * we set NEEDED_MASK. */ if (*bmc == 0) { *bmc = NEEDED_MASK | 2; - bitmap_count_page(&bitmap->counts, - block, 1); - bitmap_set_pending(&bitmap->counts, - block); + md_bitmap_count_page(&bitmap->counts, block, 1); + md_bitmap_set_pending(&bitmap->counts, block); } } block += new_blocks; @@ -2247,14 +2238,14 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, spin_unlock_irq(&bitmap->counts.lock); if (!init) { - bitmap_unplug(bitmap); + md_bitmap_unplug(bitmap); bitmap->mddev->pers->quiesce(bitmap->mddev, 0); } ret = 0; err: return ret; } -EXPORT_SYMBOL_GPL(bitmap_resize); +EXPORT_SYMBOL_GPL(md_bitmap_resize); static ssize_t location_show(struct mddev *mddev, char *page) @@ -2298,7 +2289,7 @@ location_store(struct mddev *mddev, const char *buf, size_t len) } if (mddev->pers) { mddev->pers->quiesce(mddev, 1); - bitmap_destroy(mddev); + md_bitmap_destroy(mddev); mddev->pers->quiesce(mddev, 0); } mddev->bitmap_info.offset = 0; @@ -2337,18 +2328,18 @@ location_store(struct mddev *mddev, const char *buf, size_t len) if (mddev->pers) { struct bitmap *bitmap; mddev->pers->quiesce(mddev, 1); - bitmap = bitmap_create(mddev, -1); + bitmap = md_bitmap_create(mddev, -1); if (IS_ERR(bitmap)) rv = PTR_ERR(bitmap); else { mddev->bitmap = bitmap; - rv = bitmap_load(mddev); + rv = md_bitmap_load(mddev); if (rv) mddev->bitmap_info.offset = 0; } mddev->pers->quiesce(mddev, 0); if (rv) { - bitmap_destroy(mddev); + md_bitmap_destroy(mddev); goto out; } } diff --git a/drivers/md/md-bitmap.h b/drivers/md/md-bitmap.h index 5df35ca90f58..cfd7395de8fd 100644 --- a/drivers/md/md-bitmap.h +++ b/drivers/md/md-bitmap.h @@ -236,43 +236,43 @@ struct bitmap { /* the bitmap API */ /* these are used only by md/bitmap */ -struct bitmap *bitmap_create(struct mddev *mddev, int slot); -int bitmap_load(struct mddev *mddev); -void bitmap_flush(struct mddev *mddev); -void bitmap_destroy(struct mddev *mddev); +struct bitmap *md_bitmap_create(struct mddev *mddev, int slot); +int md_bitmap_load(struct mddev *mddev); +void md_bitmap_flush(struct mddev *mddev); +void md_bitmap_destroy(struct mddev *mddev); -void bitmap_print_sb(struct bitmap *bitmap); -void bitmap_update_sb(struct bitmap *bitmap); -void bitmap_status(struct seq_file *seq, struct bitmap *bitmap); +void md_bitmap_print_sb(struct bitmap *bitmap); +void md_bitmap_update_sb(struct bitmap *bitmap); +void md_bitmap_status(struct seq_file *seq, struct bitmap *bitmap); -int bitmap_setallbits(struct bitmap *bitmap); -void bitmap_write_all(struct bitmap *bitmap); +int md_bitmap_setallbits(struct bitmap *bitmap); +void md_bitmap_write_all(struct bitmap *bitmap); -void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e); 
+void md_bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e); /* these are exported */ -int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, - unsigned long sectors, int behind); -void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, +int md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset, + unsigned long sectors, int behind); +void md_bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int success, int behind); -int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded); -void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted); -void bitmap_close_sync(struct bitmap *bitmap); -void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force); -void bitmap_sync_with_cluster(struct mddev *mddev, - sector_t old_lo, sector_t old_hi, - sector_t new_lo, sector_t new_hi); - -void bitmap_unplug(struct bitmap *bitmap); -void bitmap_daemon_work(struct mddev *mddev); - -int bitmap_resize(struct bitmap *bitmap, sector_t blocks, - int chunksize, int init); +int md_bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded); +void md_bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted); +void md_bitmap_close_sync(struct bitmap *bitmap); +void md_bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force); +void md_bitmap_sync_with_cluster(struct mddev *mddev, + sector_t old_lo, sector_t old_hi, + sector_t new_lo, sector_t new_hi); + +void md_bitmap_unplug(struct bitmap *bitmap); +void md_bitmap_daemon_work(struct mddev *mddev); + +int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, + int chunksize, int init); struct bitmap *get_bitmap_from_slot(struct mddev *mddev, int slot); -int bitmap_copy_from_slot(struct mddev *mddev, int slot, - sector_t *lo, sector_t *hi, bool clear_bits); -void bitmap_free(struct bitmap *bitmap); -void bitmap_wait_behind_writes(struct mddev *mddev); +int md_bitmap_copy_from_slot(struct mddev *mddev, int slot, + sector_t *lo, sector_t *hi, bool clear_bits); +void md_bitmap_free(struct bitmap *bitmap); +void md_bitmap_wait_behind_writes(struct mddev *mddev); #endif #endif diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 021cbf9ef1bf..94329e03001e 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -304,15 +304,6 @@ static void recover_bitmaps(struct md_thread *thread) while (cinfo->recovery_map) { slot = fls64((u64)cinfo->recovery_map) - 1; - /* Clear suspend_area associated with the bitmap */ - spin_lock_irq(&cinfo->suspend_lock); - list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list) - if (slot == s->slot) { - list_del(&s->list); - kfree(s); - } - spin_unlock_irq(&cinfo->suspend_lock); - snprintf(str, 64, "bitmap%04d", slot); bm_lockres = lockres_init(mddev, str, NULL, 1); if (!bm_lockres) { @@ -326,19 +317,35 @@ static void recover_bitmaps(struct md_thread *thread) str, ret); goto clear_bit; } - ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi, true); + ret = md_bitmap_copy_from_slot(mddev, slot, &lo, &hi, true); if (ret) { pr_err("md-cluster: Could not copy data from bitmap %d\n", slot); goto clear_bit; } + + /* Clear suspend_area associated with the bitmap */ + spin_lock_irq(&cinfo->suspend_lock); + list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list) + if (slot == s->slot) { + list_del(&s->list); + kfree(s); + } + spin_unlock_irq(&cinfo->suspend_lock); + if (hi > 0) { if (lo < 
mddev->recovery_cp) mddev->recovery_cp = lo; /* wake up thread to continue resync in case resync * is not finished */ if (mddev->recovery_cp != MaxSector) { - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - md_wakeup_thread(mddev->thread); + /* + * clear the REMOTE flag since we will launch + * resync thread in current node. + */ + clear_bit(MD_RESYNCING_REMOTE, + &mddev->recovery); + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + md_wakeup_thread(mddev->thread); } } clear_bit: @@ -457,6 +464,11 @@ static void process_suspend_info(struct mddev *mddev, struct suspend_info *s; if (!hi) { + /* + * clear the REMOTE flag since resync or recovery is finished + * in remote node. + */ + clear_bit(MD_RESYNCING_REMOTE, &mddev->recovery); remove_suspend_info(mddev, slot); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); @@ -480,9 +492,7 @@ static void process_suspend_info(struct mddev *mddev, * resync thread is running in another node, * so we don't need to do the resync again * with the same section */ - bitmap_sync_with_cluster(mddev, cinfo->sync_low, - cinfo->sync_hi, - lo, hi); + md_bitmap_sync_with_cluster(mddev, cinfo->sync_low, cinfo->sync_hi, lo, hi); cinfo->sync_low = lo; cinfo->sync_hi = hi; @@ -585,6 +595,7 @@ static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) revalidate_disk(mddev->gendisk); break; case RESYNCING: + set_bit(MD_RESYNCING_REMOTE, &mddev->recovery); process_suspend_info(mddev, le32_to_cpu(msg->slot), le64_to_cpu(msg->low), le64_to_cpu(msg->high)); @@ -829,7 +840,7 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots) } /* Read the disk bitmap sb and check if it needs recovery */ - ret = bitmap_copy_from_slot(mddev, i, &lo, &hi, false); + ret = md_bitmap_copy_from_slot(mddev, i, &lo, &hi, false); if (ret) { pr_warn("md-cluster: Could not gather bitmaps from slot %d", i); lockres_free(bm_lockres); @@ -1127,13 +1138,13 @@ static int cluster_check_sync_size(struct mddev *mddev) bm_lockres = lockres_init(mddev, str, NULL, 1); if (!bm_lockres) { pr_err("md-cluster: Cannot initialize %s\n", str); - bitmap_free(bitmap); + md_bitmap_free(bitmap); return -1; } bm_lockres->flags |= DLM_LKF_NOQUEUE; rv = dlm_lock_sync(bm_lockres, DLM_LOCK_PW); if (!rv) - bitmap_update_sb(bitmap); + md_bitmap_update_sb(bitmap); lockres_free(bm_lockres); sb = kmap_atomic(bitmap->storage.sb_page); @@ -1141,11 +1152,11 @@ static int cluster_check_sync_size(struct mddev *mddev) sync_size = sb->sync_size; else if (sync_size != sb->sync_size) { kunmap_atomic(sb); - bitmap_free(bitmap); + md_bitmap_free(bitmap); return -1; } kunmap_atomic(sb); - bitmap_free(bitmap); + md_bitmap_free(bitmap); } return (my_sync_size == sync_size) ? 0 : -1; @@ -1265,8 +1276,18 @@ static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi) static int resync_finish(struct mddev *mddev) { struct md_cluster_info *cinfo = mddev->cluster_info; + + clear_bit(MD_RESYNCING_REMOTE, &mddev->recovery); dlm_unlock_sync(cinfo->resync_lockres); - return resync_info_update(mddev, 0, 0); + + /* + * If resync thread is interrupted so we can't say resync is finished, + * another node will launch resync thread to continue. 
+ */ + if (test_bit(MD_CLOSING, &mddev->flags)) + return 0; + else + return resync_info_update(mddev, 0, 0); } static int area_resyncing(struct mddev *mddev, int direction, @@ -1442,7 +1463,7 @@ static int gather_bitmaps(struct md_rdev *rdev) for (sn = 0; sn < mddev->bitmap_info.nodes; sn++) { if (sn == (cinfo->slot_number - 1)) continue; - err = bitmap_copy_from_slot(mddev, sn, &lo, &hi, false); + err = md_bitmap_copy_from_slot(mddev, sn, &lo, &hi, false); if (err) { pr_warn("md-cluster: Could not gather bitmaps from slot %d", sn); goto out; diff --git a/drivers/md/md.c b/drivers/md/md.c index 994aed2f9dff..63ceabb4e020 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -204,10 +204,6 @@ static int start_readonly; */ static bool create_on_open = true; -/* bio_clone_mddev - * like bio_clone_bioset, but with a local bio set - */ - struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, struct mddev *mddev) { @@ -335,6 +331,7 @@ EXPORT_SYMBOL(md_handle_request); static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) { const int rw = bio_data_dir(bio); + const int sgrp = op_stat_group(bio_op(bio)); struct mddev *mddev = q->queuedata; unsigned int sectors; int cpu; @@ -363,8 +360,8 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) md_handle_request(mddev, bio); cpu = part_stat_lock(); - part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); - part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[sgrp]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[sgrp], sectors); part_stat_unlock(); return BLK_QC_T_NONE; @@ -2571,7 +2568,7 @@ repeat: if (mddev->queue) blk_add_trace_msg(mddev->queue, "md md_update_sb"); rewrite: - bitmap_update_sb(mddev->bitmap); + md_bitmap_update_sb(mddev->bitmap); rdev_for_each(rdev, mddev) { char b[BDEVNAME_SIZE]; @@ -4384,10 +4381,10 @@ bitmap_store(struct mddev *mddev, const char *buf, size_t len) if (buf == end) break; } if (*end && !isspace(*end)) break; - bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk); + md_bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk); buf = skip_spaces(end); } - bitmap_unplug(mddev->bitmap); /* flush the bits to disk */ + md_bitmap_unplug(mddev->bitmap); /* flush the bits to disk */ out: mddev_unlock(mddev); return len; @@ -5615,7 +5612,7 @@ int md_run(struct mddev *mddev) (mddev->bitmap_info.file || mddev->bitmap_info.offset)) { struct bitmap *bitmap; - bitmap = bitmap_create(mddev, -1); + bitmap = md_bitmap_create(mddev, -1); if (IS_ERR(bitmap)) { err = PTR_ERR(bitmap); pr_warn("%s: failed to create bitmap (%d)\n", @@ -5630,7 +5627,7 @@ int md_run(struct mddev *mddev) pers->free(mddev, mddev->private); mddev->private = NULL; module_put(pers->owner); - bitmap_destroy(mddev); + md_bitmap_destroy(mddev); goto abort; } if (mddev->queue) { @@ -5715,9 +5712,9 @@ static int do_md_run(struct mddev *mddev) err = md_run(mddev); if (err) goto out; - err = bitmap_load(mddev); + err = md_bitmap_load(mddev); if (err) { - bitmap_destroy(mddev); + md_bitmap_destroy(mddev); goto out; } @@ -5859,7 +5856,7 @@ static void __md_stop_writes(struct mddev *mddev) mddev->pers->quiesce(mddev, 1); mddev->pers->quiesce(mddev, 0); } - bitmap_flush(mddev); + md_bitmap_flush(mddev); if (mddev->ro == 0 && ((!mddev->in_sync && !mddev_is_clustered(mddev)) || @@ -5881,7 +5878,7 @@ EXPORT_SYMBOL_GPL(md_stop_writes); static void mddev_detach(struct mddev *mddev) { - bitmap_wait_behind_writes(mddev); + md_bitmap_wait_behind_writes(mddev); if (mddev->pers && 
mddev->pers->quiesce) { mddev->pers->quiesce(mddev, 1); mddev->pers->quiesce(mddev, 0); @@ -5894,7 +5891,7 @@ static void mddev_detach(struct mddev *mddev) static void __md_stop(struct mddev *mddev) { struct md_personality *pers = mddev->pers; - bitmap_destroy(mddev); + md_bitmap_destroy(mddev); mddev_detach(mddev); /* Ensure ->event_work is done */ flush_workqueue(md_misc_wq); @@ -6713,21 +6710,21 @@ static int set_bitmap_file(struct mddev *mddev, int fd) if (fd >= 0) { struct bitmap *bitmap; - bitmap = bitmap_create(mddev, -1); + bitmap = md_bitmap_create(mddev, -1); mddev_suspend(mddev); if (!IS_ERR(bitmap)) { mddev->bitmap = bitmap; - err = bitmap_load(mddev); + err = md_bitmap_load(mddev); } else err = PTR_ERR(bitmap); if (err) { - bitmap_destroy(mddev); + md_bitmap_destroy(mddev); fd = -1; } mddev_resume(mddev); } else if (fd < 0) { mddev_suspend(mddev); - bitmap_destroy(mddev); + md_bitmap_destroy(mddev); mddev_resume(mddev); } } @@ -7013,15 +7010,15 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) mddev->bitmap_info.default_offset; mddev->bitmap_info.space = mddev->bitmap_info.default_space; - bitmap = bitmap_create(mddev, -1); + bitmap = md_bitmap_create(mddev, -1); mddev_suspend(mddev); if (!IS_ERR(bitmap)) { mddev->bitmap = bitmap; - rv = bitmap_load(mddev); + rv = md_bitmap_load(mddev); } else rv = PTR_ERR(bitmap); if (rv) - bitmap_destroy(mddev); + md_bitmap_destroy(mddev); mddev_resume(mddev); } else { /* remove the bitmap */ @@ -7046,7 +7043,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) md_cluster_ops->leave(mddev); } mddev_suspend(mddev); - bitmap_destroy(mddev); + md_bitmap_destroy(mddev); mddev_resume(mddev); mddev->bitmap_info.offset = 0; } @@ -7680,6 +7677,23 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev) resync -= atomic_read(&mddev->recovery_active); if (resync == 0) { + if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) { + struct md_rdev *rdev; + + rdev_for_each(rdev, mddev) + if (rdev->raid_disk >= 0 && + !test_bit(Faulty, &rdev->flags) && + rdev->recovery_offset != MaxSector && + rdev->recovery_offset) { + seq_printf(seq, "\trecover=REMOTE"); + return 1; + } + if (mddev->reshape_position != MaxSector) + seq_printf(seq, "\treshape=REMOTE"); + else + seq_printf(seq, "\tresync=REMOTE"); + return 1; + } if (mddev->recovery_cp < MaxSector) { seq_printf(seq, "\tresync=PENDING"); return 1; @@ -7909,7 +7923,7 @@ static int md_seq_show(struct seq_file *seq, void *v) } else seq_printf(seq, "\n "); - bitmap_status(seq, mddev->bitmap); + md_bitmap_status(seq, mddev->bitmap); seq_printf(seq, "\n"); } @@ -8046,8 +8060,7 @@ static int is_mddev_idle(struct mddev *mddev, int init) rcu_read_lock(); rdev_for_each_rcu(rdev, mddev) { struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; - curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + - (int)part_stat_read(&disk->part0, sectors[1]) - + curr_events = (int)part_stat_read_accum(&disk->part0, sectors) - atomic_read(&disk->sync_io); /* sync IO will cause sync_io to increase before the disk_stats * as sync_io is counted when a request starts, and @@ -8781,7 +8794,7 @@ void md_check_recovery(struct mddev *mddev) return; if (mddev->bitmap) - bitmap_daemon_work(mddev); + md_bitmap_daemon_work(mddev); if (signal_pending(current)) { if (mddev->pers->sync_request && !mddev->external) { @@ -8918,7 +8931,7 @@ void md_check_recovery(struct mddev *mddev) * which has the bitmap stored on all devices. 
* So make sure all bitmap pages get written */ - bitmap_write_all(mddev->bitmap); + md_bitmap_write_all(mddev->bitmap); } INIT_WORK(&mddev->del_work, md_start_sync); queue_work(md_misc_wq, &mddev->del_work); @@ -9166,7 +9179,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev) if (ret) pr_info("md-cluster: resize failed\n"); else - bitmap_update_sb(mddev->bitmap); + md_bitmap_update_sb(mddev->bitmap); } /* Check for change of roles in the active devices */ diff --git a/drivers/md/md.h b/drivers/md/md.h index 2d148bdaba74..8afd6bfdbfb9 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -496,6 +496,7 @@ enum recovery_flags { MD_RECOVERY_FROZEN, /* User request to abort, and not restart, any action */ MD_RECOVERY_ERROR, /* sync-action interrupted because io-error */ MD_RECOVERY_WAIT, /* waiting for pers->start() to finish */ + MD_RESYNCING_REMOTE, /* remote node is running resync thread */ }; static inline int __must_check mddev_lock(struct mddev *mddev) diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index 829b4ce057d8..0a3b8ae4a29c 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -69,9 +69,9 @@ static struct dm_block_validator index_validator = { */ #define BITMAP_CSUM_XOR 240779 -static void bitmap_prepare_for_write(struct dm_block_validator *v, - struct dm_block *b, - size_t block_size) +static void dm_bitmap_prepare_for_write(struct dm_block_validator *v, + struct dm_block *b, + size_t block_size) { struct disk_bitmap_header *disk_header = dm_block_data(b); @@ -81,9 +81,9 @@ static void bitmap_prepare_for_write(struct dm_block_validator *v, BITMAP_CSUM_XOR)); } -static int bitmap_check(struct dm_block_validator *v, - struct dm_block *b, - size_t block_size) +static int dm_bitmap_check(struct dm_block_validator *v, + struct dm_block *b, + size_t block_size) { struct disk_bitmap_header *disk_header = dm_block_data(b); __le32 csum_disk; @@ -108,8 +108,8 @@ static int bitmap_check(struct dm_block_validator *v, static struct dm_block_validator dm_sm_bitmap_validator = { .name = "sm_bitmap", - .prepare_for_write = bitmap_prepare_for_write, - .check = bitmap_check + .prepare_for_write = dm_bitmap_prepare_for_write, + .check = dm_bitmap_check, }; /*----------------------------------------------------------------*/ @@ -124,7 +124,7 @@ static void *dm_bitmap_data(struct dm_block *b) #define WORD_MASK_HIGH 0xAAAAAAAAAAAAAAAAULL -static unsigned bitmap_word_used(void *addr, unsigned b) +static unsigned dm_bitmap_word_used(void *addr, unsigned b) { __le64 *words_le = addr; __le64 *w_le = words_le + (b >> ENTRIES_SHIFT); @@ -170,7 +170,7 @@ static int sm_find_free(void *addr, unsigned begin, unsigned end, { while (begin < end) { if (!(begin & (ENTRIES_PER_WORD - 1)) && - bitmap_word_used(addr, begin)) { + dm_bitmap_word_used(addr, begin)) { begin += ENTRIES_PER_WORD; continue; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 8e05c1092aef..4e990246225e 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -385,10 +385,10 @@ static void close_write(struct r1bio *r1_bio) r1_bio->behind_master_bio = NULL; } /* clear the bitmap if all writes complete successfully */ - bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, - r1_bio->sectors, - !test_bit(R1BIO_Degraded, &r1_bio->state), - test_bit(R1BIO_BehindIO, &r1_bio->state)); + md_bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, + r1_bio->sectors, + 
!test_bit(R1BIO_Degraded, &r1_bio->state), + test_bit(R1BIO_BehindIO, &r1_bio->state)); md_write_end(r1_bio->mddev); } @@ -781,7 +781,7 @@ static int raid1_congested(struct mddev *mddev, int bits) static void flush_bio_list(struct r1conf *conf, struct bio *bio) { /* flush any pending bitmap writes to disk before proceeding w/ I/O */ - bitmap_unplug(conf->mddev->bitmap); + md_bitmap_unplug(conf->mddev->bitmap); wake_up(&conf->wait_barrier); while (bio) { /* submit pending writes */ @@ -1470,10 +1470,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, alloc_behind_master_bio(r1_bio, bio); } - bitmap_startwrite(bitmap, r1_bio->sector, - r1_bio->sectors, - test_bit(R1BIO_BehindIO, - &r1_bio->state)); + md_bitmap_startwrite(bitmap, r1_bio->sector, r1_bio->sectors, + test_bit(R1BIO_BehindIO, &r1_bio->state)); first_clone = 0; } @@ -1880,8 +1878,7 @@ static void end_sync_write(struct bio *bio) long sectors_to_go = r1_bio->sectors; /* make sure these bits doesn't get cleared. */ do { - bitmap_end_sync(mddev->bitmap, s, - &sync_blocks, 1); + md_bitmap_end_sync(mddev->bitmap, s, &sync_blocks, 1); s += sync_blocks; sectors_to_go -= sync_blocks; } while (sectors_to_go > 0); @@ -2626,12 +2623,12 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, * We can find the current addess in mddev->curr_resync */ if (mddev->curr_resync < max_sector) /* aborted */ - bitmap_end_sync(mddev->bitmap, mddev->curr_resync, - &sync_blocks, 1); + md_bitmap_end_sync(mddev->bitmap, mddev->curr_resync, + &sync_blocks, 1); else /* completed sync */ conf->fullsync = 0; - bitmap_close_sync(mddev->bitmap); + md_bitmap_close_sync(mddev->bitmap); close_sync(conf); if (mddev_is_clustered(mddev)) { @@ -2651,7 +2648,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, /* before building a request, check if we can skip these blocks.. 
* This call the bitmap_start_sync doesn't actually record anything */ - if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && + if (!md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { /* We can skip this block, and probably several more */ *skipped = 1; @@ -2669,7 +2666,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, * sector_nr + two times RESYNC_SECTORS */ - bitmap_cond_end_sync(mddev->bitmap, sector_nr, + md_bitmap_cond_end_sync(mddev->bitmap, sector_nr, mddev_is_clustered(mddev) && (sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high)); @@ -2828,8 +2825,8 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, if (len == 0) break; if (sync_blocks == 0) { - if (!bitmap_start_sync(mddev->bitmap, sector_nr, - &sync_blocks, still_degraded) && + if (!md_bitmap_start_sync(mddev->bitmap, sector_nr, + &sync_blocks, still_degraded) && !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) break; @@ -3165,7 +3162,7 @@ static int raid1_resize(struct mddev *mddev, sector_t sectors) mddev->array_sectors > newsize) return -EINVAL; if (mddev->bitmap) { - int ret = bitmap_resize(mddev->bitmap, newsize, 0, 0); + int ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0); if (ret) return ret; } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 35bd3a62451b..981898049491 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -440,10 +440,10 @@ static void raid10_end_read_request(struct bio *bio) static void close_write(struct r10bio *r10_bio) { /* clear the bitmap if all writes complete successfully */ - bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector, - r10_bio->sectors, - !test_bit(R10BIO_Degraded, &r10_bio->state), - 0); + md_bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector, + r10_bio->sectors, + !test_bit(R10BIO_Degraded, &r10_bio->state), + 0); md_write_end(r10_bio->mddev); } @@ -917,7 +917,7 @@ static void flush_pending_writes(struct r10conf *conf) blk_start_plug(&plug); /* flush any pending bitmap writes to disk * before proceeding w/ I/O */ - bitmap_unplug(conf->mddev->bitmap); + md_bitmap_unplug(conf->mddev->bitmap); wake_up(&conf->wait_barrier); while (bio) { /* submit pending writes */ @@ -1102,7 +1102,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) /* we aren't scheduling, so we can do the write-out directly. 
*/ bio = bio_list_get(&plug->pending); - bitmap_unplug(mddev->bitmap); + md_bitmap_unplug(mddev->bitmap); wake_up(&conf->wait_barrier); while (bio) { /* submit pending writes */ @@ -1519,7 +1519,7 @@ retry_write: } atomic_set(&r10_bio->remaining, 1); - bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0); + md_bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0); for (i = 0; i < conf->copies; i++) { if (r10_bio->devs[i].bio) @@ -2991,13 +2991,13 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, if (mddev->curr_resync < max_sector) { /* aborted */ if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) - bitmap_end_sync(mddev->bitmap, mddev->curr_resync, - &sync_blocks, 1); + md_bitmap_end_sync(mddev->bitmap, mddev->curr_resync, + &sync_blocks, 1); else for (i = 0; i < conf->geo.raid_disks; i++) { sector_t sect = raid10_find_virt(conf, mddev->curr_resync, i); - bitmap_end_sync(mddev->bitmap, sect, - &sync_blocks, 1); + md_bitmap_end_sync(mddev->bitmap, sect, + &sync_blocks, 1); } } else { /* completed sync */ @@ -3018,7 +3018,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, } conf->fullsync = 0; } - bitmap_close_sync(mddev->bitmap); + md_bitmap_close_sync(mddev->bitmap); close_sync(conf); *skipped = 1; return sectors_skipped; @@ -3112,8 +3112,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, * we only need to recover the block if it is set in * the bitmap */ - must_sync = bitmap_start_sync(mddev->bitmap, sect, - &sync_blocks, 1); + must_sync = md_bitmap_start_sync(mddev->bitmap, sect, + &sync_blocks, 1); if (sync_blocks < max_sync) max_sync = sync_blocks; if (!must_sync && @@ -3158,8 +3158,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, } } - must_sync = bitmap_start_sync(mddev->bitmap, sect, - &sync_blocks, still_degraded); + must_sync = md_bitmap_start_sync(mddev->bitmap, sect, + &sync_blocks, still_degraded); any_working = 0; for (j=0; j<conf->copies;j++) { @@ -3335,13 +3335,12 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, * safety reason, which ensures curr_resync_completed is * updated in bitmap_cond_end_sync. 
*/ - bitmap_cond_end_sync(mddev->bitmap, sector_nr, - mddev_is_clustered(mddev) && - (sector_nr + 2 * RESYNC_SECTORS > - conf->cluster_sync_high)); + md_bitmap_cond_end_sync(mddev->bitmap, sector_nr, + mddev_is_clustered(mddev) && + (sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high)); - if (!bitmap_start_sync(mddev->bitmap, sector_nr, - &sync_blocks, mddev->degraded) && + if (!md_bitmap_start_sync(mddev->bitmap, sector_nr, + &sync_blocks, mddev->degraded) && !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { /* We can skip this block */ @@ -4022,7 +4021,7 @@ static int raid10_resize(struct mddev *mddev, sector_t sectors) mddev->array_sectors > size) return -EINVAL; if (mddev->bitmap) { - int ret = bitmap_resize(mddev->bitmap, size, 0, 0); + int ret = md_bitmap_resize(mddev->bitmap, size, 0, 0); if (ret) return ret; } @@ -4287,10 +4286,9 @@ static int raid10_start_reshape(struct mddev *mddev) spin_unlock_irq(&conf->device_lock); if (mddev->delta_disks && mddev->bitmap) { - ret = bitmap_resize(mddev->bitmap, - raid10_size(mddev, 0, - conf->geo.raid_disks), - 0, 0); + ret = md_bitmap_resize(mddev->bitmap, + raid10_size(mddev, 0, conf->geo.raid_disks), + 0, 0); if (ret) goto abort; } diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 2b775abf377b..e6e925add700 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -324,10 +324,10 @@ void r5c_handle_cached_data_endio(struct r5conf *conf, if (sh->dev[i].written) { set_bit(R5_UPTODATE, &sh->dev[i].flags); r5c_return_dev_pending_writes(conf, &sh->dev[i]); - bitmap_endwrite(conf->mddev->bitmap, sh->sector, - STRIPE_SECTORS, - !test_bit(STRIPE_DEGRADED, &sh->state), - 0); + md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, + STRIPE_SECTORS, + !test_bit(STRIPE_DEGRADED, &sh->state), + 0); } } } @@ -717,7 +717,6 @@ static void r5c_disable_writeback_async(struct work_struct *work) static void r5l_submit_current_io(struct r5l_log *log) { struct r5l_io_unit *io = log->current_io; - struct bio *bio; struct r5l_meta_block *block; unsigned long flags; u32 crc; @@ -730,7 +729,6 @@ static void r5l_submit_current_io(struct r5l_log *log) block->meta_size = cpu_to_le32(io->meta_offset); crc = crc32c_le(log->uuid_checksum, block, PAGE_SIZE); block->checksum = cpu_to_le32(crc); - bio = io->current_bio; log->current_io = NULL; spin_lock_irqsave(&log->io_list_lock, flags); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 2031506a0ecd..4ce0d7502fad 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -409,16 +409,14 @@ void raid5_release_stripe(struct stripe_head *sh) md_wakeup_thread(conf->mddev->thread); return; slow_path: - local_irq_save(flags); /* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */ - if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) { + if (atomic_dec_and_lock_irqsave(&sh->count, &conf->device_lock, flags)) { INIT_LIST_HEAD(&list); hash = sh->hash_lock_index; do_release_stripe(conf, sh, &list); - spin_unlock(&conf->device_lock); + spin_unlock_irqrestore(&conf->device_lock, flags); release_inactive_stripe_list(conf, &list, hash); } - local_irq_restore(flags); } static inline void remove_hash(struct stripe_head *sh) @@ -3301,8 +3299,8 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, */ set_bit(STRIPE_BITMAP_PENDING, &sh->state); spin_unlock_irq(&sh->stripe_lock); - bitmap_startwrite(conf->mddev->bitmap, sh->sector, - STRIPE_SECTORS, 0); + md_bitmap_startwrite(conf->mddev->bitmap, sh->sector, + STRIPE_SECTORS, 0); 
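The raid5_release_stripe() hunk above replaces the open-coded local_irq_save()/atomic_dec_and_lock() pair with atomic_dec_and_lock_irqsave(), which only touches the IRQ state on the slow path. A minimal sketch of the pattern, assuming a hypothetical refcounted object (struct obj and do_release() are illustrative names, not part of this diff):

#include <linux/atomic.h>
#include <linux/spinlock.h>

struct obj {				/* hypothetical refcounted object */
	atomic_t count;			/* references held */
	spinlock_t lock;		/* serializes teardown */
};

static void do_release(struct obj *o);	/* hypothetical teardown helper */

static void put_obj(struct obj *o)
{
	unsigned long flags;

	/*
	 * Take the lock and disable interrupts only when the count
	 * really drops to zero; the common path skips both entirely.
	 */
	if (atomic_dec_and_lock_irqsave(&o->count, &o->lock, flags)) {
		do_release(o);
		spin_unlock_irqrestore(&o->lock, flags);
	}
}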
spin_lock_irq(&sh->stripe_lock); clear_bit(STRIPE_BITMAP_PENDING, &sh->state); if (!sh->batch_head) { @@ -3392,8 +3390,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, bi = nextbi; } if (bitmap_end) - bitmap_endwrite(conf->mddev->bitmap, sh->sector, - STRIPE_SECTORS, 0, 0); + md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, + STRIPE_SECTORS, 0, 0); bitmap_end = 0; /* and fail all 'written' */ bi = sh->dev[i].written; @@ -3438,8 +3436,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, } } if (bitmap_end) - bitmap_endwrite(conf->mddev->bitmap, sh->sector, - STRIPE_SECTORS, 0, 0); + md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, + STRIPE_SECTORS, 0, 0); /* If we were in the middle of a write the parity block might * still be locked - so just clear all R5_LOCKED flags */ @@ -3779,10 +3777,10 @@ returnbi: bio_endio(wbi); wbi = wbi2; } - bitmap_endwrite(conf->mddev->bitmap, sh->sector, - STRIPE_SECTORS, - !test_bit(STRIPE_DEGRADED, &sh->state), - 0); + md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, + STRIPE_SECTORS, + !test_bit(STRIPE_DEGRADED, &sh->state), + 0); if (head_sh->batch_head) { sh = list_first_entry(&sh->batch_list, struct stripe_head, @@ -4521,6 +4519,12 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) s->failed++; if (rdev && !test_bit(Faulty, &rdev->flags)) do_recovery = 1; + else if (!rdev) { + rdev = rcu_dereference( + conf->disks[i].replacement); + if (rdev && !test_bit(Faulty, &rdev->flags)) + do_recovery = 1; + } } if (test_bit(R5_InJournal, &dev->flags)) @@ -5539,10 +5543,10 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) for (d = 0; d < conf->raid_disks - conf->max_degraded; d++) - bitmap_startwrite(mddev->bitmap, - sh->sector, - STRIPE_SECTORS, - 0); + md_bitmap_startwrite(mddev->bitmap, + sh->sector, + STRIPE_SECTORS, + 0); sh->bm_seq = conf->seq_flush + 1; set_bit(STRIPE_BIT_DELAY, &sh->state); } @@ -6020,11 +6024,11 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n } if (mddev->curr_resync < max_sector) /* aborted */ - bitmap_end_sync(mddev->bitmap, mddev->curr_resync, - &sync_blocks, 1); + md_bitmap_end_sync(mddev->bitmap, mddev->curr_resync, + &sync_blocks, 1); else /* completed sync */ conf->fullsync = 0; - bitmap_close_sync(mddev->bitmap); + md_bitmap_close_sync(mddev->bitmap); return 0; } @@ -6053,7 +6057,7 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n } if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && !conf->fullsync && - !bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && + !md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && sync_blocks >= STRIPE_SECTORS) { /* we can skip this block, and probably more */ sync_blocks /= STRIPE_SECTORS; @@ -6061,7 +6065,7 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */ } - bitmap_cond_end_sync(mddev->bitmap, sector_nr, false); + md_bitmap_cond_end_sync(mddev->bitmap, sector_nr, false); sh = raid5_get_active_stripe(conf, sector_nr, 0, 1, 0); if (sh == NULL) { @@ -6084,7 +6088,7 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n } rcu_read_unlock(); - bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded); + md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded); set_bit(STRIPE_SYNC_REQUESTED, &sh->state); 
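The stripe hunks above keep the write-intent pairing intact while renaming the calls: every md_bitmap_startwrite() on a stripe is matched by an md_bitmap_endwrite() once its writes finish or fail. A stand-alone model of the per-chunk counter those calls maintain, in plain C, with masks matching the NEEDED/counter split described in md-bitmap.h and all helper names invented for illustration:

#include <assert.h>
#include <stdio.h>

#define NEEDED_MASK  0x8000u	/* top bit: chunk must be resynced */
#define COUNTER_MASK 0x3fffu	/* low bits: writes currently in flight */

static unsigned short bmc;	/* one packed counter per bitmap chunk */

static void startwrite(void)
{
	assert((bmc & COUNTER_MASK) < COUNTER_MASK);
	bmc++;				/* count the in-flight write */
}

static void endwrite(int success)
{
	bmc--;
	if (!success)
		bmc |= NEEDED_MASK;	/* leave the chunk dirty for resync */
}

int main(void)
{
	startwrite();
	endwrite(0);	/* a failed write keeps the chunk marked */
	printf("in-flight=%u needed=%d\n",
	       bmc & COUNTER_MASK, !!(bmc & NEEDED_MASK));
	return 0;
}

A failed write leaves the NEEDED bit set, so the next resync revisits the chunk instead of trusting the on-disk copies.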
set_bit(STRIPE_HANDLE, &sh->state); @@ -6285,7 +6289,7 @@ static void raid5d(struct md_thread *thread) /* Now is a good time to flush some bitmap updates */ conf->seq_flush++; spin_unlock_irq(&conf->device_lock); - bitmap_unplug(mddev->bitmap); + md_bitmap_unplug(mddev->bitmap); spin_lock_irq(&conf->device_lock); conf->seq_write = conf->seq_flush; activate_bit_delay(conf, conf->temp_inactive_list); @@ -7741,7 +7745,7 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors) mddev->array_sectors > newsize) return -EINVAL; if (mddev->bitmap) { int ret = md_bitmap_resize(mddev->bitmap, sectors, 0, 0); if (ret) return ret; }
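Taken together, the md-bitmap hunks in this diff are a mechanical namespace move: every exported bitmap_* symbol gains an md_ prefix. The apparent motivation is staying clear of the generic bitmap API in <linux/bitmap.h>, whose allocators share the old names. A kernel-context sketch of the clash, assuming a tree that provides the generic bitmap_alloc()/bitmap_free():

#include <linux/bitmap.h>
#include <linux/slab.h>

/* bitmap_alloc()/bitmap_free() belong to the generic bitmap API, so an
 * exported md symbol also named bitmap_free() could no longer coexist. */
static int demo(unsigned int nbits)
{
	unsigned long *map = bitmap_alloc(nbits, GFP_KERNEL);

	if (!map)
		return -ENOMEM;
	bitmap_zero(map, nbits);
	bitmap_free(map);	/* generic API, not drivers/md */
	return 0;
}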
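The md.c accounting hunks earlier in this section (md_make_request() and is_mddev_idle()) move from the old two-slot ios[rw]/sectors[rw] arrays to per-op-group statistics indexed by op_stat_group(), summing all groups with part_stat_read_accum(). A self-contained model of that bookkeeping change, in ordinary userspace C with every name local to the example:

#include <stdio.h>

enum stat_group { STAT_READ, STAT_WRITE, STAT_DISCARD, NR_STAT_GROUPS };

struct part_stats {
	unsigned long ios[NR_STAT_GROUPS];
	unsigned long sectors[NR_STAT_GROUPS];
};

/* Sum every group, as part_stat_read_accum() does for the real disk
 * statistics, instead of adding the read and write slots by hand. */
static unsigned long sectors_accum(const struct part_stats *s)
{
	unsigned long total = 0;

	for (int g = 0; g < NR_STAT_GROUPS; g++)
		total += s->sectors[g];
	return total;
}

int main(void)
{
	struct part_stats s = { .sectors = { 100, 250, 8 } };

	printf("accumulated sectors: %lu\n", sectors_accum(&s));
	return 0;
}

Discards get their own bucket, which is why the accumulating read replaces the hand-written sectors[0] + sectors[1] sum in is_mddev_idle().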