diff options
Diffstat (limited to 'drivers/md/bcache/super.c')
-rw-r--r-- | drivers/md/bcache/super.c | 228 |
1 files changed, 167 insertions, 61 deletions
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 55a37641aa95..7bbd670a5a84 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * bcache setup/teardown code, and some metadata io - read a superblock and * figure out what to do with it. @@ -46,6 +47,7 @@ static int bcache_major; static DEFINE_IDA(bcache_device_idx); static wait_queue_head_t unregister_wait; struct workqueue_struct *bcache_wq; +struct workqueue_struct *bch_journal_wq; #define BTREE_MAX_PAGES (256 * 1024 / PAGE_SIZE) /* limitation of partitions number on single bcache device */ @@ -61,7 +63,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev, const char *err; struct cache_sb *s; struct buffer_head *bh = __bread(bdev, 1, SB_SIZE); - unsigned i; + unsigned int i; if (!bh) return "IO error"; @@ -149,7 +151,8 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev, goto err; err = "Invalid superblock: device too small"; - if (get_capacity(bdev->bd_disk) < sb->bucket_size * sb->nbuckets) + if (get_capacity(bdev->bd_disk) < + sb->bucket_size * sb->nbuckets) goto err; err = "Bad UUID"; @@ -202,7 +205,7 @@ static void write_bdev_super_endio(struct bio *bio) static void __write_super(struct cache_sb *sb, struct bio *bio) { struct cache_sb *out = page_address(bio_first_page_all(bio)); - unsigned i; + unsigned int i; bio->bi_iter.bi_sector = SB_SECTOR; bio->bi_iter.bi_size = SB_SIZE; @@ -282,7 +285,7 @@ void bcache_write_super(struct cache_set *c) { struct closure *cl = &c->sb_write; struct cache *ca; - unsigned i; + unsigned int i; down(&c->sb_write_mutex); closure_init(cl, &c->cl); @@ -334,7 +337,7 @@ static void uuid_io(struct cache_set *c, int op, unsigned long op_flags, { struct closure *cl = &c->uuid_write; struct uuid_entry *u; - unsigned i; + unsigned int i; char buf[80]; BUG_ON(!parent); @@ -415,8 +418,9 @@ static int __uuid_write(struct cache_set *c) { BKEY_PADDED(key) k; struct closure cl; - closure_init_stack(&cl); + struct cache *ca; + closure_init_stack(&cl); lockdep_assert_held(&bch_register_lock); if (bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, true)) @@ -426,6 +430,10 @@ static int __uuid_write(struct cache_set *c) uuid_io(c, REQ_OP_WRITE, 0, &k.key, &cl); closure_sync(&cl); + /* Only one bucket used for uuid write */ + ca = PTR_CACHE(c, &k.key, 0); + atomic_long_add(ca->sb.bucket_size, &ca->meta_sectors_written); + bkey_copy(&c->uuid_bucket, &k.key); bkey_put(c, &k.key); return 0; @@ -456,6 +464,7 @@ static struct uuid_entry *uuid_find(struct cache_set *c, const char *uuid) static struct uuid_entry *uuid_find_empty(struct cache_set *c) { static const char zero_uuid[16] = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; + return uuid_find(c, zero_uuid); } @@ -463,8 +472,8 @@ static struct uuid_entry *uuid_find_empty(struct cache_set *c) * Bucket priorities/gens: * * For each bucket, we store on disk its - * 8 bit gen - * 16 bit priority + * 8 bit gen + * 16 bit priority * * See alloc.c for an explanation of the gen. The priority is used to implement * lru (and in the future other) cache replacement policies; for most purposes @@ -587,7 +596,7 @@ static void prio_read(struct cache *ca, uint64_t bucket) struct prio_set *p = ca->disk_buckets; struct bucket_disk *d = p->data + prios_per_bucket(ca), *end = d; struct bucket *b; - unsigned bucket_nr = 0; + unsigned int bucket_nr = 0; for (b = ca->buckets; b < ca->buckets + ca->sb.nbuckets; @@ -599,7 +608,8 @@ static void prio_read(struct cache *ca, uint64_t bucket) prio_io(ca, bucket, REQ_OP_READ, 0); - if (p->csum != bch_crc64(&p->magic, bucket_bytes(ca) - 8)) + if (p->csum != + bch_crc64(&p->magic, bucket_bytes(ca) - 8)) pr_warn("bad csum reading priorities"); if (p->magic != pset_magic(&ca->sb)) @@ -619,6 +629,7 @@ static void prio_read(struct cache *ca, uint64_t bucket) static int open_dev(struct block_device *b, fmode_t mode) { struct bcache_device *d = b->bd_disk->private_data; + if (test_bit(BCACHE_DEV_CLOSING, &d->flags)) return -ENXIO; @@ -629,6 +640,7 @@ static int open_dev(struct block_device *b, fmode_t mode) static void release_dev(struct gendisk *b, fmode_t mode) { struct bcache_device *d = b->private_data; + closure_put(&d->cl); } @@ -636,10 +648,6 @@ static int ioctl_dev(struct block_device *b, fmode_t mode, unsigned int cmd, unsigned long arg) { struct bcache_device *d = b->bd_disk->private_data; - struct cached_dev *dc = container_of(d, struct cached_dev, disk); - - if (dc->io_disable) - return -EIO; return d->ioctl(d, mode, cmd, arg); } @@ -662,7 +670,7 @@ static void bcache_device_unlink(struct bcache_device *d) lockdep_assert_held(&bch_register_lock); if (d->c && !test_and_set_bit(BCACHE_DEV_UNLINK_DONE, &d->flags)) { - unsigned i; + unsigned int i; struct cache *ca; sysfs_remove_link(&d->c->kobj, d->name); @@ -676,7 +684,7 @@ static void bcache_device_unlink(struct bcache_device *d) static void bcache_device_link(struct bcache_device *d, struct cache_set *c, const char *name) { - unsigned i; + unsigned int i; struct cache *ca; for_each_cache(ca, d->c, i) @@ -715,7 +723,7 @@ static void bcache_device_detach(struct bcache_device *d) } static void bcache_device_attach(struct bcache_device *d, struct cache_set *c, - unsigned id) + unsigned int id) { d->id = id; d->c = c; @@ -762,7 +770,7 @@ static void bcache_device_free(struct bcache_device *d) closure_debug_destroy(&d->cl); } -static int bcache_device_init(struct bcache_device *d, unsigned block_size, +static int bcache_device_init(struct bcache_device *d, unsigned int block_size, sector_t sectors) { struct request_queue *q; @@ -778,7 +786,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, if (!d->nr_stripes || d->nr_stripes > max_stripes) { pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)", - (unsigned)d->nr_stripes); + (unsigned int)d->nr_stripes); return -ENOMEM; } @@ -919,6 +927,7 @@ void bch_cached_dev_run(struct cached_dev *dc) if (!d->c && BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) { struct closure cl; + closure_init_stack(&cl); SET_BDEV_STATE(&dc->sb, BDEV_STATE_STALE); @@ -928,8 +937,10 @@ void bch_cached_dev_run(struct cached_dev *dc) add_disk(d->disk); bd_link_disk_holder(dc->bdev, dc->disk.disk); - /* won't show up in the uevent file, use udevadm monitor -e instead - * only class / kset properties are persistent */ + /* + * won't show up in the uevent file, use udevadm monitor -e instead + * only class / kset properties are persistent + */ kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env); kfree(env[1]); kfree(env[2]); @@ -976,6 +987,7 @@ static void cached_dev_detach_finish(struct work_struct *w) { struct cached_dev *dc = container_of(w, struct cached_dev, detach); struct closure cl; + closure_init_stack(&cl); BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)); @@ -997,6 +1009,7 @@ static void cached_dev_detach_finish(struct work_struct *w) bch_write_bdev_super(dc, &cl); closure_sync(&cl); + calc_cached_dev_sectors(dc->disk.c); bcache_device_detach(&dc->disk); list_move(&dc->list, &uncached_devices); @@ -1097,12 +1110,14 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, } } - /* Deadlocks since we're called via sysfs... - sysfs_remove_file(&dc->kobj, &sysfs_attach); + /* + * Deadlocks since we're called via sysfs... + * sysfs_remove_file(&dc->kobj, &sysfs_attach); */ if (bch_is_zero(u->uuid, 16)) { struct closure cl; + closure_init_stack(&cl); memcpy(u->uuid, dc->sb.uuid, 16); @@ -1124,11 +1139,11 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, list_move(&dc->list, &c->cached_devs); calc_cached_dev_sectors(c); - smp_wmb(); /* * dc->c must be set before dc->count != 0 - paired with the mb in * cached_dev_get() */ + smp_wmb(); refcount_set(&dc->count, 1); /* Block writeback thread, but spawn it */ @@ -1139,11 +1154,12 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, } if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { - bch_sectors_dirty_init(&dc->disk); atomic_set(&dc->has_dirty, 1); bch_writeback_queue(dc); } + bch_sectors_dirty_init(&dc->disk); + bch_cached_dev_run(dc); bcache_device_link(&dc->disk, c, "bdev"); atomic_inc(&c->attached_dev_nr); @@ -1212,7 +1228,7 @@ static void cached_dev_flush(struct closure *cl) continue_at(cl, cached_dev_free, system_wq); } -static int cached_dev_init(struct cached_dev *dc, unsigned block_size) +static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) { int ret; struct io *io; @@ -1320,6 +1336,7 @@ void bch_flash_dev_release(struct kobject *kobj) static void flash_dev_free(struct closure *cl) { struct bcache_device *d = container_of(cl, struct bcache_device, cl); + mutex_lock(&bch_register_lock); atomic_long_sub(bcache_dev_sectors_dirty(d), &d->c->flash_dev_dirty_sectors); @@ -1459,17 +1476,18 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...) if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags)) pr_info("CACHE_SET_IO_DISABLE already set"); - /* XXX: we can be called from atomic context - acquire_console_sem(); - */ + /* + * XXX: we can be called from atomic context + * acquire_console_sem(); + */ - printk(KERN_ERR "bcache: error on %pU: ", c->sb.set_uuid); + pr_err("bcache: error on %pU: ", c->sb.set_uuid); va_start(args, fmt); vprintk(fmt, args); va_end(args); - printk(", disabling caching\n"); + pr_err(", disabling caching\n"); if (c->on_error == ON_ERROR_PANIC) panic("panic forced after error\n"); @@ -1481,6 +1499,7 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...) void bch_cache_set_release(struct kobject *kobj) { struct cache_set *c = container_of(kobj, struct cache_set, kobj); + kfree(c); module_put(THIS_MODULE); } @@ -1489,7 +1508,7 @@ static void cache_set_free(struct closure *cl) { struct cache_set *c = container_of(cl, struct cache_set, cl); struct cache *ca; - unsigned i; + unsigned int i; if (!IS_ERR_OR_NULL(c->debug)) debugfs_remove(c->debug); @@ -1532,7 +1551,7 @@ static void cache_set_flush(struct closure *cl) struct cache_set *c = container_of(cl, struct cache_set, caching); struct cache *ca; struct btree *b; - unsigned i; + unsigned int i; bch_cache_accounting_destroy(&c->accounting); @@ -1671,6 +1690,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) { int iter_size; struct cache_set *c = kzalloc(sizeof(struct cache_set), GFP_KERNEL); + if (!c) return NULL; @@ -1731,8 +1751,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) if (!(c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL)) || mempool_init_slab_pool(&c->search, 32, bch_search_cache) || mempool_init_kmalloc_pool(&c->bio_meta, 2, - sizeof(struct bbio) + sizeof(struct bio_vec) * - bucket_pages(c)) || + sizeof(struct bbio) + sizeof(struct bio_vec) * + bucket_pages(c)) || mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) || bioset_init(&c->bio_split, 4, offsetof(struct bbio, bio), BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER) || @@ -1762,7 +1782,7 @@ static void run_cache_set(struct cache_set *c) struct cached_dev *dc, *t; struct cache *ca; struct closure cl; - unsigned i; + unsigned int i; closure_init_stack(&cl); @@ -1804,7 +1824,9 @@ static void run_cache_set(struct cache_set *c) goto err; err = "error reading btree root"; - c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true, NULL); + c->root = bch_btree_node_get(c, NULL, k, + j->btree_level, + true, NULL); if (IS_ERR_OR_NULL(c->root)) goto err; @@ -1853,7 +1875,7 @@ static void run_cache_set(struct cache_set *c) pr_notice("invalidating existing data"); for_each_cache(ca, c, i) { - unsigned j; + unsigned int j; ca->sb.keys = clamp_t(int, ca->sb.nbuckets >> 7, 2, SB_JOURNAL_BUCKETS); @@ -1998,7 +2020,7 @@ err: void bch_cache_release(struct kobject *kobj) { struct cache *ca = container_of(kobj, struct cache, kobj); - unsigned i; + unsigned int i; if (ca->set) { BUG_ON(ca->set->cache[ca->sb.nr_this_dev] != ca); @@ -2030,6 +2052,8 @@ static int cache_alloc(struct cache *ca) size_t free; size_t btree_buckets; struct bucket *b; + int ret = -ENOMEM; + const char *err = NULL; __module_get(THIS_MODULE); kobject_init(&ca->kobj, &bch_cache_ktype); @@ -2047,27 +2071,93 @@ static int cache_alloc(struct cache *ca) */ btree_buckets = ca->sb.njournal_buckets ?: 8; free = roundup_pow_of_two(ca->sb.nbuckets) >> 10; + if (!free) { + ret = -EPERM; + err = "ca->sb.nbuckets is too small"; + goto err_free; + } - if (!init_fifo(&ca->free[RESERVE_BTREE], btree_buckets, GFP_KERNEL) || - !init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) || - !init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) || - !init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) || - !init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) || - !init_heap(&ca->heap, free << 3, GFP_KERNEL) || - !(ca->buckets = vzalloc(array_size(sizeof(struct bucket), - ca->sb.nbuckets))) || - !(ca->prio_buckets = kzalloc(array3_size(sizeof(uint64_t), - prio_buckets(ca), 2), - GFP_KERNEL)) || - !(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca))) - return -ENOMEM; + if (!init_fifo(&ca->free[RESERVE_BTREE], btree_buckets, + GFP_KERNEL)) { + err = "ca->free[RESERVE_BTREE] alloc failed"; + goto err_btree_alloc; + } + + if (!init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca), + GFP_KERNEL)) { + err = "ca->free[RESERVE_PRIO] alloc failed"; + goto err_prio_alloc; + } + + if (!init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL)) { + err = "ca->free[RESERVE_MOVINGGC] alloc failed"; + goto err_movinggc_alloc; + } + + if (!init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL)) { + err = "ca->free[RESERVE_NONE] alloc failed"; + goto err_none_alloc; + } + + if (!init_fifo(&ca->free_inc, free << 2, GFP_KERNEL)) { + err = "ca->free_inc alloc failed"; + goto err_free_inc_alloc; + } + + if (!init_heap(&ca->heap, free << 3, GFP_KERNEL)) { + err = "ca->heap alloc failed"; + goto err_heap_alloc; + } + + ca->buckets = vzalloc(array_size(sizeof(struct bucket), + ca->sb.nbuckets)); + if (!ca->buckets) { + err = "ca->buckets alloc failed"; + goto err_buckets_alloc; + } + + ca->prio_buckets = kzalloc(array3_size(sizeof(uint64_t), + prio_buckets(ca), 2), + GFP_KERNEL); + if (!ca->prio_buckets) { + err = "ca->prio_buckets alloc failed"; + goto err_prio_buckets_alloc; + } + + ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca); + if (!ca->disk_buckets) { + err = "ca->disk_buckets alloc failed"; + goto err_disk_buckets_alloc; + } ca->prio_last_buckets = ca->prio_buckets + prio_buckets(ca); for_each_bucket(b, ca) atomic_set(&b->pin, 0); - return 0; + +err_disk_buckets_alloc: + kfree(ca->prio_buckets); +err_prio_buckets_alloc: + vfree(ca->buckets); +err_buckets_alloc: + free_heap(&ca->heap); +err_heap_alloc: + free_fifo(&ca->free_inc); +err_free_inc_alloc: + free_fifo(&ca->free[RESERVE_NONE]); +err_none_alloc: + free_fifo(&ca->free[RESERVE_MOVINGGC]); +err_movinggc_alloc: + free_fifo(&ca->free[RESERVE_PRIO]); +err_prio_alloc: + free_fifo(&ca->free[RESERVE_BTREE]); +err_btree_alloc: +err_free: + module_put(THIS_MODULE); + if (err) + pr_notice("error %s: %s", ca->cache_dev_name, err); + return ret; } static int register_cache(struct cache_sb *sb, struct page *sb_page, @@ -2093,12 +2183,16 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page, blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); if (ret == -ENOMEM) err = "cache_alloc(): -ENOMEM"; + else if (ret == -EPERM) + err = "cache_alloc(): cache device is too small"; else err = "cache_alloc(): unknown error"; goto err; } - if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache")) { + if (kobject_add(&ca->kobj, + &part_to_dev(bdev->bd_part)->kobj, + "bcache")) { err = "error calling kobject_add"; ret = -ENOMEM; goto out; @@ -2127,13 +2221,14 @@ err: /* Global interfaces/init */ -static ssize_t register_bcache(struct kobject *, struct kobj_attribute *, - const char *, size_t); +static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, + const char *buffer, size_t size); kobj_attribute_write(register, register_bcache); kobj_attribute_write(register_quiet, register_bcache); -static bool bch_is_open_backing(struct block_device *bdev) { +static bool bch_is_open_backing(struct block_device *bdev) +{ struct cache_set *c, *tc; struct cached_dev *dc, *t; @@ -2147,10 +2242,11 @@ static bool bch_is_open_backing(struct block_device *bdev) { return false; } -static bool bch_is_open_cache(struct block_device *bdev) { +static bool bch_is_open_cache(struct block_device *bdev) +{ struct cache_set *c, *tc; struct cache *ca; - unsigned i; + unsigned int i; list_for_each_entry_safe(c, tc, &bch_cache_sets, list) for_each_cache(ca, c, i) @@ -2159,7 +2255,8 @@ static bool bch_is_open_cache(struct block_device *bdev) { return false; } -static bool bch_is_open(struct block_device *bdev) { +static bool bch_is_open(struct block_device *bdev) +{ return bch_is_open_cache(bdev) || bch_is_open_backing(bdev); } @@ -2216,6 +2313,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, err = "failed to register device"; if (SB_IS_BDEV(sb)) { struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL); + if (!dc) goto err_close; @@ -2224,6 +2322,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, mutex_unlock(&bch_register_lock); } else { struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL); + if (!ca) goto err_close; @@ -2316,6 +2415,9 @@ static void bcache_exit(void) kobject_put(bcache_kobj); if (bcache_wq) destroy_workqueue(bcache_wq); + if (bch_journal_wq) + destroy_workqueue(bch_journal_wq); + if (bcache_major) unregister_blkdev(bcache_major, "bcache"); unregister_reboot_notifier(&reboot); @@ -2345,6 +2447,10 @@ static int __init bcache_init(void) if (!bcache_wq) goto err; + bch_journal_wq = alloc_workqueue("bch_journal", WQ_MEM_RECLAIM, 0); + if (!bch_journal_wq) + goto err; + bcache_kobj = kobject_create_and_add("bcache", fs_kobj); if (!bcache_kobj) goto err; @@ -2353,7 +2459,7 @@ static int __init bcache_init(void) sysfs_create_files(bcache_kobj, files)) goto err; - bch_debug_init(bcache_kobj); + bch_debug_init(); closure_debug_init(); return 0; |