From d9dc1702b297ec4a6bb9c0326a70641b322ba886 Mon Sep 17 00:00:00 2001 From: Eric Wheeler Date: Fri, 17 Jun 2016 15:01:54 -0700 Subject: bcache: register_bcache(): call blkdev_put() when cache_alloc() fails register_cache() is supposed to return an error string on error so that register_bcache() will will blkdev_put and cleanup other user counters, but it does not set 'char *err' when cache_alloc() fails (eg, due to memory pressure) and thus register_bcache() performs no cleanup. register_bcache() <----------\ <- no jump to err_close, no blkdev_put() | | +->register_cache() | <- fails to set char *err | | +->cache_alloc() ---/ <- returns error This patch sets `char *err` for this failure case so that register_cache() will cause register_bcache() to correctly jump to err_close and do cleanup. This was tested under OOM conditions that triggered the bug. Signed-off-by: Eric Wheeler Cc: Kent Overstreet Cc: stable@vger.kernel.org --- drivers/md/bcache/super.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 95a4ca6ce6ff..6ada14b9a157 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1844,7 +1844,7 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page, struct block_device *bdev, struct cache *ca) { char name[BDEVNAME_SIZE]; - const char *err = NULL; + const char *err = NULL; /* must be set for any error case */ int ret = 0; memcpy(&ca->sb, sb, sizeof(struct cache_sb)); @@ -1861,8 +1861,13 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page, ca->discard = CACHE_DISCARD(&ca->sb); ret = cache_alloc(ca); - if (ret != 0) + if (ret != 0) { + if (ret == -ENOMEM) + err = "cache_alloc(): -ENOMEM"; + else + err = "cache_alloc(): unknown error"; goto err; + } if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache")) { err = "error calling kobject_add"; -- cgit v1.2.1 From acc9cf8c66c66b2cbbdb4a375537edee72be64df Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 17 Aug 2016 18:21:24 -0700 Subject: bcache: RESERVE_PRIO is too small by one when prio_buckets() is a power of two. This patch fixes a cachedev registration-time allocation deadlock. This can deadlock on boot if your initrd auto-registeres bcache devices: Allocator thread: [ 720.727614] INFO: task bcache_allocato:3833 blocked for more than 120 seconds. [ 720.732361] [] schedule+0x37/0x90 [ 720.732963] [] bch_bucket_alloc+0x188/0x360 [bcache] [ 720.733538] [] ? prepare_to_wait_event+0xf0/0xf0 [ 720.734137] [] bch_prio_write+0x19d/0x340 [bcache] [ 720.734715] [] bch_allocator_thread+0x3ff/0x470 [bcache] [ 720.735311] [] ? __schedule+0x2dc/0x950 [ 720.735884] [] ? invalidate_buckets+0x980/0x980 [bcache] Registration thread: [ 720.710403] INFO: task bash:3531 blocked for more than 120 seconds. [ 720.715226] [] schedule+0x37/0x90 [ 720.715805] [] __bch_btree_map_nodes+0x12d/0x150 [bcache] [ 720.716409] [] ? bch_btree_insert_check_key+0x1c0/0x1c0 [bcache] [ 720.717008] [] bch_btree_insert+0xf4/0x170 [bcache] [ 720.717586] [] ? prepare_to_wait_event+0xf0/0xf0 [ 720.718191] [] bch_journal_replay+0x14a/0x290 [bcache] [ 720.718766] [] ? ttwu_do_activate.constprop.94+0x5d/0x70 [ 720.719369] [] ? try_to_wake_up+0x1d4/0x350 [ 720.719968] [] run_cache_set+0x580/0x8e0 [bcache] [ 720.720553] [] register_bcache+0xe2e/0x13b0 [bcache] [ 720.721153] [] kobj_attr_store+0xf/0x20 [ 720.721730] [] sysfs_kf_write+0x3d/0x50 [ 720.722327] [] kernfs_fop_write+0x12a/0x180 [ 720.722904] [] __vfs_write+0x37/0x110 [ 720.723503] [] ? __sb_start_write+0x58/0x110 [ 720.724100] [] ? security_file_permission+0x23/0xa0 [ 720.724675] [] vfs_write+0xa9/0x1b0 [ 720.725275] [] ? do_audit_syscall_entry+0x6c/0x70 [ 720.725849] [] SyS_write+0x55/0xd0 [ 720.726451] [] ? do_page_fault+0x30/0x80 [ 720.727045] [] system_call_fastpath+0x12/0x71 The fifo code in upstream bcache can't use the last element in the buffer, which was the cause of the bug: if you asked for a power of two size, it'd give you a fifo that could hold one less than what you asked for rather than allocating a buffer twice as big. Signed-off-by: Kent Overstreet Tested-by: Eric Wheeler Cc: stable@vger.kernel.org --- drivers/md/bcache/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 6ada14b9a157..6b93e1b77767 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1820,7 +1820,7 @@ static int cache_alloc(struct cache *ca) free = roundup_pow_of_two(ca->sb.nbuckets) >> 10; if (!init_fifo(&ca->free[RESERVE_BTREE], 8, GFP_KERNEL) || - !init_fifo(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) || + !init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) || !init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) || !init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) || !init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) || -- cgit v1.2.1 From 90706094d5be614ae7285b3c96c3125bb198618c Mon Sep 17 00:00:00 2001 From: Eric Wheeler Date: Thu, 18 Aug 2016 20:15:26 -0700 Subject: bcache: pr_err: more meaningful error message when nr_stripes is invalid The original error was thought to be corruption, but was actually caused by: make-bcache --data-offset N where N was in bytes and should have been in sectors. While userspace tools should be updated to check --data-offset beyond end of volume, hopefully this will help others that might not have noticed the units. Signed-off-by: Eric Wheeler Cc: Kent Overstreet --- drivers/md/bcache/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 6b93e1b77767..849ad441cd76 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -760,7 +760,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, if (!d->nr_stripes || d->nr_stripes > INT_MAX || d->nr_stripes > SIZE_MAX / sizeof(atomic_t)) { - pr_err("nr_stripes too large"); + pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)", + (unsigned)d->nr_stripes); return -ENOMEM; } -- cgit v1.2.1