From 667a5313ecd7308d79629c0738b0db588b0b0a4e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 16 Aug 2012 16:46:12 +1000 Subject: md: Don't truncate size at 4TB for RAID0 and Linear commit 27a7b260f71439c40546b43588448faac01adb93 md: Fix handling for devices from 2TB to 4TB in 0.90 metadata. changed 0.90 metadata handling to truncated size to 4TB as that is all that 0.90 can record. However for RAID0 and Linear, 0.90 doesn't need to record the size, so this truncation is not needed and causes working arrays to become too small. So avoid the truncation for RAID0 and Linear This bug was introduced in 3.1 and is suitable for any stable kernels from then onwards. As the offending commit was tagged for 'stable', any stable kernel that it was applied to should also get this patch. That includes at least 2.6.32, 2.6.33 and 3.0. (Thanks to Ben Hutchings for providing that list). Cc: stable@vger.kernel.org Signed-off-by: Neil Brown --- drivers/md/md.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index fcd098794d37..3f6203a4c7ea 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1108,8 +1108,11 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor ret = 0; } rdev->sectors = rdev->sb_start; - /* Limit to 4TB as metadata cannot record more than that */ - if (rdev->sectors >= (2ULL << 32)) + /* Limit to 4TB as metadata cannot record more than that. + * (not needed for Linear and RAID0 as metadata doesn't + * record this size) + */ + if (rdev->sectors >= (2ULL << 32) && sb->level >= 1) rdev->sectors = (2ULL << 32) - 2; if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1) @@ -1400,7 +1403,7 @@ super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) /* Limit to 4TB as metadata cannot record more than that. * 4TB == 2^32 KB, or 2*2^32 sectors. */ - if (num_sectors >= (2ULL << 32)) + if (num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1) num_sectors = (2ULL << 32) - 2; md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, rdev->sb_page); -- cgit v1.2.1 From e0ee778528bbaad28a5c69d2e219269a3a096607 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sat, 18 Aug 2012 09:51:42 +1000 Subject: md/raid10: fix problem with on-stack allocation of r10bio structure. A 'struct r10bio' has an array of per-copy information at the end. This array is declared with size [0] and r10bio_pool_alloc allocates enough extra space to store the per-copy information depending on the number of copies needed. So declaring a 'struct r10bio on the stack isn't going to work. It won't allocate enough space, and memory corruption will ensue. So in the two places where this is done, declare a sufficiently large structure and use that instead. The two call-sites of this bug were introduced in 3.4 and 3.5 so this is suitable for both those kernels. The patch will have to be modified for 3.4 as it only has one bug. Cc: stable@vger.kernel.org Reported-by: Ivan Vasilyev Tested-by: Ivan Vasilyev Signed-off-by: NeilBrown --- drivers/md/raid10.c | 30 +++++++++++++++++++----------- drivers/md/raid10.h | 2 +- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index de5ed6fd8806..1c2eb38f3c51 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -659,7 +659,11 @@ static int raid10_mergeable_bvec(struct request_queue *q, max = biovec->bv_len; if (mddev->merge_check_needed) { - struct r10bio r10_bio; + struct { + struct r10bio r10_bio; + struct r10dev devs[conf->copies]; + } on_stack; + struct r10bio *r10_bio = &on_stack.r10_bio; int s; if (conf->reshape_progress != MaxSector) { /* Cannot give any guidance during reshape */ @@ -667,18 +671,18 @@ static int raid10_mergeable_bvec(struct request_queue *q, return biovec->bv_len; return 0; } - r10_bio.sector = sector; - raid10_find_phys(conf, &r10_bio); + r10_bio->sector = sector; + raid10_find_phys(conf, r10_bio); rcu_read_lock(); for (s = 0; s < conf->copies; s++) { - int disk = r10_bio.devs[s].devnum; + int disk = r10_bio->devs[s].devnum; struct md_rdev *rdev = rcu_dereference( conf->mirrors[disk].rdev); if (rdev && !test_bit(Faulty, &rdev->flags)) { struct request_queue *q = bdev_get_queue(rdev->bdev); if (q->merge_bvec_fn) { - bvm->bi_sector = r10_bio.devs[s].addr + bvm->bi_sector = r10_bio->devs[s].addr + rdev->data_offset; bvm->bi_bdev = rdev->bdev; max = min(max, q->merge_bvec_fn( @@ -690,7 +694,7 @@ static int raid10_mergeable_bvec(struct request_queue *q, struct request_queue *q = bdev_get_queue(rdev->bdev); if (q->merge_bvec_fn) { - bvm->bi_sector = r10_bio.devs[s].addr + bvm->bi_sector = r10_bio->devs[s].addr + rdev->data_offset; bvm->bi_bdev = rdev->bdev; max = min(max, q->merge_bvec_fn( @@ -4414,14 +4418,18 @@ static int handle_reshape_read_error(struct mddev *mddev, { /* Use sync reads to get the blocks from somewhere else */ int sectors = r10_bio->sectors; - struct r10bio r10b; struct r10conf *conf = mddev->private; + struct { + struct r10bio r10_bio; + struct r10dev devs[conf->copies]; + } on_stack; + struct r10bio *r10b = &on_stack.r10_bio; int slot = 0; int idx = 0; struct bio_vec *bvec = r10_bio->master_bio->bi_io_vec; - r10b.sector = r10_bio->sector; - __raid10_find_phys(&conf->prev, &r10b); + r10b->sector = r10_bio->sector; + __raid10_find_phys(&conf->prev, r10b); while (sectors) { int s = sectors; @@ -4432,7 +4440,7 @@ static int handle_reshape_read_error(struct mddev *mddev, s = PAGE_SIZE >> 9; while (!success) { - int d = r10b.devs[slot].devnum; + int d = r10b->devs[slot].devnum; struct md_rdev *rdev = conf->mirrors[d].rdev; sector_t addr; if (rdev == NULL || @@ -4440,7 +4448,7 @@ static int handle_reshape_read_error(struct mddev *mddev, !test_bit(In_sync, &rdev->flags)) goto failed; - addr = r10b.devs[slot].addr + idx * PAGE_SIZE; + addr = r10b->devs[slot].addr + idx * PAGE_SIZE; success = sync_page_io(rdev, addr, s << 9, diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 007c2c68dd83..1054cf602345 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h @@ -110,7 +110,7 @@ struct r10bio { * We choose the number when they are allocated. * We sometimes need an extra bio to write to the replacement. */ - struct { + struct r10dev { struct bio *bio; union { struct bio *repl_bio; /* used for resync and -- cgit v1.2.1