diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/Kconfig | 5 | ||||
-rw-r--r-- | drivers/md/bitmap.c | 2 | ||||
-rw-r--r-- | drivers/md/md.c | 9 | ||||
-rw-r--r-- | drivers/md/raid1.c | 57 | ||||
-rw-r--r-- | drivers/md/raid10.c | 30 | ||||
-rw-r--r-- | drivers/md/raid10.h | 2 | ||||
-rw-r--r-- | drivers/md/raid5.c | 107 | ||||
-rw-r--r-- | drivers/md/raid5.h | 1 |
8 files changed, 176 insertions, 37 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 1eee45b69b71..d949b781f6f8 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -268,13 +268,14 @@ config DM_MIRROR needed for live data migration tools such as 'pvmove'. config DM_RAID - tristate "RAID 1/4/5/6 target" + tristate "RAID 1/4/5/6/10 target" depends on BLK_DEV_DM select MD_RAID1 + select MD_RAID10 select MD_RAID456 select BLK_DEV_MD ---help--- - A dm target that supports RAID1, RAID4, RAID5 and RAID6 mappings + A dm target that supports RAID1, RAID10, RAID4, RAID5 and RAID6 mappings A RAID-5 set of N drives with a capacity of C MB per drive provides the capacity of C * (N - 1) MB, and protects against a failure diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 15dbe03117e4..94e7f6ba2e11 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1305,7 +1305,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect prepare_to_wait(&bitmap->overflow_wait, &__wait, TASK_UNINTERRUPTIBLE); spin_unlock_irq(&bitmap->counts.lock); - io_schedule(); + schedule(); finish_wait(&bitmap->overflow_wait, &__wait); continue; } diff --git a/drivers/md/md.c b/drivers/md/md.c index fcd098794d37..3f6203a4c7ea 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1108,8 +1108,11 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor ret = 0; } rdev->sectors = rdev->sb_start; - /* Limit to 4TB as metadata cannot record more than that */ - if (rdev->sectors >= (2ULL << 32)) + /* Limit to 4TB as metadata cannot record more than that. + * (not needed for Linear and RAID0 as metadata doesn't + * record this size) + */ + if (rdev->sectors >= (2ULL << 32) && sb->level >= 1) rdev->sectors = (2ULL << 32) - 2; if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1) @@ -1400,7 +1403,7 @@ super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) /* Limit to 4TB as metadata cannot record more than that. * 4TB == 2^32 KB, or 2*2^32 sectors. */ - if (num_sectors >= (2ULL << 32)) + if (num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1) num_sectors = (2ULL << 32) - 2; md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, rdev->sb_page); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 9f7f8bee8442..611b5f797618 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -944,6 +944,44 @@ do_sync_io: pr_debug("%dB behind alloc failed, doing sync I/O\n", bio->bi_size); } +struct raid1_plug_cb { + struct blk_plug_cb cb; + struct bio_list pending; + int pending_cnt; +}; + +static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule) +{ + struct raid1_plug_cb *plug = container_of(cb, struct raid1_plug_cb, + cb); + struct mddev *mddev = plug->cb.data; + struct r1conf *conf = mddev->private; + struct bio *bio; + + if (from_schedule) { + spin_lock_irq(&conf->device_lock); + bio_list_merge(&conf->pending_bio_list, &plug->pending); + conf->pending_count += plug->pending_cnt; + spin_unlock_irq(&conf->device_lock); + md_wakeup_thread(mddev->thread); + kfree(plug); + return; + } + + /* we aren't scheduling, so we can do the write-out directly. */ + bio = bio_list_get(&plug->pending); + bitmap_unplug(mddev->bitmap); + wake_up(&conf->wait_barrier); + + while (bio) { /* submit pending writes */ + struct bio *next = bio->bi_next; + bio->bi_next = NULL; + generic_make_request(bio); + bio = next; + } + kfree(plug); +} + static void make_request(struct mddev *mddev, struct bio * bio) { struct r1conf *conf = mddev->private; @@ -957,6 +995,8 @@ static void make_request(struct mddev *mddev, struct bio * bio) const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); struct md_rdev *blocked_rdev; + struct blk_plug_cb *cb; + struct raid1_plug_cb *plug = NULL; int first_clone; int sectors_handled; int max_sectors; @@ -1259,11 +1299,22 @@ read_again: mbio->bi_private = r1_bio; atomic_inc(&r1_bio->remaining); + + cb = blk_check_plugged(raid1_unplug, mddev, sizeof(*plug)); + if (cb) + plug = container_of(cb, struct raid1_plug_cb, cb); + else + plug = NULL; spin_lock_irqsave(&conf->device_lock, flags); - bio_list_add(&conf->pending_bio_list, mbio); - conf->pending_count++; + if (plug) { + bio_list_add(&plug->pending, mbio); + plug->pending_cnt++; + } else { + bio_list_add(&conf->pending_bio_list, mbio); + conf->pending_count++; + } spin_unlock_irqrestore(&conf->device_lock, flags); - if (!mddev_check_plugged(mddev)) + if (!plug) md_wakeup_thread(mddev->thread); } /* Mustn't call r1_bio_write_done before this next test, diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index de5ed6fd8806..1c2eb38f3c51 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -659,7 +659,11 @@ static int raid10_mergeable_bvec(struct request_queue *q, max = biovec->bv_len; if (mddev->merge_check_needed) { - struct r10bio r10_bio; + struct { + struct r10bio r10_bio; + struct r10dev devs[conf->copies]; + } on_stack; + struct r10bio *r10_bio = &on_stack.r10_bio; int s; if (conf->reshape_progress != MaxSector) { /* Cannot give any guidance during reshape */ @@ -667,18 +671,18 @@ static int raid10_mergeable_bvec(struct request_queue *q, return biovec->bv_len; return 0; } - r10_bio.sector = sector; - raid10_find_phys(conf, &r10_bio); + r10_bio->sector = sector; + raid10_find_phys(conf, r10_bio); rcu_read_lock(); for (s = 0; s < conf->copies; s++) { - int disk = r10_bio.devs[s].devnum; + int disk = r10_bio->devs[s].devnum; struct md_rdev *rdev = rcu_dereference( conf->mirrors[disk].rdev); if (rdev && !test_bit(Faulty, &rdev->flags)) { struct request_queue *q = bdev_get_queue(rdev->bdev); if (q->merge_bvec_fn) { - bvm->bi_sector = r10_bio.devs[s].addr + bvm->bi_sector = r10_bio->devs[s].addr + rdev->data_offset; bvm->bi_bdev = rdev->bdev; max = min(max, q->merge_bvec_fn( @@ -690,7 +694,7 @@ static int raid10_mergeable_bvec(struct request_queue *q, struct request_queue *q = bdev_get_queue(rdev->bdev); if (q->merge_bvec_fn) { - bvm->bi_sector = r10_bio.devs[s].addr + bvm->bi_sector = r10_bio->devs[s].addr + rdev->data_offset; bvm->bi_bdev = rdev->bdev; max = min(max, q->merge_bvec_fn( @@ -4414,14 +4418,18 @@ static int handle_reshape_read_error(struct mddev *mddev, { /* Use sync reads to get the blocks from somewhere else */ int sectors = r10_bio->sectors; - struct r10bio r10b; struct r10conf *conf = mddev->private; + struct { + struct r10bio r10_bio; + struct r10dev devs[conf->copies]; + } on_stack; + struct r10bio *r10b = &on_stack.r10_bio; int slot = 0; int idx = 0; struct bio_vec *bvec = r10_bio->master_bio->bi_io_vec; - r10b.sector = r10_bio->sector; - __raid10_find_phys(&conf->prev, &r10b); + r10b->sector = r10_bio->sector; + __raid10_find_phys(&conf->prev, r10b); while (sectors) { int s = sectors; @@ -4432,7 +4440,7 @@ static int handle_reshape_read_error(struct mddev *mddev, s = PAGE_SIZE >> 9; while (!success) { - int d = r10b.devs[slot].devnum; + int d = r10b->devs[slot].devnum; struct md_rdev *rdev = conf->mirrors[d].rdev; sector_t addr; if (rdev == NULL || @@ -4440,7 +4448,7 @@ static int handle_reshape_read_error(struct mddev *mddev, !test_bit(In_sync, &rdev->flags)) goto failed; - addr = r10b.devs[slot].addr + idx * PAGE_SIZE; + addr = r10b->devs[slot].addr + idx * PAGE_SIZE; success = sync_page_io(rdev, addr, s << 9, diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 007c2c68dd83..1054cf602345 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h @@ -110,7 +110,7 @@ struct r10bio { * We choose the number when they are allocated. * We sometimes need an extra bio to write to the replacement. */ - struct { + struct r10dev { struct bio *bio; union { struct bio *repl_bio; /* used for resync and diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 87a2d0bdedd1..adda94df5eb2 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -484,7 +484,8 @@ get_active_stripe(struct r5conf *conf, sector_t sector, } else { if (atomic_read(&sh->count)) { BUG_ON(!list_empty(&sh->lru) - && !test_bit(STRIPE_EXPANDING, &sh->state)); + && !test_bit(STRIPE_EXPANDING, &sh->state) + && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)); } else { if (!test_bit(STRIPE_HANDLE, &sh->state)) atomic_inc(&conf->active_stripes); @@ -4010,6 +4011,62 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf) return sh; } +struct raid5_plug_cb { + struct blk_plug_cb cb; + struct list_head list; +}; + +static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule) +{ + struct raid5_plug_cb *cb = container_of( + blk_cb, struct raid5_plug_cb, cb); + struct stripe_head *sh; + struct mddev *mddev = cb->cb.data; + struct r5conf *conf = mddev->private; + + if (cb->list.next && !list_empty(&cb->list)) { + spin_lock_irq(&conf->device_lock); + while (!list_empty(&cb->list)) { + sh = list_first_entry(&cb->list, struct stripe_head, lru); + list_del_init(&sh->lru); + /* + * avoid race release_stripe_plug() sees + * STRIPE_ON_UNPLUG_LIST clear but the stripe + * is still in our list + */ + smp_mb__before_clear_bit(); + clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state); + __release_stripe(conf, sh); + } + spin_unlock_irq(&conf->device_lock); + } + kfree(cb); +} + +static void release_stripe_plug(struct mddev *mddev, + struct stripe_head *sh) +{ + struct blk_plug_cb *blk_cb = blk_check_plugged( + raid5_unplug, mddev, + sizeof(struct raid5_plug_cb)); + struct raid5_plug_cb *cb; + + if (!blk_cb) { + release_stripe(sh); + return; + } + + cb = container_of(blk_cb, struct raid5_plug_cb, cb); + + if (cb->list.next == NULL) + INIT_LIST_HEAD(&cb->list); + + if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)) + list_add_tail(&sh->lru, &cb->list); + else + release_stripe(sh); +} + static void make_request(struct mddev *mddev, struct bio * bi) { struct r5conf *conf = mddev->private; @@ -4138,8 +4195,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) if ((bi->bi_rw & REQ_NOIDLE) && !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) atomic_inc(&conf->preread_active_stripes); - mddev_check_plugged(mddev); - release_stripe(sh); + release_stripe_plug(mddev, sh); } else { /* cannot get stripe for read-ahead, just give-up */ clear_bit(BIO_UPTODATE, &bi->bi_flags); @@ -4537,6 +4593,30 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) return handled; } +#define MAX_STRIPE_BATCH 8 +static int handle_active_stripes(struct r5conf *conf) +{ + struct stripe_head *batch[MAX_STRIPE_BATCH], *sh; + int i, batch_size = 0; + + while (batch_size < MAX_STRIPE_BATCH && + (sh = __get_priority_stripe(conf)) != NULL) + batch[batch_size++] = sh; + + if (batch_size == 0) + return batch_size; + spin_unlock_irq(&conf->device_lock); + + for (i = 0; i < batch_size; i++) + handle_stripe(batch[i]); + + cond_resched(); + + spin_lock_irq(&conf->device_lock); + for (i = 0; i < batch_size; i++) + __release_stripe(conf, batch[i]); + return batch_size; +} /* * This is our raid5 kernel thread. @@ -4547,7 +4627,6 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) */ static void raid5d(struct mddev *mddev) { - struct stripe_head *sh; struct r5conf *conf = mddev->private; int handled; struct blk_plug plug; @@ -4561,6 +4640,7 @@ static void raid5d(struct mddev *mddev) spin_lock_irq(&conf->device_lock); while (1) { struct bio *bio; + int batch_size; if ( !list_empty(&conf->bitmap_list)) { @@ -4584,21 +4664,16 @@ static void raid5d(struct mddev *mddev) handled++; } - sh = __get_priority_stripe(conf); - - if (!sh) + batch_size = handle_active_stripes(conf); + if (!batch_size) break; - spin_unlock_irq(&conf->device_lock); - - handled++; - handle_stripe(sh); - release_stripe(sh); - cond_resched(); + handled += batch_size; - if (mddev->flags & ~(1<<MD_CHANGE_PENDING)) + if (mddev->flags & ~(1<<MD_CHANGE_PENDING)) { + spin_unlock_irq(&conf->device_lock); md_check_recovery(mddev); - - spin_lock_irq(&conf->device_lock); + spin_lock_irq(&conf->device_lock); + } } pr_debug("%d stripes handled\n", handled); diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 61dbb615c30b..a9fc24901eda 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -321,6 +321,7 @@ enum { STRIPE_BIOFILL_RUN, STRIPE_COMPUTE_RUN, STRIPE_OPS_REQ_PENDING, + STRIPE_ON_UNPLUG_LIST, }; /* |