Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/Kconfig   |   5
-rw-r--r--  drivers/md/bitmap.c  |   2
-rw-r--r--  drivers/md/md.c      |   9
-rw-r--r--  drivers/md/raid1.c   |  57
-rw-r--r--  drivers/md/raid10.c  |  30
-rw-r--r--  drivers/md/raid10.h  |   2
-rw-r--r--  drivers/md/raid5.c   | 107
-rw-r--r--  drivers/md/raid5.h   |   1
8 files changed, 176 insertions, 37 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 1eee45b69b71..d949b781f6f8 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -268,13 +268,14 @@ config DM_MIRROR
needed for live data migration tools such as 'pvmove'.
config DM_RAID
- tristate "RAID 1/4/5/6 target"
+ tristate "RAID 1/4/5/6/10 target"
depends on BLK_DEV_DM
select MD_RAID1
+ select MD_RAID10
select MD_RAID456
select BLK_DEV_MD
---help---
- A dm target that supports RAID1, RAID4, RAID5 and RAID6 mappings
+ A dm target that supports RAID1, RAID10, RAID4, RAID5 and RAID6 mappings
A RAID-5 set of N drives with a capacity of C MB per drive provides
the capacity of C * (N - 1) MB, and protects against a failure
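
With this change a dm-raid table can request raid10. For illustration only, a mapping for a 2-copy near-layout set over four devices might look like the line below; the raid10_copies/raid10_format parameter names follow Documentation/device-mapper/dm-raid.txt of this era, and the sector counts and device numbers are made up:

# <start> <len> raid <type> <#params> <params...> <#devs> <meta> <data> ...
0 1960893648 raid raid10 5 64 raid10_copies 2 raid10_format near 4 - 8:17 - 8:33 - 8:49 - 8:65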
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 15dbe03117e4..94e7f6ba2e11 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1305,7 +1305,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
prepare_to_wait(&bitmap->overflow_wait, &__wait,
TASK_UNINTERRUPTIBLE);
spin_unlock_irq(&bitmap->counts.lock);
- io_schedule();
+ schedule();
finish_wait(&bitmap->overflow_wait, &__wait);
continue;
}
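
A note on this one-word change: io_schedule() charges the sleep to iowait and, in kernels of this era, calls blk_flush_plug() synchronously before sleeping, while plain schedule() leaves plug flushing to the scheduler's from_schedule path, which the new per-task plug callbacks in raid1.c/raid5.c below are written to tolerate. The surrounding wait is the classic open-coded waitqueue pattern; a condensed sketch (kernel-style C, slightly simplified from bitmap_startwrite()):

	DEFINE_WAIT(__wait);
	/* the per-chunk write counter is saturated; wait for a writer to finish */
	prepare_to_wait(&bitmap->overflow_wait, &__wait, TASK_UNINTERRUPTIBLE);
	spin_unlock_irq(&bitmap->counts.lock);
	schedule();		/* not io_schedule(): see note above */
	finish_wait(&bitmap->overflow_wait, &__wait);
	/* caller loops back, retakes the lock and re-checks the counter */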
diff --git a/drivers/md/md.c b/drivers/md/md.c
index fcd098794d37..3f6203a4c7ea 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1108,8 +1108,11 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
ret = 0;
}
rdev->sectors = rdev->sb_start;
- /* Limit to 4TB as metadata cannot record more than that */
- if (rdev->sectors >= (2ULL << 32))
+ /* Limit to 4TB as metadata cannot record more than that.
+ * (not needed for Linear and RAID0 as metadata doesn't
+ * record this size)
+ */
+ if (rdev->sectors >= (2ULL << 32) && sb->level >= 1)
rdev->sectors = (2ULL << 32) - 2;
if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
@@ -1400,7 +1403,7 @@ super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
/* Limit to 4TB as metadata cannot record more than that.
* 4TB == 2^32 KB, or 2*2^32 sectors.
*/
- if (num_sectors >= (2ULL << 32))
+ if (num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
num_sectors = (2ULL << 32) - 2;
md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
rdev->sb_page);
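
Why (2ULL << 32): the 0.90 superblock stores the per-device size as a 32-bit count of 1K blocks, so anything at or above 2^32 KB (2^33 sectors, i.e. 4TB) cannot be recorded; linear and RAID0 (sb->level < 1) don't consult that field, hence the new exemption. A standalone check of the arithmetic (plain C, illustrative only):

#include <stdio.h>

int main(void)
{
	/* the cap applied in super_90_load()/super_90_rdev_size_change() */
	unsigned long long max_sectors = (2ULL << 32) - 2;

	printf("%llu sectors = %llu KB = %.2f TiB\n",
	       max_sectors, max_sectors / 2,
	       max_sectors * 512.0 / (1ULL << 40));
	/* 8589934590 sectors = 4294967295 KB = 4.00 TiB:
	 * the largest even sector count whose KB value still fits in u32 */
	return 0;
}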
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 9f7f8bee8442..611b5f797618 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -944,6 +944,44 @@ do_sync_io:
pr_debug("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
}
+struct raid1_plug_cb {
+ struct blk_plug_cb cb;
+ struct bio_list pending;
+ int pending_cnt;
+};
+
+static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
+{
+ struct raid1_plug_cb *plug = container_of(cb, struct raid1_plug_cb,
+ cb);
+ struct mddev *mddev = plug->cb.data;
+ struct r1conf *conf = mddev->private;
+ struct bio *bio;
+
+ if (from_schedule) {
+ spin_lock_irq(&conf->device_lock);
+ bio_list_merge(&conf->pending_bio_list, &plug->pending);
+ conf->pending_count += plug->pending_cnt;
+ spin_unlock_irq(&conf->device_lock);
+ md_wakeup_thread(mddev->thread);
+ kfree(plug);
+ return;
+ }
+
+ /* we aren't scheduling, so we can do the write-out directly. */
+ bio = bio_list_get(&plug->pending);
+ bitmap_unplug(mddev->bitmap);
+ wake_up(&conf->wait_barrier);
+
+ while (bio) { /* submit pending writes */
+ struct bio *next = bio->bi_next;
+ bio->bi_next = NULL;
+ generic_make_request(bio);
+ bio = next;
+ }
+ kfree(plug);
+}
+
static void make_request(struct mddev *mddev, struct bio * bio)
{
struct r1conf *conf = mddev->private;
@@ -957,6 +995,8 @@ static void make_request(struct mddev *mddev, struct bio * bio)
const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
struct md_rdev *blocked_rdev;
+ struct blk_plug_cb *cb;
+ struct raid1_plug_cb *plug = NULL;
int first_clone;
int sectors_handled;
int max_sectors;
@@ -1259,11 +1299,22 @@ read_again:
mbio->bi_private = r1_bio;
atomic_inc(&r1_bio->remaining);
+
+ cb = blk_check_plugged(raid1_unplug, mddev, sizeof(*plug));
+ if (cb)
+ plug = container_of(cb, struct raid1_plug_cb, cb);
+ else
+ plug = NULL;
spin_lock_irqsave(&conf->device_lock, flags);
- bio_list_add(&conf->pending_bio_list, mbio);
- conf->pending_count++;
+ if (plug) {
+ bio_list_add(&plug->pending, mbio);
+ plug->pending_cnt++;
+ } else {
+ bio_list_add(&conf->pending_bio_list, mbio);
+ conf->pending_count++;
+ }
spin_unlock_irqrestore(&conf->device_lock, flags);
- if (!mddev_check_plugged(mddev))
+ if (!plug)
md_wakeup_thread(mddev->thread);
}
/* Mustn't call r1_bio_write_done before this next test,
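
The machinery here is blk_check_plugged(): it returns the callback already registered on the current task's plug for this (function, data) pair, or allocates a zeroed one of the requested size, so each plugging task batches writes on a private bio list with no locking. When the plug is flushed the callback runs once; from_schedule distinguishes an explicit blk_finish_plug() (safe to submit I/O directly) from an implicit flush on context switch (must not, so the list is handed to the md thread). Generic shape of such a callback as a sketch; the my_*/hand_off_* names are hypothetical, raid1_unplug() above is the real thing:

struct my_plug_cb {
	struct blk_plug_cb cb;		/* embedded: container_of() recovers us */
	struct bio_list pending;	/* per-task, so no lock needed to add */
};

static void my_unplug(struct blk_plug_cb *cb, bool from_schedule)
{
	struct my_plug_cb *plug = container_of(cb, struct my_plug_cb, cb);
	struct bio *bio = bio_list_get(&plug->pending);

	while (bio) {
		struct bio *next = bio->bi_next;

		bio->bi_next = NULL;
		if (from_schedule)		/* scheduler path: don't recurse */
			hand_off_to_daemon(bio);	/* hypothetical helper */
		else
			generic_make_request(bio);
		bio = next;
	}
	kfree(plug);	/* allocated by blk_check_plugged() */
}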
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index de5ed6fd8806..1c2eb38f3c51 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -659,7 +659,11 @@ static int raid10_mergeable_bvec(struct request_queue *q,
max = biovec->bv_len;
if (mddev->merge_check_needed) {
- struct r10bio r10_bio;
+ struct {
+ struct r10bio r10_bio;
+ struct r10dev devs[conf->copies];
+ } on_stack;
+ struct r10bio *r10_bio = &on_stack.r10_bio;
int s;
if (conf->reshape_progress != MaxSector) {
/* Cannot give any guidance during reshape */
@@ -667,18 +671,18 @@ static int raid10_mergeable_bvec(struct request_queue *q,
return biovec->bv_len;
return 0;
}
- r10_bio.sector = sector;
- raid10_find_phys(conf, &r10_bio);
+ r10_bio->sector = sector;
+ raid10_find_phys(conf, r10_bio);
rcu_read_lock();
for (s = 0; s < conf->copies; s++) {
- int disk = r10_bio.devs[s].devnum;
+ int disk = r10_bio->devs[s].devnum;
struct md_rdev *rdev = rcu_dereference(
conf->mirrors[disk].rdev);
if (rdev && !test_bit(Faulty, &rdev->flags)) {
struct request_queue *q =
bdev_get_queue(rdev->bdev);
if (q->merge_bvec_fn) {
- bvm->bi_sector = r10_bio.devs[s].addr
+ bvm->bi_sector = r10_bio->devs[s].addr
+ rdev->data_offset;
bvm->bi_bdev = rdev->bdev;
max = min(max, q->merge_bvec_fn(
@@ -690,7 +694,7 @@ static int raid10_mergeable_bvec(struct request_queue *q,
struct request_queue *q =
bdev_get_queue(rdev->bdev);
if (q->merge_bvec_fn) {
- bvm->bi_sector = r10_bio.devs[s].addr
+ bvm->bi_sector = r10_bio->devs[s].addr
+ rdev->data_offset;
bvm->bi_bdev = rdev->bdev;
max = min(max, q->merge_bvec_fn(
@@ -4414,14 +4418,18 @@ static int handle_reshape_read_error(struct mddev *mddev,
{
/* Use sync reads to get the blocks from somewhere else */
int sectors = r10_bio->sectors;
- struct r10bio r10b;
struct r10conf *conf = mddev->private;
+ struct {
+ struct r10bio r10_bio;
+ struct r10dev devs[conf->copies];
+ } on_stack;
+ struct r10bio *r10b = &on_stack.r10_bio;
int slot = 0;
int idx = 0;
struct bio_vec *bvec = r10_bio->master_bio->bi_io_vec;
- r10b.sector = r10_bio->sector;
- __raid10_find_phys(&conf->prev, &r10b);
+ r10b->sector = r10_bio->sector;
+ __raid10_find_phys(&conf->prev, r10b);
while (sectors) {
int s = sectors;
@@ -4432,7 +4440,7 @@ static int handle_reshape_read_error(struct mddev *mddev,
s = PAGE_SIZE >> 9;
while (!success) {
- int d = r10b.devs[slot].devnum;
+ int d = r10b->devs[slot].devnum;
struct md_rdev *rdev = conf->mirrors[d].rdev;
sector_t addr;
if (rdev == NULL ||
@@ -4440,7 +4448,7 @@ static int handle_reshape_read_error(struct mddev *mddev,
!test_bit(In_sync, &rdev->flags))
goto failed;
- addr = r10b.devs[slot].addr + idx * PAGE_SIZE;
+ addr = r10b->devs[slot].addr + idx * PAGE_SIZE;
success = sync_page_io(rdev,
addr,
s << 9,
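
Background on the on_stack wrapper: struct r10bio ends in a zero-length devs[] array that is normally sized when the struct is kmalloc()ed, so the old bare on-stack struct r10bio left raid10_find_phys() no room for its per-copy entries and it wrote past the end of the stack object. Pairing the header with a C99 VLA of the element type (newly tagged r10dev, see raid10.h below) sizes it correctly. A compilable toy model of the trick, with illustrative names:

#include <stdio.h>

struct elem { int id; };

struct header {
	int n;
	struct elem devs[0];	/* GNU zero-length array, like r10bio's devs[] */
};

static void fill(struct header *h, int copies)
{
	for (int i = 0; i < copies; i++)
		h->devs[i].id = i;	/* needs storage beyond sizeof(*h) */
}

int main(void)
{
	int copies = 4;
	struct {
		struct header h;
		struct elem devs[copies];	/* VLA provides the room */
	} on_stack;

	fill(&on_stack.h, copies);
	printf("%d\n", on_stack.h.devs[copies - 1].id);	/* prints 3 */
	return 0;
}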
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 007c2c68dd83..1054cf602345 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -110,7 +110,7 @@ struct r10bio {
* We choose the number when they are allocated.
* We sometimes need an extra bio to write to the replacement.
*/
- struct {
+ struct r10dev {
struct bio *bio;
union {
struct bio *repl_bio; /* used for resync and
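
The only change here is giving the struct a tag. An anonymous member type cannot be named from another file, so until now there was no way for raid10.c to declare a matching array; with the tag there is (toy line, not from the patch):

	struct r10dev stack_devs[4];	/* now expressible anywhere raid10.h is included */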
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 87a2d0bdedd1..adda94df5eb2 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -484,7 +484,8 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
} else {
if (atomic_read(&sh->count)) {
BUG_ON(!list_empty(&sh->lru)
- && !test_bit(STRIPE_EXPANDING, &sh->state));
+ && !test_bit(STRIPE_EXPANDING, &sh->state)
+ && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state));
} else {
if (!test_bit(STRIPE_HANDLE, &sh->state))
atomic_inc(&conf->active_stripes);
@@ -4010,6 +4011,62 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf)
return sh;
}
+struct raid5_plug_cb {
+ struct blk_plug_cb cb;
+ struct list_head list;
+};
+
+static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
+{
+ struct raid5_plug_cb *cb = container_of(
+ blk_cb, struct raid5_plug_cb, cb);
+ struct stripe_head *sh;
+ struct mddev *mddev = cb->cb.data;
+ struct r5conf *conf = mddev->private;
+
+ if (cb->list.next && !list_empty(&cb->list)) {
+ spin_lock_irq(&conf->device_lock);
+ while (!list_empty(&cb->list)) {
+ sh = list_first_entry(&cb->list, struct stripe_head, lru);
+ list_del_init(&sh->lru);
+ /*
+ * avoid race release_stripe_plug() sees
+ * STRIPE_ON_UNPLUG_LIST clear but the stripe
+ * is still in our list
+ */
+ smp_mb__before_clear_bit();
+ clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state);
+ __release_stripe(conf, sh);
+ }
+ spin_unlock_irq(&conf->device_lock);
+ }
+ kfree(cb);
+}
+
+static void release_stripe_plug(struct mddev *mddev,
+ struct stripe_head *sh)
+{
+ struct blk_plug_cb *blk_cb = blk_check_plugged(
+ raid5_unplug, mddev,
+ sizeof(struct raid5_plug_cb));
+ struct raid5_plug_cb *cb;
+
+ if (!blk_cb) {
+ release_stripe(sh);
+ return;
+ }
+
+ cb = container_of(blk_cb, struct raid5_plug_cb, cb);
+
+ if (cb->list.next == NULL)
+ INIT_LIST_HEAD(&cb->list);
+
+ if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state))
+ list_add_tail(&sh->lru, &cb->list);
+ else
+ release_stripe(sh);
+}
+
static void make_request(struct mddev *mddev, struct bio * bi)
{
struct r5conf *conf = mddev->private;
@@ -4138,8 +4195,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
if ((bi->bi_rw & REQ_NOIDLE) &&
!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
atomic_inc(&conf->preread_active_stripes);
- mddev_check_plugged(mddev);
- release_stripe(sh);
+ release_stripe_plug(mddev, sh);
} else {
/* cannot get stripe for read-ahead, just give-up */
clear_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -4537,6 +4593,30 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
return handled;
}
+#define MAX_STRIPE_BATCH 8
+static int handle_active_stripes(struct r5conf *conf)
+{
+ struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
+ int i, batch_size = 0;
+
+ while (batch_size < MAX_STRIPE_BATCH &&
+ (sh = __get_priority_stripe(conf)) != NULL)
+ batch[batch_size++] = sh;
+
+ if (batch_size == 0)
+ return batch_size;
+ spin_unlock_irq(&conf->device_lock);
+
+ for (i = 0; i < batch_size; i++)
+ handle_stripe(batch[i]);
+
+ cond_resched();
+
+ spin_lock_irq(&conf->device_lock);
+ for (i = 0; i < batch_size; i++)
+ __release_stripe(conf, batch[i]);
+ return batch_size;
+}
/*
* This is our raid5 kernel thread.
@@ -4547,7 +4627,6 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
*/
static void raid5d(struct mddev *mddev)
{
- struct stripe_head *sh;
struct r5conf *conf = mddev->private;
int handled;
struct blk_plug plug;
@@ -4561,6 +4640,7 @@ static void raid5d(struct mddev *mddev)
spin_lock_irq(&conf->device_lock);
while (1) {
struct bio *bio;
+ int batch_size;
if (
!list_empty(&conf->bitmap_list)) {
@@ -4584,21 +4664,16 @@ static void raid5d(struct mddev *mddev)
handled++;
}
- sh = __get_priority_stripe(conf);
-
- if (!sh)
+ batch_size = handle_active_stripes(conf);
+ if (!batch_size)
break;
- spin_unlock_irq(&conf->device_lock);
-
- handled++;
- handle_stripe(sh);
- release_stripe(sh);
- cond_resched();
+ handled += batch_size;
- if (mddev->flags & ~(1<<MD_CHANGE_PENDING))
+ if (mddev->flags & ~(1<<MD_CHANGE_PENDING)) {
+ spin_unlock_irq(&conf->device_lock);
md_check_recovery(mddev);
-
- spin_lock_irq(&conf->device_lock);
+ spin_lock_irq(&conf->device_lock);
+ }
}
pr_debug("%d stripes handled\n", handled);
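
Two related changes in raid5. First, writers batch stripes on a per-task plug list (raid5_plug_cb) and release them wholesale at unplug time; the STRIPE_ON_UNPLUG_LIST bit plus smp_mb__before_clear_bit() stop release_stripe_plug() from seeing the bit clear while raid5_unplug() still has the stripe on its list. Second, raid5d now drains up to MAX_STRIPE_BATCH stripes per device_lock round trip instead of locking once per stripe, and drops the lock around md_check_recovery(). A self-contained userspace sketch of that batching shape (hypothetical queue/work names, a pthread mutex standing in for device_lock):

#include <pthread.h>
#include <stddef.h>

#define MAX_BATCH 8

struct work { struct work *next; };

struct queue {
	pthread_mutex_t lock;
	struct work *head;
};

static struct work *pop_locked(struct queue *q)	/* caller holds q->lock */
{
	struct work *w = q->head;

	if (w)
		q->head = w->next;
	return w;
}

static void process(struct work *w) { (void)w; /* the expensive, unlocked part */ }

/* called (and returns) with q->lock held; the lock is dropped and retaken
 * once per batch of up to MAX_BATCH items, not once per item */
static int handle_batch(struct queue *q)
{
	struct work *batch[MAX_BATCH];
	int i, n = 0;

	while (n < MAX_BATCH && (batch[n] = pop_locked(q)) != NULL)
		n++;
	if (n == 0)
		return 0;

	pthread_mutex_unlock(&q->lock);
	for (i = 0; i < n; i++)
		process(batch[i]);
	pthread_mutex_lock(&q->lock);
	return n;
}

int main(void)
{
	struct work items[20];
	struct queue q = { PTHREAD_MUTEX_INITIALIZER, NULL };

	for (int i = 0; i < 20; i++) {	/* queue everything up front */
		items[i].next = q.head;
		q.head = &items[i];
	}
	pthread_mutex_lock(&q.lock);
	while (handle_batch(&q))	/* drains in batches of 8, 8, 4 */
		;
	pthread_mutex_unlock(&q.lock);
	return 0;
}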
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 61dbb615c30b..a9fc24901eda 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -321,6 +321,7 @@ enum {
STRIPE_BIOFILL_RUN,
STRIPE_COMPUTE_RUN,
STRIPE_OPS_REQ_PENDING,
+ STRIPE_ON_UNPLUG_LIST,
};
/*