summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/md/raid5.c119
-rw-r--r--drivers/md/raid5.h1
2 files changed, 97 insertions, 23 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index e5795d39d418..14878a9ae09d 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -503,8 +503,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
for (i = disks; i--; ) {
int rw;
- struct bio *bi;
- struct md_rdev *rdev;
+ struct bio *bi, *rbi;
+ struct md_rdev *rdev, *rrdev = NULL;
if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) {
if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags))
rw = WRITE_FUA;
@@ -516,27 +516,36 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
continue;
bi = &sh->dev[i].req;
+ rbi = &sh->dev[i].rreq; /* For writing to replacement */
bi->bi_rw = rw;
- if (rw & WRITE)
+ rbi->bi_rw = rw;
+ if (rw & WRITE) {
bi->bi_end_io = raid5_end_write_request;
- else
+ rbi->bi_end_io = raid5_end_write_request;
+ } else
bi->bi_end_io = raid5_end_read_request;
rcu_read_lock();
- if (rw == READ &&
- test_bit(R5_ReadRepl, &sh->dev[i].flags))
+ rdev = rcu_dereference(conf->disks[i].rdev);
+ if (rw & WRITE)
+ rrdev = rcu_dereference(conf->disks[i].replacement);
+ else if (test_bit(R5_ReadRepl, &sh->dev[i].flags))
rdev = rcu_dereference(conf->disks[i].replacement);
- else
- rdev = rcu_dereference(conf->disks[i].rdev);
+
if (rdev && test_bit(Faulty, &rdev->flags))
rdev = NULL;
if (rdev)
atomic_inc(&rdev->nr_pending);
+ if (rrdev && test_bit(Faulty, &rrdev->flags))
+ rrdev = NULL;
+ if (rrdev)
+ atomic_inc(&rrdev->nr_pending);
rcu_read_unlock();
/* We have already checked bad blocks for reads. Now
- * need to check for writes.
+ * need to check for writes. We never accept write errors
+ * on the replacement, so we don't to check rrdev.
*/
while ((rw & WRITE) && rdev &&
test_bit(WriteErrorSeen, &rdev->flags)) {
@@ -583,8 +592,32 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
bi->bi_io_vec[0].bv_offset = 0;
bi->bi_size = STRIPE_SIZE;
bi->bi_next = NULL;
+ if (rrdev)
+ set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
generic_make_request(bi);
- } else {
+ }
+ if (rrdev) {
+ if (s->syncing || s->expanding || s->expanded)
+ md_sync_acct(rrdev->bdev, STRIPE_SECTORS);
+
+ set_bit(STRIPE_IO_STARTED, &sh->state);
+
+ rbi->bi_bdev = rrdev->bdev;
+ pr_debug("%s: for %llu schedule op %ld on "
+ "replacement disc %d\n",
+ __func__, (unsigned long long)sh->sector,
+ rbi->bi_rw, i);
+ atomic_inc(&sh->count);
+ rbi->bi_sector = sh->sector + rrdev->data_offset;
+ rbi->bi_flags = 1 << BIO_UPTODATE;
+ rbi->bi_idx = 0;
+ rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
+ rbi->bi_io_vec[0].bv_offset = 0;
+ rbi->bi_size = STRIPE_SIZE;
+ rbi->bi_next = NULL;
+ generic_make_request(rbi);
+ }
+ if (!rdev && !rrdev) {
if (rw & WRITE)
set_bit(STRIPE_DEGRADED, &sh->state);
pr_debug("skip op %ld on disc %d for sector %llu\n",
@@ -1695,14 +1728,23 @@ static void raid5_end_write_request(struct bio *bi, int error)
struct stripe_head *sh = bi->bi_private;
struct r5conf *conf = sh->raid_conf;
int disks = sh->disks, i;
+ struct md_rdev *uninitialized_var(rdev);
int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
sector_t first_bad;
int bad_sectors;
+ int replacement = 0;
- for (i=0 ; i<disks; i++)
- if (bi == &sh->dev[i].req)
+ for (i = 0 ; i < disks; i++) {
+ if (bi == &sh->dev[i].req) {
+ rdev = conf->disks[i].rdev;
break;
-
+ }
+ if (bi == &sh->dev[i].rreq) {
+ rdev = conf->disks[i].replacement;
+ replacement = 1;
+ break;
+ }
+ }
pr_debug("end_write_request %llu/%d, count %d, uptodate: %d.\n",
(unsigned long long)sh->sector, i, atomic_read(&sh->count),
uptodate);
@@ -1711,21 +1753,30 @@ static void raid5_end_write_request(struct bio *bi, int error)
return;
}
- if (!uptodate) {
- set_bit(WriteErrorSeen, &conf->disks[i].rdev->flags);
- set_bit(R5_WriteError, &sh->dev[i].flags);
- } else if (is_badblock(conf->disks[i].rdev, sh->sector, STRIPE_SECTORS,
- &first_bad, &bad_sectors))
- set_bit(R5_MadeGood, &sh->dev[i].flags);
+ if (replacement) {
+ if (!uptodate)
+ md_error(conf->mddev, rdev);
+ else if (is_badblock(rdev, sh->sector,
+ STRIPE_SECTORS,
+ &first_bad, &bad_sectors))
+ set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
+ } else {
+ if (!uptodate) {
+ set_bit(WriteErrorSeen, &rdev->flags);
+ set_bit(R5_WriteError, &sh->dev[i].flags);
+ } else if (is_badblock(rdev, sh->sector,
+ STRIPE_SECTORS,
+ &first_bad, &bad_sectors))
+ set_bit(R5_MadeGood, &sh->dev[i].flags);
+ }
+ rdev_dec_pending(rdev, conf->mddev);
- rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
-
- clear_bit(R5_LOCKED, &sh->dev[i].flags);
+ if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
+ clear_bit(R5_LOCKED, &sh->dev[i].flags);
set_bit(STRIPE_HANDLE, &sh->state);
release_stripe(sh);
}
-
static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous);
static void raid5_build_block(struct stripe_head *sh, int i, int previous)
@@ -1739,6 +1790,13 @@ static void raid5_build_block(struct stripe_head *sh, int i, int previous)
dev->req.bi_private = sh;
dev->vec.bv_page = dev->page;
+ bio_init(&dev->rreq);
+ dev->rreq.bi_io_vec = &dev->rvec;
+ dev->rreq.bi_vcnt++;
+ dev->rreq.bi_max_vecs++;
+ dev->rreq.bi_private = sh;
+ dev->rvec.bv_page = dev->page;
+
dev->flags = 0;
dev->sector = compute_blocknr(sh, i, previous);
}
@@ -3132,6 +3190,15 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
} else
clear_bit(R5_MadeGood, &dev->flags);
}
+ if (test_bit(R5_MadeGoodRepl, &dev->flags)) {
+ struct md_rdev *rdev2 = rcu_dereference(
+ conf->disks[i].replacement);
+ if (rdev2 && !test_bit(Faulty, &rdev2->flags)) {
+ s->handle_bad_blocks = 1;
+ atomic_inc(&rdev2->nr_pending);
+ } else
+ clear_bit(R5_MadeGoodRepl, &dev->flags);
+ }
if (!test_bit(R5_Insync, &dev->flags)) {
/* The ReadError flag will just be confusing now */
clear_bit(R5_ReadError, &dev->flags);
@@ -3404,6 +3471,12 @@ finish:
STRIPE_SECTORS);
rdev_dec_pending(rdev, conf->mddev);
}
+ if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) {
+ rdev = conf->disks[i].replacement;
+ rdev_clear_badblocks(rdev, sh->sector,
+ STRIPE_SECTORS);
+ rdev_dec_pending(rdev, conf->mddev);
+ }
}
if (s.ops_request)
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 4cfd8016010e..f6faaa16a565 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -259,6 +259,7 @@ struct stripe_head_state {
enum r5dev_flags {
R5_UPTODATE, /* page contains current data */
R5_LOCKED, /* IO has been submitted on "req" */
+ R5_DOUBLE_LOCKED,/* Cannot clear R5_LOCKED until 2 writes complete */
R5_OVERWRITE, /* towrite covers whole page */
/* and some that are internal to handle_stripe */
R5_Insync, /* rdev && rdev->in_sync at start */
OpenPOWER on IntegriCloud