summary refs log tree commit diff stats
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2011-07-28 11:39:25 +1000
committer NeilBrown <neilb@suse.de> 2011-07-28 11:39:25 +1000
commit 1a0b7cd82657a590f163b090bd9123a3a6b9aae4 (patch)
tree 0dc3ccf6ad6ad88283e6d7924c38524865866cba
parent f84ee364dd15af11cada1e673f94128f62db189e (diff)
downloadtalos-obmc-linux-1a0b7cd82657a590f163b090bd9123a3a6b9aae4.tar.gz
talos-obmc-linux-1a0b7cd82657a590f163b090bd9123a3a6b9aae4.zip
md/raid10: record bad blocks due to write errors during resync/recovery.
If we get a write error during resync/recovery, don't fail the device but instead record a bad block. If recording the bad block fails, we can then fail the device.
Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- drivers/md/raid10.c 33
1 files changed, 23 insertions, 10 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 10415ddfcb42..e54ff3274eda 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1452,9 +1452,10 @@ static void end_sync_write(struct bio *bio, int error)
d = find_bio_disk(conf, r10_bio, bio, &slot);
- if (!uptodate)
- md_error(mddev, conf->mirrors[d].rdev);
- else if (is_badblock(conf->mirrors[d].rdev,
+ if (!uptodate) {
+ set_bit(WriteErrorSeen, &conf->mirrors[d].rdev->flags);
+ set_bit(R10BIO_WriteError, &r10_bio->state);
+ } else if (is_badblock(conf->mirrors[d].rdev,
r10_bio->devs[slot].addr,
r10_bio->sectors,
&first_bad, &bad_sectors))
@@ -1465,7 +1466,8 @@ static void end_sync_write(struct bio *bio, int error)
if (r10_bio->master_bio == NULL) {
/* the primary of several recovery bios */
sector_t s = r10_bio->sectors;
- if (test_bit(R10BIO_MadeGood, &r10_bio->state))
+ if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
+ test_bit(R10BIO_WriteError, &r10_bio->state))
reschedule_retry(r10_bio);
else
put_buf(r10_bio);
@@ -1473,7 +1475,8 @@ static void end_sync_write(struct bio *bio, int error)
break;
} else {
r10bio_t *r10_bio2 = (r10bio_t *)r10_bio->master_bio;
- if (test_bit(R10BIO_MadeGood, &r10_bio->state))
+ if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
+ test_bit(R10BIO_WriteError, &r10_bio->state))
reschedule_retry(r10_bio);
else
put_buf(r10_bio);
@@ -2029,23 +2032,33 @@ static void handle_write_completed(conf_t *conf, r10bio_t *r10_bio)
/* Some sort of write request has finished and it
* succeeded in writing where we thought there was a
* bad block. So forget the bad block.
+ * Or possibly it failed and we need to record
+ * a bad block.
*/
int m;
mdk_rdev_t *rdev;
if (test_bit(R10BIO_IsSync, &r10_bio->state) ||
test_bit(R10BIO_IsRecover, &r10_bio->state)) {
- for (m = 0; m < conf->copies; m++)
- if (r10_bio->devs[m].bio &&
- test_bit(BIO_UPTODATE,
+ for (m = 0; m < conf->copies; m++) {
+ int dev = r10_bio->devs[m].devnum;
+ rdev = conf->mirrors[dev].rdev;
+ if (r10_bio->devs[m].bio == NULL)
+ continue;
+ if (test_bit(BIO_UPTODATE,
&r10_bio->devs[m].bio->bi_flags)) {
- int dev = r10_bio->devs[m].devnum;
- rdev = conf->mirrors[dev].rdev;
rdev_clear_badblocks(
rdev,
r10_bio->devs[m].addr,
r10_bio->sectors);
+ } else {
+ if (!rdev_set_badblocks(
+ rdev,
+ r10_bio->devs[m].addr,
+ r10_bio->sectors, 0))
+ md_error(conf->mddev, rdev);
}
+ }
put_buf(r10_bio);
} else {
for (m = 0; m < conf->copies; m++) {
OpenPOWER on IntegriCloud