diff options
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- | drivers/md/raid5.c | 94 |
1 files changed, 61 insertions, 33 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 087eee0cb809..3b27df52456b 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -94,6 +94,8 @@ #define __inline__ #endif +#define printk_rl(args...) ((void) (printk_ratelimit() && printk(args))) + #if !RAID6_USE_EMPTY_ZERO_PAGE /* In .bss so it's zeroed */ const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); @@ -1143,10 +1145,12 @@ static void raid5_end_read_request(struct bio * bi, int error) set_bit(R5_UPTODATE, &sh->dev[i].flags); if (test_bit(R5_ReadError, &sh->dev[i].flags)) { rdev = conf->disks[i].rdev; - printk(KERN_INFO "raid5:%s: read error corrected (%lu sectors at %llu on %s)\n", - mdname(conf->mddev), STRIPE_SECTORS, - (unsigned long long)(sh->sector + rdev->data_offset), - bdevname(rdev->bdev, b)); + printk_rl(KERN_INFO "raid5:%s: read error corrected" + " (%lu sectors at %llu on %s)\n", + mdname(conf->mddev), STRIPE_SECTORS, + (unsigned long long)(sh->sector + + rdev->data_offset), + bdevname(rdev->bdev, b)); clear_bit(R5_ReadError, &sh->dev[i].flags); clear_bit(R5_ReWrite, &sh->dev[i].flags); } @@ -1160,16 +1164,22 @@ static void raid5_end_read_request(struct bio * bi, int error) clear_bit(R5_UPTODATE, &sh->dev[i].flags); atomic_inc(&rdev->read_errors); if (conf->mddev->degraded) - printk(KERN_WARNING "raid5:%s: read error not correctable (sector %llu on %s).\n", - mdname(conf->mddev), - (unsigned long long)(sh->sector + rdev->data_offset), - bdn); + printk_rl(KERN_WARNING + "raid5:%s: read error not correctable " + "(sector %llu on %s).\n", + mdname(conf->mddev), + (unsigned long long)(sh->sector + + rdev->data_offset), + bdn); else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) /* Oh, no!!! */ - printk(KERN_WARNING "raid5:%s: read error NOT corrected!! (sector %llu on %s).\n", - mdname(conf->mddev), - (unsigned long long)(sh->sector + rdev->data_offset), - bdn); + printk_rl(KERN_WARNING + "raid5:%s: read error NOT corrected!! " + "(sector %llu on %s).\n", + mdname(conf->mddev), + (unsigned long long)(sh->sector + + rdev->data_offset), + bdn); else if (atomic_read(&rdev->read_errors) > conf->max_nr_stripes) printk(KERN_WARNING @@ -1258,7 +1268,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) /* * if recovery was running, make sure it aborts. */ - set_bit(MD_RECOVERY_ERR, &mddev->recovery); + set_bit(MD_RECOVERY_INTR, &mddev->recovery); } set_bit(Faulty, &rdev->flags); printk (KERN_ALERT @@ -1992,6 +2002,7 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh, * have quiesced. */ if ((s->uptodate == disks - 1) && + (s->failed && disk_idx == s->failed_num) && !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) { set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); set_bit(R5_Wantcompute, &dev->flags); @@ -2006,12 +2017,7 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh, */ s->uptodate++; return 0; /* uptodate + compute == disks */ - } else if ((s->uptodate < disks - 1) && - test_bit(R5_Insync, &dev->flags)) { - /* Note: we hold off compute operations while checks are - * in flight, but we still prefer 'compute' over 'read' - * hence we only read if (uptodate < * disks-1) - */ + } else if (test_bit(R5_Insync, &dev->flags)) { set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantread, &dev->flags); if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) @@ -2077,7 +2083,9 @@ static void handle_issuing_new_read_requests6(struct stripe_head *sh, /* we would like to get this block, possibly * by computing it, but we might not be able to */ - if (s->uptodate == disks-1) { + if ((s->uptodate == disks - 1) && + (s->failed && (i == r6s->failed_num[0] || + i == r6s->failed_num[1]))) { pr_debug("Computing stripe %llu block %d\n", (unsigned long long)sh->sector, i); compute_block_1(sh, i, 0); @@ -2369,8 +2377,8 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, /* complete a check operation */ if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) { - clear_bit(STRIPE_OP_CHECK, &sh->ops.ack); - clear_bit(STRIPE_OP_CHECK, &sh->ops.pending); + clear_bit(STRIPE_OP_CHECK, &sh->ops.ack); + clear_bit(STRIPE_OP_CHECK, &sh->ops.pending); if (s->failed == 0) { if (sh->ops.zero_sum_result == 0) /* parity is correct (on disc, @@ -2400,16 +2408,6 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, canceled_check = 1; /* STRIPE_INSYNC is not set */ } - /* check if we can clear a parity disk reconstruct */ - if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) && - test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { - - clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending); - clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); - clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack); - clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); - } - /* start a new check operation if there are no failures, the stripe is * not insync, and a repair is not in flight */ @@ -2424,6 +2422,17 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, } } + /* check if we can clear a parity disk reconstruct */ + if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) && + test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { + + clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending); + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack); + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); + } + + /* Wait for check parity and compute block operations to complete * before write-back. If a failure occurred while the check operation * was in flight we need to cycle this stripe through handle_stripe @@ -2634,6 +2643,7 @@ static void handle_stripe5(struct stripe_head *sh) struct r5dev *dev; unsigned long pending = 0; mdk_rdev_t *blocked_rdev = NULL; + int prexor; memset(&s, 0, sizeof(s)); pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d " @@ -2763,9 +2773,11 @@ static void handle_stripe5(struct stripe_head *sh) /* leave prexor set until postxor is done, allows us to distinguish * a rmw from a rcw during biodrain */ + prexor = 0; if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) && test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) { + prexor = 1; clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete); clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack); clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending); @@ -2799,6 +2811,8 @@ static void handle_stripe5(struct stripe_head *sh) if (!test_and_set_bit( STRIPE_OP_IO, &sh->ops.pending)) sh->ops.count++; + if (prexor) + continue; if (!test_bit(R5_Insync, &dev->flags) || (i == sh->pd_idx && s.failed == 0)) set_bit(STRIPE_INSYNC, &sh->state); @@ -2879,6 +2893,8 @@ static void handle_stripe5(struct stripe_head *sh) for (i = conf->raid_disks; i--; ) { set_bit(R5_Wantwrite, &sh->dev[i].flags); + set_bit(R5_LOCKED, &dev->flags); + s.locked++; if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) sh->ops.count++; } @@ -2892,6 +2908,7 @@ static void handle_stripe5(struct stripe_head *sh) conf->raid_disks); s.locked += handle_write_operations5(sh, 1, 1); } else if (s.expanded && + s.locked == 0 && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) { clear_bit(STRIPE_EXPAND_READY, &sh->state); atomic_dec(&conf->reshape_stripes); @@ -4256,6 +4273,7 @@ static int run(mddev_t *mddev) goto abort; } spin_lock_init(&conf->device_lock); + mddev->queue->queue_lock = &conf->device_lock; init_waitqueue_head(&conf->wait_for_stripe); init_waitqueue_head(&conf->wait_for_overlap); INIT_LIST_HEAD(&conf->handle_list); @@ -4285,7 +4303,9 @@ static int run(mddev_t *mddev) " disk %d\n", bdevname(rdev->bdev,b), raid_disk); working_disks++; - } + } else + /* Cannot rely on bitmap to complete recovery */ + conf->fullsync = 1; } /* @@ -4562,6 +4582,14 @@ static int raid5_remove_disk(mddev_t *mddev, int number) err = -EBUSY; goto abort; } + /* Only remove non-faulty devices if recovery + * isn't possible. + */ + if (!test_bit(Faulty, &rdev->flags) && + mddev->degraded <= conf->max_degraded) { + err = -EBUSY; + goto abort; + } p->rdev = NULL; synchronize_rcu(); if (atomic_read(&rdev->nr_pending)) { |