diff options
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- | drivers/md/raid5.c | 140 |
1 files changed, 120 insertions, 20 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b29135acb1d9..20ae32d67e21 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -22,6 +22,7 @@ #include <linux/raid/raid5.h> #include <linux/highmem.h> #include <linux/bitops.h> +#include <linux/kthread.h> #include <asm/atomic.h> #include <linux/raid/bitmap.h> @@ -1504,6 +1505,7 @@ static void handle_stripe(struct stripe_head *sh) clear_bit(STRIPE_EXPANDING, &sh->state); } else if (expanded) { clear_bit(STRIPE_EXPAND_READY, &sh->state); + atomic_dec(&conf->reshape_stripes); wake_up(&conf->wait_for_overlap); md_done_sync(conf->mddev, STRIPE_SECTORS, 1); } @@ -1875,6 +1877,26 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i */ int i; int dd_idx; + + if (sector_nr == 0 && + conf->expand_progress != 0) { + /* restarting in the middle, skip the initial sectors */ + sector_nr = conf->expand_progress; + sector_div(sector_nr, conf->raid_disks-1); + *skipped = 1; + return sector_nr; + } + + /* Cannot proceed until we've updated the superblock... */ + wait_event(conf->wait_for_overlap, + atomic_read(&conf->reshape_stripes)==0); + mddev->reshape_position = conf->expand_progress; + + mddev->sb_dirty = 1; + md_wakeup_thread(mddev->thread); + wait_event(mddev->sb_wait, mddev->sb_dirty == 0 || + kthread_should_stop()); + for (i=0; i < conf->chunk_size/512; i+= STRIPE_SECTORS) { int j; int skipped = 0; @@ -1882,6 +1904,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i sh = get_active_stripe(conf, sector_nr+i, conf->raid_disks, pd_idx, 0); set_bit(STRIPE_EXPANDING, &sh->state); + atomic_inc(&conf->reshape_stripes); /* If any of this stripe is beyond the end of the old * array, then we need to zero those blocks */ @@ -2121,10 +2144,61 @@ static int run(mddev_t *mddev) return -EIO; } + if (mddev->reshape_position != MaxSector) { + /* Check that we can continue the reshape. + * Currently only disks can change, it must + * increase, and we must be past the point where + * a stripe over-writes itself + */ + sector_t here_new, here_old; + int old_disks; + + if (mddev->new_level != mddev->level || + mddev->new_layout != mddev->layout || + mddev->new_chunk != mddev->chunk_size) { + printk(KERN_ERR "raid5: %s: unsupported reshape required - aborting.\n", + mdname(mddev)); + return -EINVAL; + } + if (mddev->delta_disks <= 0) { + printk(KERN_ERR "raid5: %s: unsupported reshape (reduce disks) required - aborting.\n", + mdname(mddev)); + return -EINVAL; + } + old_disks = mddev->raid_disks - mddev->delta_disks; + /* reshape_position must be on a new-stripe boundary, and one + * further up in new geometry must map after here in old geometry. + */ + here_new = mddev->reshape_position; + if (sector_div(here_new, (mddev->chunk_size>>9)*(mddev->raid_disks-1))) { + printk(KERN_ERR "raid5: reshape_position not on a stripe boundary\n"); + return -EINVAL; + } + /* here_new is the stripe we will write to */ + here_old = mddev->reshape_position; + sector_div(here_old, (mddev->chunk_size>>9)*(old_disks-1)); + /* here_old is the first stripe that we might need to read from */ + if (here_new >= here_old) { + /* Reading from the same stripe as writing to - bad */ + printk(KERN_ERR "raid5: reshape_position too early for auto-recovery - aborting.\n"); + return -EINVAL; + } + printk(KERN_INFO "raid5: reshape will continue\n"); + /* OK, we should be able to continue; */ + } + + mddev->private = kzalloc(sizeof (raid5_conf_t), GFP_KERNEL); if ((conf = mddev->private) == NULL) goto abort; - conf->disks = kzalloc(mddev->raid_disks * sizeof(struct disk_info), + if (mddev->reshape_position == MaxSector) { + conf->previous_raid_disks = conf->raid_disks = mddev->raid_disks; + } else { + conf->raid_disks = mddev->raid_disks; + conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks; + } + + conf->disks = kzalloc(conf->raid_disks * sizeof(struct disk_info), GFP_KERNEL); if (!conf->disks) goto abort; @@ -2148,7 +2222,7 @@ static int run(mddev_t *mddev) ITERATE_RDEV(mddev,rdev,tmp) { raid_disk = rdev->raid_disk; - if (raid_disk >= mddev->raid_disks + if (raid_disk >= conf->raid_disks || raid_disk < 0) continue; disk = conf->disks + raid_disk; @@ -2164,7 +2238,6 @@ static int run(mddev_t *mddev) } } - conf->raid_disks = mddev->raid_disks; /* * 0 for a fully functional array, 1 for a degraded array. */ @@ -2174,7 +2247,7 @@ static int run(mddev_t *mddev) conf->level = mddev->level; conf->algorithm = mddev->layout; conf->max_nr_stripes = NR_STRIPES; - conf->expand_progress = MaxSector; + conf->expand_progress = mddev->reshape_position; /* device size must be a multiple of chunk size */ mddev->size &= ~(mddev->chunk_size/1024 -1); @@ -2247,6 +2320,20 @@ static int run(mddev_t *mddev) print_raid5_conf(conf); + if (conf->expand_progress != MaxSector) { + printk("...ok start reshape thread\n"); + atomic_set(&conf->reshape_stripes, 0); + clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); + clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); + set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); + set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); + mddev->sync_thread = md_register_thread(md_do_sync, mddev, + "%s_reshape"); + /* FIXME if md_register_thread fails?? */ + md_wakeup_thread(mddev->sync_thread); + + } + /* read-ahead size must cover two whole stripes, which is * 2 * (n-1) * chunksize where 'n' is the number of raid devices */ @@ -2262,8 +2349,8 @@ static int run(mddev_t *mddev) mddev->queue->unplug_fn = raid5_unplug_device; mddev->queue->issue_flush_fn = raid5_issue_flush; + mddev->array_size = mddev->size * (conf->previous_raid_disks - 1); - mddev->array_size = mddev->size * (mddev->raid_disks - 1); return 0; abort: if (conf) { @@ -2436,7 +2523,7 @@ static int raid5_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) /* * find the disk ... */ - for (disk=0; disk < mddev->raid_disks; disk++) + for (disk=0; disk < conf->raid_disks; disk++) if ((p=conf->disks + disk)->rdev == NULL) { clear_bit(In_sync, &rdev->flags); rdev->raid_disk = disk; @@ -2518,9 +2605,10 @@ static int raid5_reshape(mddev_t *mddev, int raid_disks) if (err) return err; + atomic_set(&conf->reshape_stripes, 0); spin_lock_irq(&conf->device_lock); conf->previous_raid_disks = conf->raid_disks; - mddev->raid_disks = conf->raid_disks = raid_disks; + conf->raid_disks = raid_disks; conf->expand_progress = 0; spin_unlock_irq(&conf->device_lock); @@ -2542,6 +2630,14 @@ static int raid5_reshape(mddev_t *mddev, int raid_disks) } mddev->degraded = (raid_disks - conf->previous_raid_disks) - added_devices; + mddev->new_chunk = mddev->chunk_size; + mddev->new_layout = mddev->layout; + mddev->new_level = mddev->level; + mddev->raid_disks = raid_disks; + mddev->delta_disks = raid_disks - conf->previous_raid_disks; + mddev->reshape_position = 0; + mddev->sb_dirty = 1; + clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); @@ -2552,6 +2648,7 @@ static int raid5_reshape(mddev_t *mddev, int raid_disks) mddev->recovery = 0; spin_lock_irq(&conf->device_lock); mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; + mddev->delta_disks = 0; conf->expand_progress = MaxSector; spin_unlock_irq(&conf->device_lock); return -EAGAIN; @@ -2566,20 +2663,23 @@ static void end_reshape(raid5_conf_t *conf) { struct block_device *bdev; - conf->mddev->array_size = conf->mddev->size * (conf->mddev->raid_disks-1); - set_capacity(conf->mddev->gendisk, conf->mddev->array_size << 1); - conf->mddev->changed = 1; - - bdev = bdget_disk(conf->mddev->gendisk, 0); - if (bdev) { - mutex_lock(&bdev->bd_inode->i_mutex); - i_size_write(bdev->bd_inode, conf->mddev->array_size << 10); - mutex_unlock(&bdev->bd_inode->i_mutex); - bdput(bdev); + if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) { + conf->mddev->array_size = conf->mddev->size * (conf->raid_disks-1); + set_capacity(conf->mddev->gendisk, conf->mddev->array_size << 1); + conf->mddev->changed = 1; + + bdev = bdget_disk(conf->mddev->gendisk, 0); + if (bdev) { + mutex_lock(&bdev->bd_inode->i_mutex); + i_size_write(bdev->bd_inode, conf->mddev->array_size << 10); + mutex_unlock(&bdev->bd_inode->i_mutex); + bdput(bdev); + } + spin_lock_irq(&conf->device_lock); + conf->expand_progress = MaxSector; + spin_unlock_irq(&conf->device_lock); + conf->mddev->reshape_position = MaxSector; } - spin_lock_irq(&conf->device_lock); - conf->expand_progress = MaxSector; - spin_unlock_irq(&conf->device_lock); } static void raid5_quiesce(mddev_t *mddev, int state) |