diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/dm-log-userspace-transfer.c | 2 | ||||
-rw-r--r-- | drivers/md/linear.c | 1 | ||||
-rw-r--r-- | drivers/md/md.c | 80 | ||||
-rw-r--r-- | drivers/md/md.h | 4 | ||||
-rw-r--r-- | drivers/md/multipath.c | 1 | ||||
-rw-r--r-- | drivers/md/raid0.c | 42 | ||||
-rw-r--r-- | drivers/md/raid1.c | 6 | ||||
-rw-r--r-- | drivers/md/raid10.c | 13 | ||||
-rw-r--r-- | drivers/md/raid5.c | 61 |
9 files changed, 146 insertions, 64 deletions
diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c index 049eaf12aaab..1f23e048f077 100644 --- a/drivers/md/dm-log-userspace-transfer.c +++ b/drivers/md/dm-log-userspace-transfer.c @@ -134,7 +134,7 @@ static void cn_ulog_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp) { struct dm_ulog_request *tfr = (struct dm_ulog_request *)(msg + 1); - if (!cap_raised(nsp->eff_cap, CAP_SYS_ADMIN)) + if (!cap_raised(current_cap(), CAP_SYS_ADMIN)) return; spin_lock(&receiving_list_lock); diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 8a2f767f26d8..0ed7f6bc2a7f 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -216,7 +216,6 @@ static int linear_run (mddev_t *mddev) if (md_check_no_bitmap(mddev)) return -EINVAL; - mddev->queue->queue_lock = &mddev->queue->__queue_lock; conf = linear_conf(mddev, mddev->raid_disks); if (!conf) diff --git a/drivers/md/md.c b/drivers/md/md.c index b76cfc89e1b5..d5ad7723b172 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -287,6 +287,7 @@ static int md_make_request(struct request_queue *q, struct bio *bio) mddev_t *mddev = q->queuedata; int rv; int cpu; + unsigned int sectors; if (mddev == NULL || mddev->pers == NULL || !mddev->ready) { @@ -311,12 +312,16 @@ static int md_make_request(struct request_queue *q, struct bio *bio) atomic_inc(&mddev->active_io); rcu_read_unlock(); + /* + * save the sectors now since our bio can + * go away inside make_request + */ + sectors = bio_sectors(bio); rv = mddev->pers->make_request(mddev, bio); cpu = part_stat_lock(); part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); - part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], - bio_sectors(bio)); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors); part_stat_unlock(); if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended) @@ -548,6 +553,9 @@ static mddev_t * mddev_find(dev_t unit) { mddev_t *mddev, *new = NULL; + if (unit && MAJOR(unit) != MD_MAJOR) + unit &= ~((1<<MdpMinorShift)-1); + retry: spin_lock(&all_mddevs_lock); @@ -1947,8 +1955,6 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared) __bdevname(dev, b)); return PTR_ERR(bdev); } - if (!shared) - set_bit(AllReserved, &rdev->flags); rdev->bdev = bdev; return err; } @@ -2465,6 +2471,9 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) if (rdev->raid_disk != -1) return -EBUSY; + if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery)) + return -EBUSY; + if (rdev->mddev->pers->hot_add_disk == NULL) return -EINVAL; @@ -2610,12 +2619,11 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) mddev_lock(mddev); list_for_each_entry(rdev2, &mddev->disks, same_set) - if (test_bit(AllReserved, &rdev2->flags) || - (rdev->bdev == rdev2->bdev && - rdev != rdev2 && - overlaps(rdev->data_offset, rdev->sectors, - rdev2->data_offset, - rdev2->sectors))) { + if (rdev->bdev == rdev2->bdev && + rdev != rdev2 && + overlaps(rdev->data_offset, rdev->sectors, + rdev2->data_offset, + rdev2->sectors)) { overlap = 1; break; } @@ -4133,10 +4141,10 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len) } mddev->array_sectors = sectors; - set_capacity(mddev->gendisk, mddev->array_sectors); - if (mddev->pers) + if (mddev->pers) { + set_capacity(mddev->gendisk, mddev->array_sectors); revalidate_disk(mddev->gendisk); - + } return len; } @@ -4619,6 +4627,7 @@ static int do_md_run(mddev_t *mddev) } set_capacity(mddev->gendisk, mddev->array_sectors); revalidate_disk(mddev->gendisk); + mddev->changed = 1; kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); out: return err; @@ -4707,6 +4716,7 @@ static void md_clean(mddev_t *mddev) mddev->sync_speed_min = mddev->sync_speed_max = 0; mddev->recovery = 0; mddev->in_sync = 0; + mddev->changed = 0; mddev->degraded = 0; mddev->safemode = 0; mddev->bitmap_info.offset = 0; @@ -4822,6 +4832,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) set_capacity(disk, 0); mutex_unlock(&mddev->open_mutex); + mddev->changed = 1; revalidate_disk(disk); if (mddev->ro) @@ -5578,6 +5589,8 @@ static int update_raid_disks(mddev_t *mddev, int raid_disks) mddev->delta_disks = raid_disks - mddev->raid_disks; rv = mddev->pers->check_reshape(mddev); + if (rv < 0) + mddev->delta_disks = 0; return rv; } @@ -6004,7 +6017,7 @@ static int md_open(struct block_device *bdev, fmode_t mode) atomic_inc(&mddev->openers); mutex_unlock(&mddev->open_mutex); - check_disk_size_change(mddev->gendisk, bdev); + check_disk_change(bdev); out: return err; } @@ -6019,6 +6032,21 @@ static int md_release(struct gendisk *disk, fmode_t mode) return 0; } + +static int md_media_changed(struct gendisk *disk) +{ + mddev_t *mddev = disk->private_data; + + return mddev->changed; +} + +static int md_revalidate(struct gendisk *disk) +{ + mddev_t *mddev = disk->private_data; + + mddev->changed = 0; + return 0; +} static const struct block_device_operations md_fops = { .owner = THIS_MODULE, @@ -6029,6 +6057,8 @@ static const struct block_device_operations md_fops = .compat_ioctl = md_compat_ioctl, #endif .getgeo = md_getgeo, + .media_changed = md_media_changed, + .revalidate_disk= md_revalidate, }; static int md_thread(void * arg) @@ -6985,9 +7015,6 @@ void md_do_sync(mddev_t *mddev) } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) mddev->resync_min = mddev->curr_resync_completed; mddev->curr_resync = 0; - if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) - mddev->curr_resync_completed = 0; - sysfs_notify(&mddev->kobj, NULL, "sync_completed"); wake_up(&resync_wait); set_bit(MD_RECOVERY_DONE, &mddev->recovery); md_wakeup_thread(mddev->thread); @@ -7028,7 +7055,7 @@ static int remove_and_add_spares(mddev_t *mddev) } } - if (mddev->degraded && ! mddev->ro && !mddev->recovery_disabled) { + if (mddev->degraded && !mddev->recovery_disabled) { list_for_each_entry(rdev, &mddev->disks, same_set) { if (rdev->raid_disk >= 0 && !test_bit(In_sync, &rdev->flags) && @@ -7151,7 +7178,20 @@ void md_check_recovery(mddev_t *mddev) /* Only thing we do on a ro array is remove * failed devices. */ - remove_and_add_spares(mddev); + mdk_rdev_t *rdev; + list_for_each_entry(rdev, &mddev->disks, same_set) + if (rdev->raid_disk >= 0 && + !test_bit(Blocked, &rdev->flags) && + test_bit(Faulty, &rdev->flags) && + atomic_read(&rdev->nr_pending)==0) { + if (mddev->pers->hot_remove_disk( + mddev, rdev->raid_disk)==0) { + char nm[20]; + sprintf(nm,"rd%d", rdev->raid_disk); + sysfs_remove_link(&mddev->kobj, nm); + rdev->raid_disk = -1; + } + } clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); goto unlock; } @@ -7321,7 +7361,7 @@ static int __init md_init(void) { int ret = -ENOMEM; - md_wq = alloc_workqueue("md", WQ_RESCUER, 0); + md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0); if (!md_wq) goto err_wq; diff --git a/drivers/md/md.h b/drivers/md/md.h index eec517ced31a..12215d437fcc 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -93,8 +93,6 @@ struct mdk_rdev_s #define Faulty 1 /* device is known to have a fault */ #define In_sync 2 /* device is in_sync with rest of array */ #define WriteMostly 4 /* Avoid reading if at all possible */ -#define AllReserved 6 /* If whole device is reserved for - * one array */ #define AutoDetected 7 /* added by auto-detect */ #define Blocked 8 /* An error occured on an externally * managed array, don't allow writes @@ -276,6 +274,8 @@ struct mddev_s atomic_t active; /* general refcount */ atomic_t openers; /* number of active opens */ + int changed; /* True if we might need to + * reread partition info */ int degraded; /* whether md should consider * adding a spare */ diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 6d7ddf32ef2e..3a62d440e27b 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -435,7 +435,6 @@ static int multipath_run (mddev_t *mddev) * bookkeeping area. [whatever we allocate in multipath_run(), * should be freed in multipath_stop()] */ - mddev->queue->queue_lock = &mddev->queue->__queue_lock; conf = kzalloc(sizeof(multipath_conf_t), GFP_KERNEL); mddev->private = conf; diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index a39f4c355e55..c0ac457f1218 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -179,6 +179,14 @@ static int create_strip_zones(mddev_t *mddev, raid0_conf_t **private_conf) rdev1->new_raid_disk = j; } + if (mddev->level == 1) { + /* taiking over a raid1 array- + * we have only one active disk + */ + j = 0; + rdev1->new_raid_disk = j; + } + if (j < 0 || j >= mddev->raid_disks) { printk(KERN_ERR "md/raid0:%s: bad disk number %d - " "aborting!\n", mdname(mddev), j); @@ -353,7 +361,6 @@ static int raid0_run(mddev_t *mddev) if (md_check_no_bitmap(mddev)) return -EINVAL; blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); - mddev->queue->queue_lock = &mddev->queue->__queue_lock; /* if private is not null, we are here after takeover */ if (mddev->private == NULL) { @@ -644,12 +651,39 @@ static void *raid0_takeover_raid10(mddev_t *mddev) return priv_conf; } +static void *raid0_takeover_raid1(mddev_t *mddev) +{ + raid0_conf_t *priv_conf; + + /* Check layout: + * - (N - 1) mirror drives must be already faulty + */ + if ((mddev->raid_disks - 1) != mddev->degraded) { + printk(KERN_ERR "md/raid0:%s: (N - 1) mirrors drives must be already faulty!\n", + mdname(mddev)); + return ERR_PTR(-EINVAL); + } + + /* Set new parameters */ + mddev->new_level = 0; + mddev->new_layout = 0; + mddev->new_chunk_sectors = 128; /* by default set chunk size to 64k */ + mddev->delta_disks = 1 - mddev->raid_disks; + mddev->raid_disks = 1; + /* make sure it will be not marked as dirty */ + mddev->recovery_cp = MaxSector; + + create_strip_zones(mddev, &priv_conf); + return priv_conf; +} + static void *raid0_takeover(mddev_t *mddev) { /* raid0 can take over: * raid4 - if all data disks are active. * raid5 - providing it is Raid4 layout and one disk is faulty * raid10 - assuming we have all necessary active disks + * raid1 - with (N -1) mirror drives faulty */ if (mddev->level == 4) return raid0_takeover_raid45(mddev); @@ -665,6 +699,12 @@ static void *raid0_takeover(mddev_t *mddev) if (mddev->level == 10) return raid0_takeover_raid10(mddev); + if (mddev->level == 1) + return raid0_takeover_raid1(mddev); + + printk(KERN_ERR "Takeover from raid%i to raid0 not supported\n", + mddev->level); + return ERR_PTR(-EINVAL); } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index a23ffa397ba9..06cd712807d0 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -593,7 +593,10 @@ static int flush_pending_writes(conf_t *conf) if (conf->pending_bio_list.head) { struct bio *bio; bio = bio_list_get(&conf->pending_bio_list); + /* Only take the spinlock to quiet a warning */ + spin_lock(conf->mddev->queue->queue_lock); blk_remove_plug(conf->mddev->queue); + spin_unlock(conf->mddev->queue->queue_lock); spin_unlock_irq(&conf->device_lock); /* flush any pending bitmap writes to * disk before proceeding w/ I/O */ @@ -959,7 +962,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) atomic_inc(&r1_bio->remaining); spin_lock_irqsave(&conf->device_lock, flags); bio_list_add(&conf->pending_bio_list, mbio); - blk_plug_device(mddev->queue); + blk_plug_device_unlocked(mddev->queue); spin_unlock_irqrestore(&conf->device_lock, flags); } r1_bio_write_done(r1_bio, bio->bi_vcnt, behind_pages, behind_pages != NULL); @@ -2021,7 +2024,6 @@ static int run(mddev_t *mddev) if (IS_ERR(conf)) return PTR_ERR(conf); - mddev->queue->queue_lock = &conf->device_lock; list_for_each_entry(rdev, &mddev->disks, same_set) { disk_stack_limits(mddev->gendisk, rdev->bdev, rdev->data_offset << 9); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 69b659544390..747d061d8e05 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -662,7 +662,10 @@ static int flush_pending_writes(conf_t *conf) if (conf->pending_bio_list.head) { struct bio *bio; bio = bio_list_get(&conf->pending_bio_list); + /* Spinlock only taken to quiet a warning */ + spin_lock(conf->mddev->queue->queue_lock); blk_remove_plug(conf->mddev->queue); + spin_unlock(conf->mddev->queue->queue_lock); spin_unlock_irq(&conf->device_lock); /* flush any pending bitmap writes to disk * before proceeding w/ I/O */ @@ -971,7 +974,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) atomic_inc(&r10_bio->remaining); spin_lock_irqsave(&conf->device_lock, flags); bio_list_add(&conf->pending_bio_list, mbio); - blk_plug_device(mddev->queue); + blk_plug_device_unlocked(mddev->queue); spin_unlock_irqrestore(&conf->device_lock, flags); } @@ -2304,8 +2307,6 @@ static int run(mddev_t *mddev) if (!conf) goto out; - mddev->queue->queue_lock = &conf->device_lock; - mddev->thread = conf->thread; conf->thread = NULL; @@ -2463,11 +2464,13 @@ static void *raid10_takeover_raid0(mddev_t *mddev) mddev->recovery_cp = MaxSector; conf = setup_conf(mddev); - if (!IS_ERR(conf)) + if (!IS_ERR(conf)) { list_for_each_entry(rdev, &mddev->disks, same_set) if (rdev->raid_disk >= 0) rdev->new_raid_disk = rdev->raid_disk * 2; - + conf->barrier = 1; + } + return conf; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 5044babfcda0..78536fdbd87f 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5204,7 +5204,6 @@ static int run(mddev_t *mddev) mddev->queue->backing_dev_info.congested_data = mddev; mddev->queue->backing_dev_info.congested_fn = raid5_congested; - mddev->queue->queue_lock = &conf->device_lock; mddev->queue->unplug_fn = raid5_unplug_queue; chunk_size = mddev->chunk_sectors << 9; @@ -5517,7 +5516,6 @@ static int raid5_start_reshape(mddev_t *mddev) raid5_conf_t *conf = mddev->private; mdk_rdev_t *rdev; int spares = 0; - int added_devices = 0; unsigned long flags; if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) @@ -5527,8 +5525,8 @@ static int raid5_start_reshape(mddev_t *mddev) return -ENOSPC; list_for_each_entry(rdev, &mddev->disks, same_set) - if ((rdev->raid_disk < 0 || rdev->raid_disk >= conf->raid_disks) - && !test_bit(Faulty, &rdev->flags)) + if (!test_bit(In_sync, &rdev->flags) + && !test_bit(Faulty, &rdev->flags)) spares++; if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded) @@ -5571,34 +5569,35 @@ static int raid5_start_reshape(mddev_t *mddev) * to correctly record the "partially reconstructed" state of * such devices during the reshape and confusion could result. */ - if (mddev->delta_disks >= 0) - list_for_each_entry(rdev, &mddev->disks, same_set) - if (rdev->raid_disk < 0 && - !test_bit(Faulty, &rdev->flags)) { - if (raid5_add_disk(mddev, rdev) == 0) { - char nm[20]; - if (rdev->raid_disk >= conf->previous_raid_disks) { - set_bit(In_sync, &rdev->flags); - added_devices++; - } else - rdev->recovery_offset = 0; - sprintf(nm, "rd%d", rdev->raid_disk); - if (sysfs_create_link(&mddev->kobj, - &rdev->kobj, nm)) - /* Failure here is OK */; - } else - break; - } else if (rdev->raid_disk >= conf->previous_raid_disks - && !test_bit(Faulty, &rdev->flags)) { - /* This is a spare that was manually added */ - set_bit(In_sync, &rdev->flags); - added_devices++; - } + if (mddev->delta_disks >= 0) { + int added_devices = 0; + list_for_each_entry(rdev, &mddev->disks, same_set) + if (rdev->raid_disk < 0 && + !test_bit(Faulty, &rdev->flags)) { + if (raid5_add_disk(mddev, rdev) == 0) { + char nm[20]; + if (rdev->raid_disk + >= conf->previous_raid_disks) { + set_bit(In_sync, &rdev->flags); + added_devices++; + } else + rdev->recovery_offset = 0; + sprintf(nm, "rd%d", rdev->raid_disk); + if (sysfs_create_link(&mddev->kobj, + &rdev->kobj, nm)) + /* Failure here is OK */; + } + } else if (rdev->raid_disk >= conf->previous_raid_disks + && !test_bit(Faulty, &rdev->flags)) { + /* This is a spare that was manually added */ + set_bit(In_sync, &rdev->flags); + added_devices++; + } - /* When a reshape changes the number of devices, ->degraded - * is measured against the larger of the pre and post number of - * devices.*/ - if (mddev->delta_disks > 0) { + /* When a reshape changes the number of devices, + * ->degraded is measured against the larger of the + * pre and post number of devices. + */ spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded += (conf->raid_disks - conf->previous_raid_disks) - added_devices; |