summary refs log tree commit diff stats
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r-- drivers/md/dm-log-userspace-transfer.c 2
-rw-r--r-- drivers/md/linear.c 1
-rw-r--r-- drivers/md/md.c 80
-rw-r--r-- drivers/md/md.h 4
-rw-r--r-- drivers/md/multipath.c 1
-rw-r--r-- drivers/md/raid0.c 42
-rw-r--r-- drivers/md/raid1.c 6
-rw-r--r-- drivers/md/raid10.c 13
-rw-r--r-- drivers/md/raid5.c 61
9 files changed, 146 insertions, 64 deletions
diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c
index 049eaf12aaab..1f23e048f077 100644
--- a/drivers/md/dm-log-userspace-transfer.c
+++ b/drivers/md/dm-log-userspace-transfer.c
@@ -134,7 +134,7 @@ static void cn_ulog_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
{
struct dm_ulog_request *tfr = (struct dm_ulog_request *)(msg + 1);
- if (!cap_raised(nsp->eff_cap, CAP_SYS_ADMIN))
+ if (!cap_raised(current_cap(), CAP_SYS_ADMIN))
return;
spin_lock(&receiving_list_lock);
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 8a2f767f26d8..0ed7f6bc2a7f 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -216,7 +216,6 @@ static int linear_run (mddev_t *mddev)
if (md_check_no_bitmap(mddev))
return -EINVAL;
- mddev->queue->queue_lock = &mddev->queue->__queue_lock;
conf = linear_conf(mddev, mddev->raid_disks);
if (!conf)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index b76cfc89e1b5..d5ad7723b172 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -287,6 +287,7 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
mddev_t *mddev = q->queuedata;
int rv;
int cpu;
+ unsigned int sectors;
if (mddev == NULL || mddev->pers == NULL
|| !mddev->ready) {
@@ -311,12 +312,16 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
atomic_inc(&mddev->active_io);
rcu_read_unlock();
+ /*
+ * save the sectors now since our bio can
+ * go away inside make_request
+ */
+ sectors = bio_sectors(bio);
rv = mddev->pers->make_request(mddev, bio);
cpu = part_stat_lock();
part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
- part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
- bio_sectors(bio));
+ part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
part_stat_unlock();
if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
@@ -548,6 +553,9 @@ static mddev_t * mddev_find(dev_t unit)
{
mddev_t *mddev, *new = NULL;
+ if (unit && MAJOR(unit) != MD_MAJOR)
+ unit &= ~((1<<MdpMinorShift)-1);
+
retry:
spin_lock(&all_mddevs_lock);
@@ -1947,8 +1955,6 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared)
__bdevname(dev, b));
return PTR_ERR(bdev);
}
- if (!shared)
- set_bit(AllReserved, &rdev->flags);
rdev->bdev = bdev;
return err;
}
@@ -2465,6 +2471,9 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
if (rdev->raid_disk != -1)
return -EBUSY;
+ if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
+ return -EBUSY;
+
if (rdev->mddev->pers->hot_add_disk == NULL)
return -EINVAL;
@@ -2610,12 +2619,11 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
mddev_lock(mddev);
list_for_each_entry(rdev2, &mddev->disks, same_set)
- if (test_bit(AllReserved, &rdev2->flags) ||
- (rdev->bdev == rdev2->bdev &&
- rdev != rdev2 &&
- overlaps(rdev->data_offset, rdev->sectors,
- rdev2->data_offset,
- rdev2->sectors))) {
+ if (rdev->bdev == rdev2->bdev &&
+ rdev != rdev2 &&
+ overlaps(rdev->data_offset, rdev->sectors,
+ rdev2->data_offset,
+ rdev2->sectors)) {
overlap = 1;
break;
}
@@ -4133,10 +4141,10 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len)
}
mddev->array_sectors = sectors;
- set_capacity(mddev->gendisk, mddev->array_sectors);
- if (mddev->pers)
+ if (mddev->pers) {
+ set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk);
-
+ }
return len;
}
@@ -4619,6 +4627,7 @@ static int do_md_run(mddev_t *mddev)
}
set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk);
+ mddev->changed = 1;
kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
out:
return err;
@@ -4707,6 +4716,7 @@ static void md_clean(mddev_t *mddev)
mddev->sync_speed_min = mddev->sync_speed_max = 0;
mddev->recovery = 0;
mddev->in_sync = 0;
+ mddev->changed = 0;
mddev->degraded = 0;
mddev->safemode = 0;
mddev->bitmap_info.offset = 0;
@@ -4822,6 +4832,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
set_capacity(disk, 0);
mutex_unlock(&mddev->open_mutex);
+ mddev->changed = 1;
revalidate_disk(disk);
if (mddev->ro)
@@ -5578,6 +5589,8 @@ static int update_raid_disks(mddev_t *mddev, int raid_disks)
mddev->delta_disks = raid_disks - mddev->raid_disks;
rv = mddev->pers->check_reshape(mddev);
+ if (rv < 0)
+ mddev->delta_disks = 0;
return rv;
}
@@ -6004,7 +6017,7 @@ static int md_open(struct block_device *bdev, fmode_t mode)
atomic_inc(&mddev->openers);
mutex_unlock(&mddev->open_mutex);
- check_disk_size_change(mddev->gendisk, bdev);
+ check_disk_change(bdev);
out:
return err;
}
@@ -6019,6 +6032,21 @@ static int md_release(struct gendisk *disk, fmode_t mode)
return 0;
}
+
+static int md_media_changed(struct gendisk *disk)
+{
+ mddev_t *mddev = disk->private_data;
+
+ return mddev->changed;
+}
+
+static int md_revalidate(struct gendisk *disk)
+{
+ mddev_t *mddev = disk->private_data;
+
+ mddev->changed = 0;
+ return 0;
+}
static const struct block_device_operations md_fops =
{
.owner = THIS_MODULE,
@@ -6029,6 +6057,8 @@ static const struct block_device_operations md_fops =
.compat_ioctl = md_compat_ioctl,
#endif
.getgeo = md_getgeo,
+ .media_changed = md_media_changed,
+ .revalidate_disk= md_revalidate,
};
static int md_thread(void * arg)
@@ -6985,9 +7015,6 @@ void md_do_sync(mddev_t *mddev)
} else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
mddev->resync_min = mddev->curr_resync_completed;
mddev->curr_resync = 0;
- if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
- mddev->curr_resync_completed = 0;
- sysfs_notify(&mddev->kobj, NULL, "sync_completed");
wake_up(&resync_wait);
set_bit(MD_RECOVERY_DONE, &mddev->recovery);
md_wakeup_thread(mddev->thread);
@@ -7028,7 +7055,7 @@ static int remove_and_add_spares(mddev_t *mddev)
}
}
- if (mddev->degraded && ! mddev->ro && !mddev->recovery_disabled) {
+ if (mddev->degraded && !mddev->recovery_disabled) {
list_for_each_entry(rdev, &mddev->disks, same_set) {
if (rdev->raid_disk >= 0 &&
!test_bit(In_sync, &rdev->flags) &&
@@ -7151,7 +7178,20 @@ void md_check_recovery(mddev_t *mddev)
/* Only thing we do on a ro array is remove
* failed devices.
*/
- remove_and_add_spares(mddev);
+ mdk_rdev_t *rdev;
+ list_for_each_entry(rdev, &mddev->disks, same_set)
+ if (rdev->raid_disk >= 0 &&
+ !test_bit(Blocked, &rdev->flags) &&
+ test_bit(Faulty, &rdev->flags) &&
+ atomic_read(&rdev->nr_pending)==0) {
+ if (mddev->pers->hot_remove_disk(
+ mddev, rdev->raid_disk)==0) {
+ char nm[20];
+ sprintf(nm,"rd%d", rdev->raid_disk);
+ sysfs_remove_link(&mddev->kobj, nm);
+ rdev->raid_disk = -1;
+ }
+ }
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
goto unlock;
}
@@ -7321,7 +7361,7 @@ static int __init md_init(void)
{
int ret = -ENOMEM;
- md_wq = alloc_workqueue("md", WQ_RESCUER, 0);
+ md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
if (!md_wq)
goto err_wq;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index eec517ced31a..12215d437fcc 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -93,8 +93,6 @@ struct mdk_rdev_s
#define Faulty 1 /* device is known to have a fault */
#define In_sync 2 /* device is in_sync with rest of array */
#define WriteMostly 4 /* Avoid reading if at all possible */
-#define AllReserved 6 /* If whole device is reserved for
- * one array */
#define AutoDetected 7 /* added by auto-detect */
#define Blocked 8 /* An error occurred on an externally
* managed array, don't allow writes
@@ -276,6 +274,8 @@ struct mddev_s
atomic_t active; /* general refcount */
atomic_t openers; /* number of active opens */
+ int changed; /* True if we might need to
+ * reread partition info */
int degraded; /* whether md should consider
* adding a spare
*/
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 6d7ddf32ef2e..3a62d440e27b 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -435,7 +435,6 @@ static int multipath_run (mddev_t *mddev)
* bookkeeping area. [whatever we allocate in multipath_run(),
* should be freed in multipath_stop()]
*/
- mddev->queue->queue_lock = &mddev->queue->__queue_lock;
conf = kzalloc(sizeof(multipath_conf_t), GFP_KERNEL);
mddev->private = conf;
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index a39f4c355e55..c0ac457f1218 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -179,6 +179,14 @@ static int create_strip_zones(mddev_t *mddev, raid0_conf_t **private_conf)
rdev1->new_raid_disk = j;
}
+ if (mddev->level == 1) {
+ /* taking over a raid1 array -
+ * we have only one active disk
+ */
+ j = 0;
+ rdev1->new_raid_disk = j;
+ }
+
if (j < 0 || j >= mddev->raid_disks) {
printk(KERN_ERR "md/raid0:%s: bad disk number %d - "
"aborting!\n", mdname(mddev), j);
@@ -353,7 +361,6 @@ static int raid0_run(mddev_t *mddev)
if (md_check_no_bitmap(mddev))
return -EINVAL;
blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
- mddev->queue->queue_lock = &mddev->queue->__queue_lock;
/* if private is not null, we are here after takeover */
if (mddev->private == NULL) {
@@ -644,12 +651,39 @@ static void *raid0_takeover_raid10(mddev_t *mddev)
return priv_conf;
}
+static void *raid0_takeover_raid1(mddev_t *mddev)
+{
+ raid0_conf_t *priv_conf;
+
+ /* Check layout:
+ * - (N - 1) mirror drives must be already faulty
+ */
+ if ((mddev->raid_disks - 1) != mddev->degraded) {
+ printk(KERN_ERR "md/raid0:%s: (N - 1) mirrors drives must be already faulty!\n",
+ mdname(mddev));
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* Set new parameters */
+ mddev->new_level = 0;
+ mddev->new_layout = 0;
+ mddev->new_chunk_sectors = 128; /* by default set chunk size to 64k */
+ mddev->delta_disks = 1 - mddev->raid_disks;
+ mddev->raid_disks = 1;
+ /* make sure it will not be marked as dirty */
+ mddev->recovery_cp = MaxSector;
+
+ create_strip_zones(mddev, &priv_conf);
+ return priv_conf;
+}
+
static void *raid0_takeover(mddev_t *mddev)
{
/* raid0 can take over:
* raid4 - if all data disks are active.
* raid5 - providing it is Raid4 layout and one disk is faulty
* raid10 - assuming we have all necessary active disks
+ * raid1 - with (N - 1) mirror drives faulty
*/
if (mddev->level == 4)
return raid0_takeover_raid45(mddev);
@@ -665,6 +699,12 @@ static void *raid0_takeover(mddev_t *mddev)
if (mddev->level == 10)
return raid0_takeover_raid10(mddev);
+ if (mddev->level == 1)
+ return raid0_takeover_raid1(mddev);
+
+ printk(KERN_ERR "Takeover from raid%i to raid0 not supported\n",
+ mddev->level);
+
return ERR_PTR(-EINVAL);
}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a23ffa397ba9..06cd712807d0 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -593,7 +593,10 @@ static int flush_pending_writes(conf_t *conf)
if (conf->pending_bio_list.head) {
struct bio *bio;
bio = bio_list_get(&conf->pending_bio_list);
+ /* Only take the spinlock to quiet a warning */
+ spin_lock(conf->mddev->queue->queue_lock);
blk_remove_plug(conf->mddev->queue);
+ spin_unlock(conf->mddev->queue->queue_lock);
spin_unlock_irq(&conf->device_lock);
/* flush any pending bitmap writes to
* disk before proceeding w/ I/O */
@@ -959,7 +962,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
atomic_inc(&r1_bio->remaining);
spin_lock_irqsave(&conf->device_lock, flags);
bio_list_add(&conf->pending_bio_list, mbio);
- blk_plug_device(mddev->queue);
+ blk_plug_device_unlocked(mddev->queue);
spin_unlock_irqrestore(&conf->device_lock, flags);
}
r1_bio_write_done(r1_bio, bio->bi_vcnt, behind_pages, behind_pages != NULL);
@@ -2021,7 +2024,6 @@ static int run(mddev_t *mddev)
if (IS_ERR(conf))
return PTR_ERR(conf);
- mddev->queue->queue_lock = &conf->device_lock;
list_for_each_entry(rdev, &mddev->disks, same_set) {
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 69b659544390..747d061d8e05 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -662,7 +662,10 @@ static int flush_pending_writes(conf_t *conf)
if (conf->pending_bio_list.head) {
struct bio *bio;
bio = bio_list_get(&conf->pending_bio_list);
+ /* Spinlock only taken to quiet a warning */
+ spin_lock(conf->mddev->queue->queue_lock);
blk_remove_plug(conf->mddev->queue);
+ spin_unlock(conf->mddev->queue->queue_lock);
spin_unlock_irq(&conf->device_lock);
/* flush any pending bitmap writes to disk
* before proceeding w/ I/O */
@@ -971,7 +974,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
atomic_inc(&r10_bio->remaining);
spin_lock_irqsave(&conf->device_lock, flags);
bio_list_add(&conf->pending_bio_list, mbio);
- blk_plug_device(mddev->queue);
+ blk_plug_device_unlocked(mddev->queue);
spin_unlock_irqrestore(&conf->device_lock, flags);
}
@@ -2304,8 +2307,6 @@ static int run(mddev_t *mddev)
if (!conf)
goto out;
- mddev->queue->queue_lock = &conf->device_lock;
-
mddev->thread = conf->thread;
conf->thread = NULL;
@@ -2463,11 +2464,13 @@ static void *raid10_takeover_raid0(mddev_t *mddev)
mddev->recovery_cp = MaxSector;
conf = setup_conf(mddev);
- if (!IS_ERR(conf))
+ if (!IS_ERR(conf)) {
list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0)
rdev->new_raid_disk = rdev->raid_disk * 2;
-
+ conf->barrier = 1;
+ }
+
return conf;
}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 5044babfcda0..78536fdbd87f 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5204,7 +5204,6 @@ static int run(mddev_t *mddev)
mddev->queue->backing_dev_info.congested_data = mddev;
mddev->queue->backing_dev_info.congested_fn = raid5_congested;
- mddev->queue->queue_lock = &conf->device_lock;
mddev->queue->unplug_fn = raid5_unplug_queue;
chunk_size = mddev->chunk_sectors << 9;
@@ -5517,7 +5516,6 @@ static int raid5_start_reshape(mddev_t *mddev)
raid5_conf_t *conf = mddev->private;
mdk_rdev_t *rdev;
int spares = 0;
- int added_devices = 0;
unsigned long flags;
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
@@ -5527,8 +5525,8 @@ static int raid5_start_reshape(mddev_t *mddev)
return -ENOSPC;
list_for_each_entry(rdev, &mddev->disks, same_set)
- if ((rdev->raid_disk < 0 || rdev->raid_disk >= conf->raid_disks)
- && !test_bit(Faulty, &rdev->flags))
+ if (!test_bit(In_sync, &rdev->flags)
+ && !test_bit(Faulty, &rdev->flags))
spares++;
if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded)
@@ -5571,34 +5569,35 @@ static int raid5_start_reshape(mddev_t *mddev)
* to correctly record the "partially reconstructed" state of
* such devices during the reshape and confusion could result.
*/
- if (mddev->delta_disks >= 0)
- list_for_each_entry(rdev, &mddev->disks, same_set)
- if (rdev->raid_disk < 0 &&
- !test_bit(Faulty, &rdev->flags)) {
- if (raid5_add_disk(mddev, rdev) == 0) {
- char nm[20];
- if (rdev->raid_disk >= conf->previous_raid_disks) {
- set_bit(In_sync, &rdev->flags);
- added_devices++;
- } else
- rdev->recovery_offset = 0;
- sprintf(nm, "rd%d", rdev->raid_disk);
- if (sysfs_create_link(&mddev->kobj,
- &rdev->kobj, nm))
- /* Failure here is OK */;
- } else
- break;
- } else if (rdev->raid_disk >= conf->previous_raid_disks
- && !test_bit(Faulty, &rdev->flags)) {
- /* This is a spare that was manually added */
- set_bit(In_sync, &rdev->flags);
- added_devices++;
- }
+ if (mddev->delta_disks >= 0) {
+ int added_devices = 0;
+ list_for_each_entry(rdev, &mddev->disks, same_set)
+ if (rdev->raid_disk < 0 &&
+ !test_bit(Faulty, &rdev->flags)) {
+ if (raid5_add_disk(mddev, rdev) == 0) {
+ char nm[20];
+ if (rdev->raid_disk
+ >= conf->previous_raid_disks) {
+ set_bit(In_sync, &rdev->flags);
+ added_devices++;
+ } else
+ rdev->recovery_offset = 0;
+ sprintf(nm, "rd%d", rdev->raid_disk);
+ if (sysfs_create_link(&mddev->kobj,
+ &rdev->kobj, nm))
+ /* Failure here is OK */;
+ }
+ } else if (rdev->raid_disk >= conf->previous_raid_disks
+ && !test_bit(Faulty, &rdev->flags)) {
+ /* This is a spare that was manually added */
+ set_bit(In_sync, &rdev->flags);
+ added_devices++;
+ }
- /* When a reshape changes the number of devices, ->degraded
- * is measured against the larger of the pre and post number of
- * devices.*/
- if (mddev->delta_disks > 0) {
+ /* When a reshape changes the number of devices,
+ * ->degraded is measured against the larger of the
+ * pre and post number of devices.
+ */
spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded += (conf->raid_disks - conf->previous_raid_disks)
- added_devices;
OpenPOWER on IntegriCloud