From 00bcb4ac7ee7e557a491b614219142cea0ef16f4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 1 Jun 2010 19:37:23 +1000 Subject: md: reduce dependence on sysfs. We will want md devices to live as dm targets where sysfs is not visible. So allow md to not connect to sysfs. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 96c690279fc6..6a7a30113161 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5158,7 +5158,8 @@ static int run(mddev_t *mddev) /* Ok, everything is just fine now */ if (mddev->to_remove == &raid5_attrs_group) mddev->to_remove = NULL; - else if (sysfs_create_group(&mddev->kobj, &raid5_attrs_group)) + else if (mddev->kobj.sd && + sysfs_create_group(&mddev->kobj, &raid5_attrs_group)) printk(KERN_WARNING "md/raid:%s: failed to create sysfs attributes.\n", mdname(mddev)); @@ -5545,10 +5546,7 @@ static int raid5_start_reshape(mddev_t *mddev) sprintf(nm, "rd%d", rdev->raid_disk); if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm)) - printk(KERN_WARNING - "md/raid:%s: failed to create " - " link %s\n", - mdname(mddev), nm); + /* Failure here is OK */; } else break; } -- cgit v1.2.1 From c41d4ac40df0d01bf9c383ff28f194d1df2d4fd9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 1 Jun 2010 19:37:24 +1000 Subject: md/raid5: factor out code for changing size of stripe cache. Separate the actual 'change' code from the sysfs interface so that it can eventually be called internally. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 6a7a30113161..bd4067a70834 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4566,23 +4566,15 @@ raid5_show_stripe_cache_size(mddev_t *mddev, char *page) return 0; } -static ssize_t -raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) +int +raid5_set_cache_size(mddev_t *mddev, int size) { raid5_conf_t *conf = mddev->private; - unsigned long new; int err; - if (len >= PAGE_SIZE) + if (size <= 16 || size > 32768) return -EINVAL; - if (!conf) - return -ENODEV; - - if (strict_strtoul(page, 10, &new)) - return -EINVAL; - if (new <= 16 || new > 32768) - return -EINVAL; - while (new < conf->max_nr_stripes) { + while (size < conf->max_nr_stripes) { if (drop_one_stripe(conf)) conf->max_nr_stripes--; else @@ -4591,11 +4583,32 @@ raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) err = md_allow_write(mddev); if (err) return err; - while (new > conf->max_nr_stripes) { + while (size > conf->max_nr_stripes) { if (grow_one_stripe(conf)) conf->max_nr_stripes++; else break; } + return 0; +} +EXPORT_SYMBOL(raid5_set_cache_size); + +static ssize_t +raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) +{ + raid5_conf_t *conf = mddev->private; + unsigned long new; + int err; + + if (len >= PAGE_SIZE) + return -EINVAL; + if (!conf) + return -ENODEV; + + if (strict_strtoul(page, 10, &new)) + return -EINVAL; + err = raid5_set_cache_size(mddev, new); + if (err) + return err; return len; } -- cgit v1.2.1 From f4be6b43f1ac60dff00ef0923ee43b0e08872947 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 1 Jun 2010 19:37:25 +1000 Subject: md/raid5: ensure we create a unique name for kmem_cache when mddev has no gendisk We will shortly allow md devices with no gendisk (they are attached to a dm-target instead). That will cause mdname() to return 'mdX'. There is one place where mdname really needs to be unique: when creating the name for a slab cache. So in that case, if there is no gendisk, you the address of the mddev formatted in HEX to provide a unique name. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index bd4067a70834..6fa60e416a09 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1337,10 +1337,14 @@ static int grow_stripes(raid5_conf_t *conf, int num) struct kmem_cache *sc; int devs = max(conf->raid_disks, conf->previous_raid_disks); - sprintf(conf->cache_name[0], - "raid%d-%s", conf->level, mdname(conf->mddev)); - sprintf(conf->cache_name[1], - "raid%d-%s-alt", conf->level, mdname(conf->mddev)); + if (conf->mddev->gendisk) + sprintf(conf->cache_name[0], + "raid%d-%s", conf->level, mdname(conf->mddev)); + else + sprintf(conf->cache_name[0], + "raid%d-%p", conf->level, conf->mddev); + sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]); + conf->active_name = 0; sc = kmem_cache_create(conf->cache_name[conf->active_name], sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), -- cgit v1.2.1 From 4a5add49951e698073011855d1a8a7306bc9308d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 1 Jun 2010 19:37:28 +1000 Subject: raid5: Don't set read-ahead when there is no queue dm-raid456 does not provide a 'queue' for raid5 to use, so we must make raid5 stop depending on the queue. First: read_ahead dm handles read-ahead adjustment fully in userspace, so simply don't do any readahead adjustments if there is no queue. Also re-arrange code slightly so all the accesses to ->queue are together. Finally, move the blk_queue_merge_bvec function into the 'if' as the ->split_io setting in dm-raid456 has the same effect. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 6fa60e416a09..9c462f6659c3 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5161,16 +5161,6 @@ static int run(mddev_t *mddev) "reshape"); } - /* read-ahead size must cover two whole stripes, which is - * 2 * (datadisks) * chunksize where 'n' is the number of raid devices - */ - { - int data_disks = conf->previous_raid_disks - conf->max_degraded; - int stripe = data_disks * - ((mddev->chunk_sectors << 9) / PAGE_SIZE); - if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) - mddev->queue->backing_dev_info.ra_pages = 2 * stripe; - } /* Ok, everything is just fine now */ if (mddev->to_remove == &raid5_attrs_group) @@ -5178,8 +5168,23 @@ static int run(mddev_t *mddev) else if (mddev->kobj.sd && sysfs_create_group(&mddev->kobj, &raid5_attrs_group)) printk(KERN_WARNING - "md/raid:%s: failed to create sysfs attributes.\n", + "raid5: failed to create sysfs attributes for %s\n", mdname(mddev)); + md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); + + if (mddev->queue) { + /* read-ahead size must cover two whole stripes, which + * is 2 * (datadisks) * chunksize where 'n' is the + * number of raid devices + */ + int data_disks = conf->previous_raid_disks - conf->max_degraded; + int stripe = data_disks * + ((mddev->chunk_sectors << 9) / PAGE_SIZE); + if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) + mddev->queue->backing_dev_info.ra_pages = 2 * stripe; + + blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); + } mddev->queue->queue_lock = &conf->device_lock; @@ -5187,9 +5192,6 @@ static int run(mddev_t *mddev) mddev->queue->backing_dev_info.congested_data = mddev; mddev->queue->backing_dev_info.congested_fn = raid5_congested; - md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); - - blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); chunk_size = mddev->chunk_sectors << 9; blk_queue_io_min(mddev->queue, chunk_size); blk_queue_io_opt(mddev->queue, chunk_size * @@ -5618,7 +5620,7 @@ static void end_reshape(raid5_conf_t *conf) /* read-ahead size must cover two whole stripes, which is * 2 * (datadisks) * chunksize where 'n' is the number of raid devices */ - { + if (conf->mddev->queue) { int data_disks = conf->raid_disks - conf->max_degraded; int stripe = data_disks * ((conf->chunk_sectors << 9) / PAGE_SIZE); -- cgit v1.2.1 From 11d8a6e3719519fbc0e2c9d61b6fa931b84bf813 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 26 Jul 2010 11:57:07 +1000 Subject: md/raid5: export is_congested test the dm module will need this for dm-raid45. Also only access ->queue->backing_dev_info->congested_fn if ->queue actually exists. It won't in a dm target. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 9c462f6659c3..ad6694f8a3a8 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3678,17 +3678,14 @@ static void raid5_unplug_device(struct request_queue *q) unplug_slaves(mddev); } -static int raid5_congested(void *data, int bits) +int md_raid5_congested(mddev_t *mddev, int bits) { - mddev_t *mddev = data; raid5_conf_t *conf = mddev->private; /* No difference between reads and writes. Just check * how busy the stripe_cache is */ - if (mddev_congested(mddev, bits)) - return 1; if (conf->inactive_blocked) return 1; if (conf->quiesce) @@ -3698,6 +3695,15 @@ static int raid5_congested(void *data, int bits) return 0; } +EXPORT_SYMBOL_GPL(md_raid5_congested); + +static int raid5_congested(void *data, int bits) +{ + mddev_t *mddev = data; + + return mddev_congested(mddev, bits) || + md_raid5_congested(mddev, bits); +} /* We want read requests to align with chunks where possible, * but write requests don't need to. @@ -5184,13 +5190,14 @@ static int run(mddev_t *mddev) mddev->queue->backing_dev_info.ra_pages = 2 * stripe; blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); + + mddev->queue->backing_dev_info.congested_data = mddev; + mddev->queue->backing_dev_info.congested_fn = raid5_congested; } mddev->queue->queue_lock = &conf->device_lock; mddev->queue->unplug_fn = raid5_unplug_device; - mddev->queue->backing_dev_info.congested_data = mddev; - mddev->queue->backing_dev_info.congested_fn = raid5_congested; chunk_size = mddev->chunk_sectors << 9; blk_queue_io_min(mddev->queue, chunk_size); @@ -5220,7 +5227,8 @@ static int stop(mddev_t *mddev) md_unregister_thread(mddev->thread); mddev->thread = NULL; - mddev->queue->backing_dev_info.congested_fn = NULL; + if (mddev->queue) + mddev->queue->backing_dev_info.congested_fn = NULL; blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ free_conf(conf); mddev->private = NULL; -- cgit v1.2.1 From 2ac8740151b082f045e58010eb92560c3a23a0e9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 1 Jun 2010 19:37:29 +1000 Subject: md/raid5: add simple plugging infrastructure. md/raid5 uses the plugging infrastructure provided by the block layer and 'struct request_queue'. However when we plug raid5 under dm there is no request queue so we cannot use that. So create a similar infrastructure that is much lighter weight and use it for raid5. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index ad6694f8a3a8..84bb9aec2211 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -201,11 +201,11 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) if (test_bit(STRIPE_HANDLE, &sh->state)) { if (test_bit(STRIPE_DELAYED, &sh->state)) { list_add_tail(&sh->lru, &conf->delayed_list); - blk_plug_device(conf->mddev->queue); + plugger_set_plug(&conf->plug); } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && sh->bm_seq - conf->seq_write > 0) { list_add_tail(&sh->lru, &conf->bitmap_list); - blk_plug_device(conf->mddev->queue); + plugger_set_plug(&conf->plug); } else { clear_bit(STRIPE_BIT_DELAY, &sh->state); list_add_tail(&sh->lru, &conf->handle_list); @@ -434,7 +434,7 @@ static int has_failed(raid5_conf_t *conf) } static void unplug_slaves(mddev_t *mddev); -static void raid5_unplug_device(struct request_queue *q); +static void raid5_unplug_device(raid5_conf_t *conf); static struct stripe_head * get_active_stripe(raid5_conf_t *conf, sector_t sector, @@ -464,7 +464,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector, < (conf->max_nr_stripes *3/4) || !conf->inactive_blocked), conf->device_lock, - raid5_unplug_device(conf->mddev->queue) + raid5_unplug_device(conf) ); conf->inactive_blocked = 0; } else @@ -3618,7 +3618,7 @@ static void raid5_activate_delayed(raid5_conf_t *conf) list_add_tail(&sh->lru, &conf->hold_list); } } else - blk_plug_device(conf->mddev->queue); + plugger_set_plug(&conf->plug); } static void activate_bit_delay(raid5_conf_t *conf) @@ -3659,23 +3659,33 @@ static void unplug_slaves(mddev_t *mddev) rcu_read_unlock(); } -static void raid5_unplug_device(struct request_queue *q) +static void raid5_unplug_device(raid5_conf_t *conf) { - mddev_t *mddev = q->queuedata; - raid5_conf_t *conf = mddev->private; unsigned long flags; spin_lock_irqsave(&conf->device_lock, flags); - if (blk_remove_plug(q)) { + if (plugger_remove_plug(&conf->plug)) { conf->seq_flush++; raid5_activate_delayed(conf); } - md_wakeup_thread(mddev->thread); + md_wakeup_thread(conf->mddev->thread); spin_unlock_irqrestore(&conf->device_lock, flags); - unplug_slaves(mddev); + unplug_slaves(conf->mddev); +} + +static void raid5_unplug(struct plug_handle *plug) +{ + raid5_conf_t *conf = container_of(plug, raid5_conf_t, plug); + raid5_unplug_device(conf); +} + +static void raid5_unplug_queue(struct request_queue *q) +{ + mddev_t *mddev = q->queuedata; + raid5_unplug_device(mddev->private); } int md_raid5_congested(mddev_t *mddev, int bits) @@ -4085,7 +4095,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) * add failed due to overlap. Flush everything * and wait a while */ - raid5_unplug_device(mddev->queue); + raid5_unplug_device(conf); release_stripe(sh); schedule(); goto retry; @@ -5178,6 +5188,7 @@ static int run(mddev_t *mddev) mdname(mddev)); md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); + plugger_init(&conf->plug, raid5_unplug); if (mddev->queue) { /* read-ahead size must cover two whole stripes, which * is 2 * (datadisks) * chunksize where 'n' is the @@ -5197,7 +5208,7 @@ static int run(mddev_t *mddev) mddev->queue->queue_lock = &conf->device_lock; - mddev->queue->unplug_fn = raid5_unplug_device; + mddev->queue->unplug_fn = raid5_unplug_queue; chunk_size = mddev->chunk_sectors << 9; blk_queue_io_min(mddev->queue, chunk_size); @@ -5229,7 +5240,7 @@ static int stop(mddev_t *mddev) mddev->thread = NULL; if (mddev->queue) mddev->queue->backing_dev_info.congested_fn = NULL; - blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ + plugger_flush(&conf->plug); /* the unplug fn references 'conf'*/ free_conf(conf); mddev->private = NULL; mddev->to_remove = &raid5_attrs_group; -- cgit v1.2.1 From 252ac5221a71be72b7e7c7b7482af91e9c962e8c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 1 Jun 2010 19:37:29 +1000 Subject: md/plug: optionally use plugger to unplug an array during resync/recovery. If an array doesn't have a 'queue' then md_do_sync cannot unplug it. In that case it will have a 'plugger', so make that available to the mddev, and use it to unplug the array if needed. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 84bb9aec2211..0a8173e650bb 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5189,6 +5189,7 @@ static int run(mddev_t *mddev) md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); plugger_init(&conf->plug, raid5_unplug); + mddev->plug = &conf->plug; if (mddev->queue) { /* read-ahead size must cover two whole stripes, which * is 2 * (datadisks) * chunksize where 'n' is the -- cgit v1.2.1 From 9f7c2220017771253d7d10b3cc017cb79eeac0fb Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 26 Jul 2010 12:04:13 +1000 Subject: md/raid5: export raid5 unplugging interface. Also remove remaining accesses to ->queue and ->gendisk when ->queue is NULL (As it is in a DM target). Signed-off-by: NeilBrown --- drivers/md/raid5.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 0a8173e650bb..e30a809cbea0 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -434,7 +434,6 @@ static int has_failed(raid5_conf_t *conf) } static void unplug_slaves(mddev_t *mddev); -static void raid5_unplug_device(raid5_conf_t *conf); static struct stripe_head * get_active_stripe(raid5_conf_t *conf, sector_t sector, @@ -464,7 +463,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector, < (conf->max_nr_stripes *3/4) || !conf->inactive_blocked), conf->device_lock, - raid5_unplug_device(conf) + md_raid5_unplug_device(conf) ); conf->inactive_blocked = 0; } else @@ -3659,7 +3658,7 @@ static void unplug_slaves(mddev_t *mddev) rcu_read_unlock(); } -static void raid5_unplug_device(raid5_conf_t *conf) +void md_raid5_unplug_device(raid5_conf_t *conf) { unsigned long flags; @@ -3675,17 +3674,18 @@ static void raid5_unplug_device(raid5_conf_t *conf) unplug_slaves(conf->mddev); } +EXPORT_SYMBOL_GPL(md_raid5_unplug_device); static void raid5_unplug(struct plug_handle *plug) { raid5_conf_t *conf = container_of(plug, raid5_conf_t, plug); - raid5_unplug_device(conf); + md_raid5_unplug_device(conf); } static void raid5_unplug_queue(struct request_queue *q) { mddev_t *mddev = q->queuedata; - raid5_unplug_device(mddev->private); + md_raid5_unplug_device(mddev->private); } int md_raid5_congested(mddev_t *mddev, int bits) @@ -4095,7 +4095,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) * add failed due to overlap. Flush everything * and wait a while */ - raid5_unplug_device(conf); + md_raid5_unplug_device(conf); release_stripe(sh); schedule(); goto retry; @@ -4991,7 +4991,7 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded static int run(mddev_t *mddev) { raid5_conf_t *conf; - int working_disks = 0, chunk_size; + int working_disks = 0; int dirty_parity_disks = 0; mdk_rdev_t *rdev; sector_t reshape_offset = 0; @@ -5191,6 +5191,7 @@ static int run(mddev_t *mddev) plugger_init(&conf->plug, raid5_unplug); mddev->plug = &conf->plug; if (mddev->queue) { + int chunk_size; /* read-ahead size must cover two whole stripes, which * is 2 * (datadisks) * chunksize where 'n' is the * number of raid devices @@ -5205,20 +5206,18 @@ static int run(mddev_t *mddev) mddev->queue->backing_dev_info.congested_data = mddev; mddev->queue->backing_dev_info.congested_fn = raid5_congested; - } - - mddev->queue->queue_lock = &conf->device_lock; - - mddev->queue->unplug_fn = raid5_unplug_queue; + mddev->queue->queue_lock = &conf->device_lock; + mddev->queue->unplug_fn = raid5_unplug_queue; - chunk_size = mddev->chunk_sectors << 9; - blk_queue_io_min(mddev->queue, chunk_size); - blk_queue_io_opt(mddev->queue, chunk_size * - (conf->raid_disks - conf->max_degraded)); + chunk_size = mddev->chunk_sectors << 9; + blk_queue_io_min(mddev->queue, chunk_size); + blk_queue_io_opt(mddev->queue, chunk_size * + (conf->raid_disks - conf->max_degraded)); - list_for_each_entry(rdev, &mddev->disks, same_set) - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); + list_for_each_entry(rdev, &mddev->disks, same_set) + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); + } return 0; abort: -- cgit v1.2.1