diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bcache/request.c | 23 | ||||
-rw-r--r-- | drivers/md/dm-cache-metadata.c | 101 | ||||
-rw-r--r-- | drivers/md/dm-cache-target.c | 89 | ||||
-rw-r--r-- | drivers/md/dm-thin.c | 29 | ||||
-rw-r--r-- | drivers/md/dm.c | 24 | ||||
-rw-r--r-- | drivers/md/md.c | 44 | ||||
-rw-r--r-- | drivers/md/raid5.c | 7 |
7 files changed, 218 insertions, 99 deletions
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 62e6e98186b5..ab43faddb447 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -601,13 +601,8 @@ static void request_endio(struct bio *bio, int error) static void bio_complete(struct search *s) { if (s->orig_bio) { - int cpu, rw = bio_data_dir(s->orig_bio); - unsigned long duration = jiffies - s->start_time; - - cpu = part_stat_lock(); - part_round_stats(cpu, &s->d->disk->part0); - part_stat_add(cpu, &s->d->disk->part0, ticks[rw], duration); - part_stat_unlock(); + generic_end_io_acct(bio_data_dir(s->orig_bio), + &s->d->disk->part0, s->start_time); trace_bcache_request_end(s->d, s->orig_bio); bio_endio(s->orig_bio, s->iop.error); @@ -959,12 +954,9 @@ static void cached_dev_make_request(struct request_queue *q, struct bio *bio) struct search *s; struct bcache_device *d = bio->bi_bdev->bd_disk->private_data; struct cached_dev *dc = container_of(d, struct cached_dev, disk); - int cpu, rw = bio_data_dir(bio); + int rw = bio_data_dir(bio); - cpu = part_stat_lock(); - part_stat_inc(cpu, &d->disk->part0, ios[rw]); - part_stat_add(cpu, &d->disk->part0, sectors[rw], bio_sectors(bio)); - part_stat_unlock(); + generic_start_io_acct(rw, bio_sectors(bio), &d->disk->part0); bio->bi_bdev = dc->bdev; bio->bi_iter.bi_sector += dc->sb.data_offset; @@ -1074,12 +1066,9 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio) struct search *s; struct closure *cl; struct bcache_device *d = bio->bi_bdev->bd_disk->private_data; - int cpu, rw = bio_data_dir(bio); + int rw = bio_data_dir(bio); - cpu = part_stat_lock(); - part_stat_inc(cpu, &d->disk->part0, ios[rw]); - part_stat_add(cpu, &d->disk->part0, sectors[rw], bio_sectors(bio)); - part_stat_unlock(); + generic_start_io_acct(rw, bio_sectors(bio), &d->disk->part0); s = search_alloc(bio, d); cl = &s->cl; diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 9fc616c2755e..21b156242e42 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -94,6 +94,9 @@ struct cache_disk_superblock { } __packed; struct dm_cache_metadata { + atomic_t ref_count; + struct list_head list; + struct block_device *bdev; struct dm_block_manager *bm; struct dm_space_map *metadata_sm; @@ -669,10 +672,10 @@ static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags) /*----------------------------------------------------------------*/ -struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, - sector_t data_block_size, - bool may_format_device, - size_t policy_hint_size) +static struct dm_cache_metadata *metadata_open(struct block_device *bdev, + sector_t data_block_size, + bool may_format_device, + size_t policy_hint_size) { int r; struct dm_cache_metadata *cmd; @@ -683,6 +686,7 @@ struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, return NULL; } + atomic_set(&cmd->ref_count, 1); init_rwsem(&cmd->root_lock); cmd->bdev = bdev; cmd->data_block_size = data_block_size; @@ -705,10 +709,95 @@ struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, return cmd; } +/* + * We keep a little list of ref counted metadata objects to prevent two + * different target instances creating separate bufio instances. This is + * an issue if a table is reloaded before the suspend. + */ +static DEFINE_MUTEX(table_lock); +static LIST_HEAD(table); + +static struct dm_cache_metadata *lookup(struct block_device *bdev) +{ + struct dm_cache_metadata *cmd; + + list_for_each_entry(cmd, &table, list) + if (cmd->bdev == bdev) { + atomic_inc(&cmd->ref_count); + return cmd; + } + + return NULL; +} + +static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev, + sector_t data_block_size, + bool may_format_device, + size_t policy_hint_size) +{ + struct dm_cache_metadata *cmd, *cmd2; + + mutex_lock(&table_lock); + cmd = lookup(bdev); + mutex_unlock(&table_lock); + + if (cmd) + return cmd; + + cmd = metadata_open(bdev, data_block_size, may_format_device, policy_hint_size); + if (cmd) { + mutex_lock(&table_lock); + cmd2 = lookup(bdev); + if (cmd2) { + mutex_unlock(&table_lock); + __destroy_persistent_data_objects(cmd); + kfree(cmd); + return cmd2; + } + list_add(&cmd->list, &table); + mutex_unlock(&table_lock); + } + + return cmd; +} + +static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size) +{ + if (cmd->data_block_size != data_block_size) { + DMERR("data_block_size (%llu) different from that in metadata (%llu)\n", + (unsigned long long) data_block_size, + (unsigned long long) cmd->data_block_size); + return false; + } + + return true; +} + +struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, + sector_t data_block_size, + bool may_format_device, + size_t policy_hint_size) +{ + struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size, + may_format_device, policy_hint_size); + if (cmd && !same_params(cmd, data_block_size)) { + dm_cache_metadata_close(cmd); + return NULL; + } + + return cmd; +} + void dm_cache_metadata_close(struct dm_cache_metadata *cmd) { - __destroy_persistent_data_objects(cmd); - kfree(cmd); + if (atomic_dec_and_test(&cmd->ref_count)) { + mutex_lock(&table_lock); + list_del(&cmd->list); + mutex_unlock(&table_lock); + + __destroy_persistent_data_objects(cmd); + kfree(cmd); + } } /* diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 1e96d7889f51..e1650539cc2f 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -221,7 +221,13 @@ struct cache { struct list_head need_commit_migrations; sector_t migration_threshold; wait_queue_head_t migration_wait; - atomic_t nr_migrations; + atomic_t nr_allocated_migrations; + + /* + * The number of in flight migrations that are performing + * background io. eg, promotion, writeback. + */ + atomic_t nr_io_migrations; wait_queue_head_t quiescing_wait; atomic_t quiescing; @@ -258,7 +264,6 @@ struct cache { struct dm_deferred_set *all_io_ds; mempool_t *migration_pool; - struct dm_cache_migration *next_migration; struct dm_cache_policy *policy; unsigned policy_nr_args; @@ -350,10 +355,31 @@ static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cel dm_bio_prison_free_cell(cache->prison, cell); } +static struct dm_cache_migration *alloc_migration(struct cache *cache) +{ + struct dm_cache_migration *mg; + + mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT); + if (mg) { + mg->cache = cache; + atomic_inc(&mg->cache->nr_allocated_migrations); + } + + return mg; +} + +static void free_migration(struct dm_cache_migration *mg) +{ + if (atomic_dec_and_test(&mg->cache->nr_allocated_migrations)) + wake_up(&mg->cache->migration_wait); + + mempool_free(mg, mg->cache->migration_pool); +} + static int prealloc_data_structs(struct cache *cache, struct prealloc *p) { if (!p->mg) { - p->mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT); + p->mg = alloc_migration(cache); if (!p->mg) return -ENOMEM; } @@ -382,7 +408,7 @@ static void prealloc_free_structs(struct cache *cache, struct prealloc *p) free_prison_cell(cache, p->cell1); if (p->mg) - mempool_free(p->mg, cache->migration_pool); + free_migration(p->mg); } static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p) @@ -854,24 +880,14 @@ static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio, * Migration covers moving data from the origin device to the cache, or * vice versa. *--------------------------------------------------------------*/ -static void free_migration(struct dm_cache_migration *mg) -{ - mempool_free(mg, mg->cache->migration_pool); -} - -static void inc_nr_migrations(struct cache *cache) +static void inc_io_migrations(struct cache *cache) { - atomic_inc(&cache->nr_migrations); + atomic_inc(&cache->nr_io_migrations); } -static void dec_nr_migrations(struct cache *cache) +static void dec_io_migrations(struct cache *cache) { - atomic_dec(&cache->nr_migrations); - - /* - * Wake the worker in case we're suspending the target. - */ - wake_up(&cache->migration_wait); + atomic_dec(&cache->nr_io_migrations); } static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, @@ -894,11 +910,10 @@ static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, wake_worker(cache); } -static void cleanup_migration(struct dm_cache_migration *mg) +static void free_io_migration(struct dm_cache_migration *mg) { - struct cache *cache = mg->cache; + dec_io_migrations(mg->cache); free_migration(mg); - dec_nr_migrations(cache); } static void migration_failure(struct dm_cache_migration *mg) @@ -923,7 +938,7 @@ static void migration_failure(struct dm_cache_migration *mg) cell_defer(cache, mg->new_ocell, true); } - cleanup_migration(mg); + free_io_migration(mg); } static void migration_success_pre_commit(struct dm_cache_migration *mg) @@ -934,7 +949,7 @@ static void migration_success_pre_commit(struct dm_cache_migration *mg) if (mg->writeback) { clear_dirty(cache, mg->old_oblock, mg->cblock); cell_defer(cache, mg->old_ocell, false); - cleanup_migration(mg); + free_io_migration(mg); return; } else if (mg->demote) { @@ -944,14 +959,14 @@ static void migration_success_pre_commit(struct dm_cache_migration *mg) mg->old_oblock); if (mg->promote) cell_defer(cache, mg->new_ocell, true); - cleanup_migration(mg); + free_io_migration(mg); return; } } else { if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) { DMWARN_LIMIT("promotion failed; couldn't update on disk metadata"); policy_remove_mapping(cache->policy, mg->new_oblock); - cleanup_migration(mg); + free_io_migration(mg); return; } } @@ -984,7 +999,7 @@ static void migration_success_post_commit(struct dm_cache_migration *mg) } else { if (mg->invalidate) policy_remove_mapping(cache->policy, mg->old_oblock); - cleanup_migration(mg); + free_io_migration(mg); } } else { @@ -999,7 +1014,7 @@ static void migration_success_post_commit(struct dm_cache_migration *mg) bio_endio(mg->new_ocell->holder, 0); cell_defer(cache, mg->new_ocell, false); } - cleanup_migration(mg); + free_io_migration(mg); } } @@ -1251,7 +1266,7 @@ static void promote(struct cache *cache, struct prealloc *structs, mg->new_ocell = cell; mg->start_jiffies = jiffies; - inc_nr_migrations(cache); + inc_io_migrations(cache); quiesce_migration(mg); } @@ -1275,7 +1290,7 @@ static void writeback(struct cache *cache, struct prealloc *structs, mg->new_ocell = NULL; mg->start_jiffies = jiffies; - inc_nr_migrations(cache); + inc_io_migrations(cache); quiesce_migration(mg); } @@ -1302,7 +1317,7 @@ static void demote_then_promote(struct cache *cache, struct prealloc *structs, mg->new_ocell = new_ocell; mg->start_jiffies = jiffies; - inc_nr_migrations(cache); + inc_io_migrations(cache); quiesce_migration(mg); } @@ -1330,7 +1345,7 @@ static void invalidate(struct cache *cache, struct prealloc *structs, mg->new_ocell = NULL; mg->start_jiffies = jiffies; - inc_nr_migrations(cache); + inc_io_migrations(cache); quiesce_migration(mg); } @@ -1412,7 +1427,7 @@ static void process_discard_bio(struct cache *cache, struct prealloc *structs, static bool spare_migration_bandwidth(struct cache *cache) { - sector_t current_volume = (atomic_read(&cache->nr_migrations) + 1) * + sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) * cache->sectors_per_block; return current_volume < cache->migration_threshold; } @@ -1764,7 +1779,7 @@ static void stop_quiescing(struct cache *cache) static void wait_for_migrations(struct cache *cache) { - wait_event(cache->migration_wait, !atomic_read(&cache->nr_migrations)); + wait_event(cache->migration_wait, !atomic_read(&cache->nr_allocated_migrations)); } static void stop_worker(struct cache *cache) @@ -1876,9 +1891,6 @@ static void destroy(struct cache *cache) { unsigned i; - if (cache->next_migration) - mempool_free(cache->next_migration, cache->migration_pool); - if (cache->migration_pool) mempool_destroy(cache->migration_pool); @@ -2424,7 +2436,8 @@ static int cache_create(struct cache_args *ca, struct cache **result) INIT_LIST_HEAD(&cache->quiesced_migrations); INIT_LIST_HEAD(&cache->completed_migrations); INIT_LIST_HEAD(&cache->need_commit_migrations); - atomic_set(&cache->nr_migrations, 0); + atomic_set(&cache->nr_allocated_migrations, 0); + atomic_set(&cache->nr_io_migrations, 0); init_waitqueue_head(&cache->migration_wait); init_waitqueue_head(&cache->quiescing_wait); @@ -2487,8 +2500,6 @@ static int cache_create(struct cache_args *ca, struct cache **result) goto bad; } - cache->next_migration = NULL; - cache->need_tick_bio = true; cache->sized = false; cache->invalidate = false; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 8735543eacdb..493478989dbd 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1127,6 +1127,24 @@ static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block, schedule_zero(tc, virt_block, data_dest, cell, bio); } +static void set_pool_mode(struct pool *pool, enum pool_mode new_mode); + +static void check_for_space(struct pool *pool) +{ + int r; + dm_block_t nr_free; + + if (get_pool_mode(pool) != PM_OUT_OF_DATA_SPACE) + return; + + r = dm_pool_get_free_block_count(pool->pmd, &nr_free); + if (r) + return; + + if (nr_free) + set_pool_mode(pool, PM_WRITE); +} + /* * A non-zero return indicates read_only or fail_io mode. * Many callers don't care about the return value. @@ -1141,6 +1159,8 @@ static int commit(struct pool *pool) r = dm_pool_commit_metadata(pool->pmd); if (r) metadata_operation_failed(pool, "dm_pool_commit_metadata", r); + else + check_for_space(pool); return r; } @@ -1159,8 +1179,6 @@ static void check_low_water_mark(struct pool *pool, dm_block_t free_blocks) } } -static void set_pool_mode(struct pool *pool, enum pool_mode new_mode); - static int alloc_data_block(struct thin_c *tc, dm_block_t *result) { int r; @@ -2155,7 +2173,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) pool->process_cell = process_cell_read_only; pool->process_discard_cell = process_discard_cell; pool->process_prepared_mapping = process_prepared_mapping; - pool->process_prepared_discard = process_prepared_discard_passdown; + pool->process_prepared_discard = process_prepared_discard; if (!pool->pf.error_if_no_space && no_space_timeout) queue_delayed_work(pool->wq, &pool->no_space_timeout, no_space_timeout); @@ -3814,6 +3832,8 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) r = -EINVAL; goto bad; } + atomic_set(&tc->refcount, 1); + init_completion(&tc->can_destroy); list_add_tail_rcu(&tc->list, &tc->pool->active_thins); spin_unlock_irqrestore(&tc->pool->lock, flags); /* @@ -3826,9 +3846,6 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) dm_put(pool_md); - atomic_set(&tc->refcount, 1); - init_completion(&tc->can_destroy); - return 0; bad: diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8f37ed215b19..2caf5b374649 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -206,6 +206,9 @@ struct mapped_device { /* zero-length flush that will be cloned and submitted to targets */ struct bio flush_bio; + /* the number of internal suspends */ + unsigned internal_suspend_count; + struct dm_stats stats; }; @@ -605,13 +608,10 @@ static void end_io_acct(struct dm_io *io) struct mapped_device *md = io->md; struct bio *bio = io->bio; unsigned long duration = jiffies - io->start_time; - int pending, cpu; + int pending; int rw = bio_data_dir(bio); - cpu = part_stat_lock(); - part_round_stats(cpu, &dm_disk(md)->part0); - part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration); - part_stat_unlock(); + generic_end_io_acct(rw, &dm_disk(md)->part0, io->start_time); if (unlikely(dm_stats_used(&md->stats))) dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_iter.bi_sector, @@ -902,7 +902,7 @@ static void disable_write_same(struct mapped_device *md) static void clone_endio(struct bio *bio, int error) { - int r = 0; + int r = error; struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone); struct dm_io *io = tio->io; struct mapped_device *md = tio->io->md; @@ -1651,16 +1651,12 @@ static void _dm_request(struct request_queue *q, struct bio *bio) { int rw = bio_data_dir(bio); struct mapped_device *md = q->queuedata; - int cpu; int srcu_idx; struct dm_table *map; map = dm_get_live_table(md, &srcu_idx); - cpu = part_stat_lock(); - part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]); - part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); - part_stat_unlock(); + generic_start_io_acct(rw, bio_sectors(bio), &dm_disk(md)->part0); /* if we're suspended, we have to queue this io for later */ if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { @@ -2935,7 +2931,7 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_fla { struct dm_table *map = NULL; - if (dm_suspended_internally_md(md)) + if (md->internal_suspend_count++) return; /* nested internal suspend */ if (dm_suspended_md(md)) { @@ -2960,7 +2956,9 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_fla static void __dm_internal_resume(struct mapped_device *md) { - if (!dm_suspended_internally_md(md)) + BUG_ON(!md->internal_suspend_count); + + if (--md->internal_suspend_count) return; /* resume from nested internal suspend */ if (dm_suspended_md(md)) diff --git a/drivers/md/md.c b/drivers/md/md.c index 9233c71138f1..709755fb6d7b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -247,7 +247,6 @@ static void md_make_request(struct request_queue *q, struct bio *bio) { const int rw = bio_data_dir(bio); struct mddev *mddev = q->queuedata; - int cpu; unsigned int sectors; if (mddev == NULL || mddev->pers == NULL @@ -284,10 +283,7 @@ static void md_make_request(struct request_queue *q, struct bio *bio) sectors = bio_sectors(bio); mddev->pers->make_request(mddev, bio); - cpu = part_stat_lock(); - part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); - part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors); - part_stat_unlock(); + generic_start_io_acct(rw, sectors, &mddev->gendisk->part0); if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended) wake_up(&mddev->sb_wait); @@ -2695,7 +2691,8 @@ static ssize_t new_offset_store(struct md_rdev *rdev, if (kstrtoull(buf, 10, &new_offset) < 0) return -EINVAL; - if (mddev->sync_thread) + if (mddev->sync_thread || + test_bit(MD_RECOVERY_RUNNING,&mddev->recovery)) return -EBUSY; if (new_offset == rdev->data_offset) /* reset is always permitted */ @@ -3272,6 +3269,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len) */ if (mddev->sync_thread || + test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || mddev->reshape_position != MaxSector || mddev->sysfs_active) return -EBUSY; @@ -4026,6 +4024,7 @@ action_store(struct mddev *mddev, const char *page, size_t len) clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); if (cmd_match(page, "idle") || cmd_match(page, "frozen")) { + flush_workqueue(md_misc_wq); if (mddev->sync_thread) { set_bit(MD_RECOVERY_INTR, &mddev->recovery); md_reap_sync_thread(mddev); @@ -5044,6 +5043,7 @@ static void md_clean(struct mddev *mddev) static void __md_stop_writes(struct mddev *mddev) { set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + flush_workqueue(md_misc_wq); if (mddev->sync_thread) { set_bit(MD_RECOVERY_INTR, &mddev->recovery); md_reap_sync_thread(mddev); @@ -5104,19 +5104,22 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); md_wakeup_thread(mddev->thread); } - if (mddev->sync_thread) { + if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) set_bit(MD_RECOVERY_INTR, &mddev->recovery); + if (mddev->sync_thread) /* Thread might be blocked waiting for metadata update * which will now never happen */ wake_up_process(mddev->sync_thread->tsk); - } + mddev_unlock(mddev); - wait_event(resync_wait, mddev->sync_thread == NULL); + wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING, + &mddev->recovery)); mddev_lock_nointr(mddev); mutex_lock(&mddev->open_mutex); if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) || mddev->sync_thread || + test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) { printk("md: %s still in use.\n",mdname(mddev)); if (did_freeze) { @@ -5162,20 +5165,24 @@ static int do_md_stop(struct mddev *mddev, int mode, set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); md_wakeup_thread(mddev->thread); } - if (mddev->sync_thread) { + if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) set_bit(MD_RECOVERY_INTR, &mddev->recovery); + if (mddev->sync_thread) /* Thread might be blocked waiting for metadata update * which will now never happen */ wake_up_process(mddev->sync_thread->tsk); - } + mddev_unlock(mddev); - wait_event(resync_wait, mddev->sync_thread == NULL); + wait_event(resync_wait, (mddev->sync_thread == NULL && + !test_bit(MD_RECOVERY_RUNNING, + &mddev->recovery))); mddev_lock_nointr(mddev); mutex_lock(&mddev->open_mutex); if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) || mddev->sysfs_active || mddev->sync_thread || + test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) { printk("md: %s still in use.\n",mdname(mddev)); mutex_unlock(&mddev->open_mutex); @@ -5950,7 +5957,8 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) * of each device. If num_sectors is zero, we find the largest size * that fits. */ - if (mddev->sync_thread) + if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || + mddev->sync_thread) return -EBUSY; if (mddev->ro) return -EROFS; @@ -5981,7 +5989,9 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks) if (raid_disks <= 0 || (mddev->max_disks && raid_disks >= mddev->max_disks)) return -EINVAL; - if (mddev->sync_thread || mddev->reshape_position != MaxSector) + if (mddev->sync_thread || + test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || + mddev->reshape_position != MaxSector) return -EBUSY; rdev_for_each(rdev, mddev) { @@ -6969,7 +6979,7 @@ static unsigned int mdstat_poll(struct file *filp, poll_table *wait) int mask; if (md_unloading) - return POLLIN|POLLRDNORM|POLLERR|POLLPRI;; + return POLLIN|POLLRDNORM|POLLERR|POLLPRI; poll_wait(filp, &md_event_waiters, wait); /* always allow read */ @@ -7593,6 +7603,7 @@ static void md_start_sync(struct work_struct *ws) clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); + wake_up(&resync_wait); if (test_and_clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) if (mddev->sysfs_action) @@ -7761,6 +7772,7 @@ void md_check_recovery(struct mddev *mddev) not_running: if (!mddev->sync_thread) { clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); + wake_up(&resync_wait); if (test_and_clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) if (mddev->sysfs_action) @@ -7779,7 +7791,6 @@ void md_reap_sync_thread(struct mddev *mddev) /* resync has finished, collect result */ md_unregister_thread(&mddev->sync_thread); - wake_up(&resync_wait); if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { /* success...*/ @@ -7807,6 +7818,7 @@ void md_reap_sync_thread(struct mddev *mddev) clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); + wake_up(&resync_wait); /* flag recovery needed just to double check */ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); sysfs_notify_dirent_safe(mddev->sysfs_action); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 9c66e5997fc8..c1b0d52bfcb0 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2917,8 +2917,11 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s, (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite && (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) && !test_bit(R5_OVERWRITE, &fdev[0]->flags)) || - (sh->raid_conf->level == 6 && s->failed && s->to_write && - s->to_write - s->non_overwrite < sh->raid_conf->raid_disks - 2 && + ((sh->raid_conf->level == 6 || + sh->sector >= sh->raid_conf->mddev->recovery_cp) + && s->failed && s->to_write && + (s->to_write - s->non_overwrite < + sh->raid_conf->raid_disks - sh->raid_conf->max_degraded) && (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))))) { /* we would like to get this block, possibly by computing it, * otherwise read it if the backing disk is insync |