summary | refs | log | tree | commit | diff | stats
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r-- drivers/md/bcache/request.c | 23
-rw-r--r-- drivers/md/dm-cache-metadata.c | 101
-rw-r--r-- drivers/md/dm-cache-target.c | 89
-rw-r--r-- drivers/md/dm-thin.c | 29
-rw-r--r-- drivers/md/dm.c | 24
-rw-r--r-- drivers/md/md.c | 44
-rw-r--r-- drivers/md/raid5.c | 7
7 files changed, 218 insertions, 99 deletions
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 62e6e98186b5..ab43faddb447 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -601,13 +601,8 @@ static void request_endio(struct bio *bio, int error)
static void bio_complete(struct search *s)
{
if (s->orig_bio) {
- int cpu, rw = bio_data_dir(s->orig_bio);
- unsigned long duration = jiffies - s->start_time;
-
- cpu = part_stat_lock();
- part_round_stats(cpu, &s->d->disk->part0);
- part_stat_add(cpu, &s->d->disk->part0, ticks[rw], duration);
- part_stat_unlock();
+ generic_end_io_acct(bio_data_dir(s->orig_bio),
+ &s->d->disk->part0, s->start_time);
trace_bcache_request_end(s->d, s->orig_bio);
bio_endio(s->orig_bio, s->iop.error);
@@ -959,12 +954,9 @@ static void cached_dev_make_request(struct request_queue *q, struct bio *bio)
struct search *s;
struct bcache_device *d = bio->bi_bdev->bd_disk->private_data;
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
- int cpu, rw = bio_data_dir(bio);
+ int rw = bio_data_dir(bio);
- cpu = part_stat_lock();
- part_stat_inc(cpu, &d->disk->part0, ios[rw]);
- part_stat_add(cpu, &d->disk->part0, sectors[rw], bio_sectors(bio));
- part_stat_unlock();
+ generic_start_io_acct(rw, bio_sectors(bio), &d->disk->part0);
bio->bi_bdev = dc->bdev;
bio->bi_iter.bi_sector += dc->sb.data_offset;
@@ -1074,12 +1066,9 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio)
struct search *s;
struct closure *cl;
struct bcache_device *d = bio->bi_bdev->bd_disk->private_data;
- int cpu, rw = bio_data_dir(bio);
+ int rw = bio_data_dir(bio);
- cpu = part_stat_lock();
- part_stat_inc(cpu, &d->disk->part0, ios[rw]);
- part_stat_add(cpu, &d->disk->part0, sectors[rw], bio_sectors(bio));
- part_stat_unlock();
+ generic_start_io_acct(rw, bio_sectors(bio), &d->disk->part0);
s = search_alloc(bio, d);
cl = &s->cl;
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 9fc616c2755e..21b156242e42 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -94,6 +94,9 @@ struct cache_disk_superblock {
} __packed;
struct dm_cache_metadata {
+ atomic_t ref_count;
+ struct list_head list;
+
struct block_device *bdev;
struct dm_block_manager *bm;
struct dm_space_map *metadata_sm;
@@ -669,10 +672,10 @@ static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags)
/*----------------------------------------------------------------*/
-struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
- sector_t data_block_size,
- bool may_format_device,
- size_t policy_hint_size)
+static struct dm_cache_metadata *metadata_open(struct block_device *bdev,
+ sector_t data_block_size,
+ bool may_format_device,
+ size_t policy_hint_size)
{
int r;
struct dm_cache_metadata *cmd;
@@ -683,6 +686,7 @@ struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
return NULL;
}
+ atomic_set(&cmd->ref_count, 1);
init_rwsem(&cmd->root_lock);
cmd->bdev = bdev;
cmd->data_block_size = data_block_size;
@@ -705,10 +709,95 @@ struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
return cmd;
}
+/*
+ * We keep a little list of ref counted metadata objects to prevent two
+ * different target instances creating separate bufio instances. This is
+ * an issue if a table is reloaded before the suspend.
+ */
+static DEFINE_MUTEX(table_lock);
+static LIST_HEAD(table);
+
+static struct dm_cache_metadata *lookup(struct block_device *bdev)
+{
+ struct dm_cache_metadata *cmd;
+
+ list_for_each_entry(cmd, &table, list)
+ if (cmd->bdev == bdev) {
+ atomic_inc(&cmd->ref_count);
+ return cmd;
+ }
+
+ return NULL;
+}
+
+static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev,
+ sector_t data_block_size,
+ bool may_format_device,
+ size_t policy_hint_size)
+{
+ struct dm_cache_metadata *cmd, *cmd2;
+
+ mutex_lock(&table_lock);
+ cmd = lookup(bdev);
+ mutex_unlock(&table_lock);
+
+ if (cmd)
+ return cmd;
+
+ cmd = metadata_open(bdev, data_block_size, may_format_device, policy_hint_size);
+ if (cmd) {
+ mutex_lock(&table_lock);
+ cmd2 = lookup(bdev);
+ if (cmd2) {
+ mutex_unlock(&table_lock);
+ __destroy_persistent_data_objects(cmd);
+ kfree(cmd);
+ return cmd2;
+ }
+ list_add(&cmd->list, &table);
+ mutex_unlock(&table_lock);
+ }
+
+ return cmd;
+}
+
+static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size)
+{
+ if (cmd->data_block_size != data_block_size) {
+ DMERR("data_block_size (%llu) different from that in metadata (%llu)\n",
+ (unsigned long long) data_block_size,
+ (unsigned long long) cmd->data_block_size);
+ return false;
+ }
+
+ return true;
+}
+
+struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
+ sector_t data_block_size,
+ bool may_format_device,
+ size_t policy_hint_size)
+{
+ struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size,
+ may_format_device, policy_hint_size);
+ if (cmd && !same_params(cmd, data_block_size)) {
+ dm_cache_metadata_close(cmd);
+ return NULL;
+ }
+
+ return cmd;
+}
+
void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
{
- __destroy_persistent_data_objects(cmd);
- kfree(cmd);
+ if (atomic_dec_and_test(&cmd->ref_count)) {
+ mutex_lock(&table_lock);
+ list_del(&cmd->list);
+ mutex_unlock(&table_lock);
+
+ __destroy_persistent_data_objects(cmd);
+ kfree(cmd);
+ }
}
/*
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 1e96d7889f51..e1650539cc2f 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -221,7 +221,13 @@ struct cache {
struct list_head need_commit_migrations;
sector_t migration_threshold;
wait_queue_head_t migration_wait;
- atomic_t nr_migrations;
+ atomic_t nr_allocated_migrations;
+
+ /*
+ * The number of in flight migrations that are performing
+ * background io. eg, promotion, writeback.
+ */
+ atomic_t nr_io_migrations;
wait_queue_head_t quiescing_wait;
atomic_t quiescing;
@@ -258,7 +264,6 @@ struct cache {
struct dm_deferred_set *all_io_ds;
mempool_t *migration_pool;
- struct dm_cache_migration *next_migration;
struct dm_cache_policy *policy;
unsigned policy_nr_args;
@@ -350,10 +355,31 @@ static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cel
dm_bio_prison_free_cell(cache->prison, cell);
}
+static struct dm_cache_migration *alloc_migration(struct cache *cache)
+{
+ struct dm_cache_migration *mg;
+
+ mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
+ if (mg) {
+ mg->cache = cache;
+ atomic_inc(&mg->cache->nr_allocated_migrations);
+ }
+
+ return mg;
+}
+
+static void free_migration(struct dm_cache_migration *mg)
+{
+ if (atomic_dec_and_test(&mg->cache->nr_allocated_migrations))
+ wake_up(&mg->cache->migration_wait);
+
+ mempool_free(mg, mg->cache->migration_pool);
+}
+
static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
{
if (!p->mg) {
- p->mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
+ p->mg = alloc_migration(cache);
if (!p->mg)
return -ENOMEM;
}
@@ -382,7 +408,7 @@ static void prealloc_free_structs(struct cache *cache, struct prealloc *p)
free_prison_cell(cache, p->cell1);
if (p->mg)
- mempool_free(p->mg, cache->migration_pool);
+ free_migration(p->mg);
}
static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p)
@@ -854,24 +880,14 @@ static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
* Migration covers moving data from the origin device to the cache, or
* vice versa.
*--------------------------------------------------------------*/
-static void free_migration(struct dm_cache_migration *mg)
-{
- mempool_free(mg, mg->cache->migration_pool);
-}
-
-static void inc_nr_migrations(struct cache *cache)
+static void inc_io_migrations(struct cache *cache)
{
- atomic_inc(&cache->nr_migrations);
+ atomic_inc(&cache->nr_io_migrations);
}
-static void dec_nr_migrations(struct cache *cache)
+static void dec_io_migrations(struct cache *cache)
{
- atomic_dec(&cache->nr_migrations);
-
- /*
- * Wake the worker in case we're suspending the target.
- */
- wake_up(&cache->migration_wait);
+ atomic_dec(&cache->nr_io_migrations);
}
static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
@@ -894,11 +910,10 @@ static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
wake_worker(cache);
}
-static void cleanup_migration(struct dm_cache_migration *mg)
+static void free_io_migration(struct dm_cache_migration *mg)
{
- struct cache *cache = mg->cache;
+ dec_io_migrations(mg->cache);
free_migration(mg);
- dec_nr_migrations(cache);
}
static void migration_failure(struct dm_cache_migration *mg)
@@ -923,7 +938,7 @@ static void migration_failure(struct dm_cache_migration *mg)
cell_defer(cache, mg->new_ocell, true);
}
- cleanup_migration(mg);
+ free_io_migration(mg);
}
static void migration_success_pre_commit(struct dm_cache_migration *mg)
@@ -934,7 +949,7 @@ static void migration_success_pre_commit(struct dm_cache_migration *mg)
if (mg->writeback) {
clear_dirty(cache, mg->old_oblock, mg->cblock);
cell_defer(cache, mg->old_ocell, false);
- cleanup_migration(mg);
+ free_io_migration(mg);
return;
} else if (mg->demote) {
@@ -944,14 +959,14 @@ static void migration_success_pre_commit(struct dm_cache_migration *mg)
mg->old_oblock);
if (mg->promote)
cell_defer(cache, mg->new_ocell, true);
- cleanup_migration(mg);
+ free_io_migration(mg);
return;
}
} else {
if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) {
DMWARN_LIMIT("promotion failed; couldn't update on disk metadata");
policy_remove_mapping(cache->policy, mg->new_oblock);
- cleanup_migration(mg);
+ free_io_migration(mg);
return;
}
}
@@ -984,7 +999,7 @@ static void migration_success_post_commit(struct dm_cache_migration *mg)
} else {
if (mg->invalidate)
policy_remove_mapping(cache->policy, mg->old_oblock);
- cleanup_migration(mg);
+ free_io_migration(mg);
}
} else {
@@ -999,7 +1014,7 @@ static void migration_success_post_commit(struct dm_cache_migration *mg)
bio_endio(mg->new_ocell->holder, 0);
cell_defer(cache, mg->new_ocell, false);
}
- cleanup_migration(mg);
+ free_io_migration(mg);
}
}
@@ -1251,7 +1266,7 @@ static void promote(struct cache *cache, struct prealloc *structs,
mg->new_ocell = cell;
mg->start_jiffies = jiffies;
- inc_nr_migrations(cache);
+ inc_io_migrations(cache);
quiesce_migration(mg);
}
@@ -1275,7 +1290,7 @@ static void writeback(struct cache *cache, struct prealloc *structs,
mg->new_ocell = NULL;
mg->start_jiffies = jiffies;
- inc_nr_migrations(cache);
+ inc_io_migrations(cache);
quiesce_migration(mg);
}
@@ -1302,7 +1317,7 @@ static void demote_then_promote(struct cache *cache, struct prealloc *structs,
mg->new_ocell = new_ocell;
mg->start_jiffies = jiffies;
- inc_nr_migrations(cache);
+ inc_io_migrations(cache);
quiesce_migration(mg);
}
@@ -1330,7 +1345,7 @@ static void invalidate(struct cache *cache, struct prealloc *structs,
mg->new_ocell = NULL;
mg->start_jiffies = jiffies;
- inc_nr_migrations(cache);
+ inc_io_migrations(cache);
quiesce_migration(mg);
}
@@ -1412,7 +1427,7 @@ static void process_discard_bio(struct cache *cache, struct prealloc *structs,
static bool spare_migration_bandwidth(struct cache *cache)
{
- sector_t current_volume = (atomic_read(&cache->nr_migrations) + 1) *
+ sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
cache->sectors_per_block;
return current_volume < cache->migration_threshold;
}
@@ -1764,7 +1779,7 @@ static void stop_quiescing(struct cache *cache)
static void wait_for_migrations(struct cache *cache)
{
- wait_event(cache->migration_wait, !atomic_read(&cache->nr_migrations));
+ wait_event(cache->migration_wait, !atomic_read(&cache->nr_allocated_migrations));
}
static void stop_worker(struct cache *cache)
@@ -1876,9 +1891,6 @@ static void destroy(struct cache *cache)
{
unsigned i;
- if (cache->next_migration)
- mempool_free(cache->next_migration, cache->migration_pool);
-
if (cache->migration_pool)
mempool_destroy(cache->migration_pool);
@@ -2424,7 +2436,8 @@ static int cache_create(struct cache_args *ca, struct cache **result)
INIT_LIST_HEAD(&cache->quiesced_migrations);
INIT_LIST_HEAD(&cache->completed_migrations);
INIT_LIST_HEAD(&cache->need_commit_migrations);
- atomic_set(&cache->nr_migrations, 0);
+ atomic_set(&cache->nr_allocated_migrations, 0);
+ atomic_set(&cache->nr_io_migrations, 0);
init_waitqueue_head(&cache->migration_wait);
init_waitqueue_head(&cache->quiescing_wait);
@@ -2487,8 +2500,6 @@ static int cache_create(struct cache_args *ca, struct cache **result)
goto bad;
}
- cache->next_migration = NULL;
-
cache->need_tick_bio = true;
cache->sized = false;
cache->invalidate = false;
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 8735543eacdb..493478989dbd 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -1127,6 +1127,24 @@ static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
schedule_zero(tc, virt_block, data_dest, cell, bio);
}
+static void set_pool_mode(struct pool *pool, enum pool_mode new_mode);
+
+static void check_for_space(struct pool *pool)
+{
+ int r;
+ dm_block_t nr_free;
+
+ if (get_pool_mode(pool) != PM_OUT_OF_DATA_SPACE)
+ return;
+
+ r = dm_pool_get_free_block_count(pool->pmd, &nr_free);
+ if (r)
+ return;
+
+ if (nr_free)
+ set_pool_mode(pool, PM_WRITE);
+}
+
/*
* A non-zero return indicates read_only or fail_io mode.
* Many callers don't care about the return value.
@@ -1141,6 +1159,8 @@ static int commit(struct pool *pool)
r = dm_pool_commit_metadata(pool->pmd);
if (r)
metadata_operation_failed(pool, "dm_pool_commit_metadata", r);
+ else
+ check_for_space(pool);
return r;
}
@@ -1159,8 +1179,6 @@ static void check_low_water_mark(struct pool *pool, dm_block_t free_blocks)
}
}
-static void set_pool_mode(struct pool *pool, enum pool_mode new_mode);
-
static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
{
int r;
@@ -2155,7 +2173,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
pool->process_cell = process_cell_read_only;
pool->process_discard_cell = process_discard_cell;
pool->process_prepared_mapping = process_prepared_mapping;
- pool->process_prepared_discard = process_prepared_discard_passdown;
+ pool->process_prepared_discard = process_prepared_discard;
if (!pool->pf.error_if_no_space && no_space_timeout)
queue_delayed_work(pool->wq, &pool->no_space_timeout, no_space_timeout);
@@ -3814,6 +3832,8 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
r = -EINVAL;
goto bad;
}
+ atomic_set(&tc->refcount, 1);
+ init_completion(&tc->can_destroy);
list_add_tail_rcu(&tc->list, &tc->pool->active_thins);
spin_unlock_irqrestore(&tc->pool->lock, flags);
/*
@@ -3826,9 +3846,6 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
dm_put(pool_md);
- atomic_set(&tc->refcount, 1);
- init_completion(&tc->can_destroy);
-
return 0;
bad:
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 8f37ed215b19..2caf5b374649 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -206,6 +206,9 @@ struct mapped_device {
/* zero-length flush that will be cloned and submitted to targets */
struct bio flush_bio;
+ /* the number of internal suspends */
+ unsigned internal_suspend_count;
+
struct dm_stats stats;
};
@@ -605,13 +608,10 @@ static void end_io_acct(struct dm_io *io)
struct mapped_device *md = io->md;
struct bio *bio = io->bio;
unsigned long duration = jiffies - io->start_time;
- int pending, cpu;
+ int pending;
int rw = bio_data_dir(bio);
- cpu = part_stat_lock();
- part_round_stats(cpu, &dm_disk(md)->part0);
- part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
- part_stat_unlock();
+ generic_end_io_acct(rw, &dm_disk(md)->part0, io->start_time);
if (unlikely(dm_stats_used(&md->stats)))
dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_iter.bi_sector,
@@ -902,7 +902,7 @@ static void disable_write_same(struct mapped_device *md)
static void clone_endio(struct bio *bio, int error)
{
- int r = 0;
+ int r = error;
struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
struct dm_io *io = tio->io;
struct mapped_device *md = tio->io->md;
@@ -1651,16 +1651,12 @@ static void _dm_request(struct request_queue *q, struct bio *bio)
{
int rw = bio_data_dir(bio);
struct mapped_device *md = q->queuedata;
- int cpu;
int srcu_idx;
struct dm_table *map;
map = dm_get_live_table(md, &srcu_idx);
- cpu = part_stat_lock();
- part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]);
- part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio));
- part_stat_unlock();
+ generic_start_io_acct(rw, bio_sectors(bio), &dm_disk(md)->part0);
/* if we're suspended, we have to queue this io for later */
if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
@@ -2935,7 +2931,7 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_fla
{
struct dm_table *map = NULL;
- if (dm_suspended_internally_md(md))
+ if (md->internal_suspend_count++)
return; /* nested internal suspend */
if (dm_suspended_md(md)) {
@@ -2960,7 +2956,9 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_fla
static void __dm_internal_resume(struct mapped_device *md)
{
- if (!dm_suspended_internally_md(md))
+ BUG_ON(!md->internal_suspend_count);
+
+ if (--md->internal_suspend_count)
return; /* resume from nested internal suspend */
if (dm_suspended_md(md))
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 9233c71138f1..709755fb6d7b 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -247,7 +247,6 @@ static void md_make_request(struct request_queue *q, struct bio *bio)
{
const int rw = bio_data_dir(bio);
struct mddev *mddev = q->queuedata;
- int cpu;
unsigned int sectors;
if (mddev == NULL || mddev->pers == NULL
@@ -284,10 +283,7 @@ static void md_make_request(struct request_queue *q, struct bio *bio)
sectors = bio_sectors(bio);
mddev->pers->make_request(mddev, bio);
- cpu = part_stat_lock();
- part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
- part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
- part_stat_unlock();
+ generic_start_io_acct(rw, sectors, &mddev->gendisk->part0);
if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
wake_up(&mddev->sb_wait);
@@ -2695,7 +2691,8 @@ static ssize_t new_offset_store(struct md_rdev *rdev,
if (kstrtoull(buf, 10, &new_offset) < 0)
return -EINVAL;
- if (mddev->sync_thread)
+ if (mddev->sync_thread ||
+ test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
return -EBUSY;
if (new_offset == rdev->data_offset)
/* reset is always permitted */
@@ -3272,6 +3269,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
*/
if (mddev->sync_thread ||
+ test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
mddev->reshape_position != MaxSector ||
mddev->sysfs_active)
return -EBUSY;
@@ -4026,6 +4024,7 @@ action_store(struct mddev *mddev, const char *page, size_t len)
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
+ flush_workqueue(md_misc_wq);
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_reap_sync_thread(mddev);
@@ -5044,6 +5043,7 @@ static void md_clean(struct mddev *mddev)
static void __md_stop_writes(struct mddev *mddev)
{
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ flush_workqueue(md_misc_wq);
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_reap_sync_thread(mddev);
@@ -5104,19 +5104,22 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
md_wakeup_thread(mddev->thread);
}
- if (mddev->sync_thread) {
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+ if (mddev->sync_thread)
/* Thread might be blocked waiting for metadata update
* which will now never happen */
wake_up_process(mddev->sync_thread->tsk);
- }
+
mddev_unlock(mddev);
- wait_event(resync_wait, mddev->sync_thread == NULL);
+ wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
+ &mddev->recovery));
mddev_lock_nointr(mddev);
mutex_lock(&mddev->open_mutex);
if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
mddev->sync_thread ||
+ test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
(bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
printk("md: %s still in use.\n",mdname(mddev));
if (did_freeze) {
@@ -5162,20 +5165,24 @@ static int do_md_stop(struct mddev *mddev, int mode,
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
md_wakeup_thread(mddev->thread);
}
- if (mddev->sync_thread) {
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+ if (mddev->sync_thread)
/* Thread might be blocked waiting for metadata update
* which will now never happen */
wake_up_process(mddev->sync_thread->tsk);
- }
+
mddev_unlock(mddev);
- wait_event(resync_wait, mddev->sync_thread == NULL);
+ wait_event(resync_wait, (mddev->sync_thread == NULL &&
+ !test_bit(MD_RECOVERY_RUNNING,
+ &mddev->recovery)));
mddev_lock_nointr(mddev);
mutex_lock(&mddev->open_mutex);
if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
mddev->sysfs_active ||
mddev->sync_thread ||
+ test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
(bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
printk("md: %s still in use.\n",mdname(mddev));
mutex_unlock(&mddev->open_mutex);
@@ -5950,7 +5957,8 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
* of each device. If num_sectors is zero, we find the largest size
* that fits.
*/
- if (mddev->sync_thread)
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
+ mddev->sync_thread)
return -EBUSY;
if (mddev->ro)
return -EROFS;
@@ -5981,7 +5989,9 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks)
if (raid_disks <= 0 ||
(mddev->max_disks && raid_disks >= mddev->max_disks))
return -EINVAL;
- if (mddev->sync_thread || mddev->reshape_position != MaxSector)
+ if (mddev->sync_thread ||
+ test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
+ mddev->reshape_position != MaxSector)
return -EBUSY;
rdev_for_each(rdev, mddev) {
@@ -6969,7 +6979,7 @@ static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
int mask;
if (md_unloading)
- return POLLIN|POLLRDNORM|POLLERR|POLLPRI;;
+ return POLLIN|POLLRDNORM|POLLERR|POLLPRI;
poll_wait(filp, &md_event_waiters, wait);
/* always allow read */
@@ -7593,6 +7603,7 @@ static void md_start_sync(struct work_struct *ws)
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
+ wake_up(&resync_wait);
if (test_and_clear_bit(MD_RECOVERY_RECOVER,
&mddev->recovery))
if (mddev->sysfs_action)
@@ -7761,6 +7772,7 @@ void md_check_recovery(struct mddev *mddev)
not_running:
if (!mddev->sync_thread) {
clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
+ wake_up(&resync_wait);
if (test_and_clear_bit(MD_RECOVERY_RECOVER,
&mddev->recovery))
if (mddev->sysfs_action)
@@ -7779,7 +7791,6 @@ void md_reap_sync_thread(struct mddev *mddev)
/* resync has finished, collect result */
md_unregister_thread(&mddev->sync_thread);
- wake_up(&resync_wait);
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
/* success...*/
@@ -7807,6 +7818,7 @@ void md_reap_sync_thread(struct mddev *mddev)
clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+ wake_up(&resync_wait);
/* flag recovery needed just to double check */
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
sysfs_notify_dirent_safe(mddev->sysfs_action);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 9c66e5997fc8..c1b0d52bfcb0 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2917,8 +2917,11 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
(sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite &&
(!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) &&
!test_bit(R5_OVERWRITE, &fdev[0]->flags)) ||
- (sh->raid_conf->level == 6 && s->failed && s->to_write &&
- s->to_write - s->non_overwrite < sh->raid_conf->raid_disks - 2 &&
+ ((sh->raid_conf->level == 6 ||
+ sh->sector >= sh->raid_conf->mddev->recovery_cp)
+ && s->failed && s->to_write &&
+ (s->to_write - s->non_overwrite <
+ sh->raid_conf->raid_disks - sh->raid_conf->max_degraded) &&
(!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))))) {
/* we would like to get this block, possibly by computing it,
* otherwise read it if the backing disk is insync
OpenPOWER on IntegriCloud