summaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
authorSong Liu <songliubraving@fb.com>2017-11-19 22:17:01 -0800
committerShaohua Li <shli@fb.com>2017-12-11 08:52:34 -0800
commitd5d885fd514fcebc9da5503c88aa0112df7514ef (patch)
tree71cf428b0bba2cf5c1afd32b8a32fc3ab5724708 /drivers/md
parent50c4c4e268a2d7a3e58ebb698ac74da0de40ae36 (diff)
downloadblackbird-op-linux-d5d885fd514fcebc9da5503c88aa0112df7514ef.tar.gz
blackbird-op-linux-d5d885fd514fcebc9da5503c88aa0112df7514ef.zip
md: introduce new personality funciton start()
In do_md_run(), md threads should not wake up until the array is fully initialized in md_run(). However, in raid5_run(), raid5-cache may wake up mddev->thread to flush stripes that need to be written back. This design doesn't break badly right now. But it could lead to bad bug in the future. This patch tries to resolve this problem by splitting start up work into two personality functions, run() and start(). Tasks that do not require the md threads should go into run(), while task that require the md threads go into start(). r5l_load_log() is moved to raid5_start(), so it is not called until the md threads are started in do_md_run(). Signed-off-by: Song Liu <songliubraving@fb.com> Signed-off-by: Shaohua Li <shli@fb.com>
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/dm-raid.c9
-rw-r--r--drivers/md/md.c26
-rw-r--r--drivers/md/md.h8
-rw-r--r--drivers/md/raid5-cache.c22
-rw-r--r--drivers/md/raid5-log.h1
-rw-r--r--drivers/md/raid5.c10
6 files changed, 65 insertions, 11 deletions
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 6319d846e0ad..e5ef0757fe23 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3151,6 +3151,14 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
+ r = md_start(&rs->md);
+
+ if (r) {
+ ti->error = "Failed to start raid array";
+ mddev_unlock(&rs->md);
+ goto bad_md_start;
+ }
+
rs->callbacks.congested_fn = raid_is_congested;
dm_table_add_target_callbacks(ti->table, &rs->callbacks);
@@ -3198,6 +3206,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
mddev_unlock(&rs->md);
return 0;
+bad_md_start:
bad_journal_mode_set:
bad_stripe_cache:
bad_check_reshape:
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 4e4dee0ec2de..a71adb3c34b9 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5560,11 +5560,6 @@ int md_run(struct mddev *mddev)
if (start_readonly && mddev->ro == 0)
mddev->ro = 2; /* read-only, but switch on first write */
- /*
- * NOTE: some pers->run(), for example r5l_recovery_log(), wakes
- * up mddev->thread. It is important to initialize critical
- * resources for mddev->thread BEFORE calling pers->run().
- */
err = pers->run(mddev);
if (err)
pr_warn("md: pers->run() failed ...\n");
@@ -5678,6 +5673,9 @@ static int do_md_run(struct mddev *mddev)
if (mddev_is_clustered(mddev))
md_allow_write(mddev);
+ /* run start up tasks that require md_thread */
+ md_start(mddev);
+
md_wakeup_thread(mddev->thread);
md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
@@ -5689,6 +5687,21 @@ out:
return err;
}
+int md_start(struct mddev *mddev)
+{
+ int ret = 0;
+
+ if (mddev->pers->start) {
+ set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
+ md_wakeup_thread(mddev->thread);
+ ret = mddev->pers->start(mddev);
+ clear_bit(MD_RECOVERY_WAIT, &mddev->recovery);
+ md_wakeup_thread(mddev->sync_thread);
+ }
+ return ret;
+}
+EXPORT_SYMBOL_GPL(md_start);
+
static int restart_array(struct mddev *mddev)
{
struct gendisk *disk = mddev->gendisk;
@@ -8169,7 +8182,8 @@ void md_do_sync(struct md_thread *thread)
int ret;
/* just incase thread restarts... */
- if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
+ if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
+ test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
return;
if (mddev->ro) {/* never try to sync a read-only array */
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 7d6bcf0eba0c..be8f72a9e30b 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -485,6 +485,7 @@ enum recovery_flags {
MD_RECOVERY_RESHAPE, /* A reshape is happening */
MD_RECOVERY_FROZEN, /* User request to abort, and not restart, any action */
MD_RECOVERY_ERROR, /* sync-action interrupted because io-error */
+ MD_RECOVERY_WAIT, /* waiting for pers->start() to finish */
};
static inline int __must_check mddev_lock(struct mddev *mddev)
@@ -523,7 +524,13 @@ struct md_personality
struct list_head list;
struct module *owner;
bool (*make_request)(struct mddev *mddev, struct bio *bio);
+ /*
+ * start up works that do NOT require md_thread. tasks that
+ * requires md_thread should go into start()
+ */
int (*run)(struct mddev *mddev);
+ /* start up works that require md threads */
+ int (*start)(struct mddev *mddev);
void (*free)(struct mddev *mddev, void *priv);
void (*status)(struct seq_file *seq, struct mddev *mddev);
/* error_handler must set ->faulty and clear ->in_sync
@@ -687,6 +694,7 @@ extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);
extern void mddev_init(struct mddev *mddev);
extern int md_run(struct mddev *mddev);
+extern int md_start(struct mddev *mddev);
extern void md_stop(struct mddev *mddev);
extern void md_stop_writes(struct mddev *mddev);
extern int md_rdev_init(struct md_rdev *rdev);
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 39f31f07ffe9..f259a5fd3fbd 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -2448,7 +2448,6 @@ static void r5c_recovery_flush_data_only_stripes(struct r5l_log *log,
raid5_release_stripe(sh);
}
- md_wakeup_thread(conf->mddev->thread);
/* reuse conf->wait_for_quiescent in recovery */
wait_event(conf->wait_for_quiescent,
atomic_read(&conf->active_stripes) == 0);
@@ -3036,6 +3035,23 @@ ioerr:
return ret;
}
+int r5l_start(struct r5l_log *log)
+{
+ int ret;
+
+ if (!log)
+ return 0;
+
+ ret = r5l_load_log(log);
+ if (ret) {
+ struct mddev *mddev = log->rdev->mddev;
+ struct r5conf *conf = mddev->private;
+
+ r5l_exit_log(conf);
+ }
+ return ret;
+}
+
void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev)
{
struct r5conf *conf = mddev->private;
@@ -3138,13 +3154,9 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
rcu_assign_pointer(conf->log, log);
- if (r5l_load_log(log))
- goto error;
-
set_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
return 0;
-error:
rcu_assign_pointer(conf->log, NULL);
md_unregister_thread(&log->reclaim_thread);
reclaim_thread:
diff --git a/drivers/md/raid5-log.h b/drivers/md/raid5-log.h
index 284578b0a349..3860041e8b74 100644
--- a/drivers/md/raid5-log.h
+++ b/drivers/md/raid5-log.h
@@ -32,6 +32,7 @@ extern struct md_sysfs_entry r5c_journal_mode;
extern void r5c_update_on_rdev_error(struct mddev *mddev,
struct md_rdev *rdev);
extern bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect);
+extern int r5l_start(struct r5l_log *log);
extern struct dma_async_tx_descriptor *
ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 98ce4272ace9..5a2a29bd02dd 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -8364,6 +8364,13 @@ static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf)
return err;
}
+static int raid5_start(struct mddev *mddev)
+{
+ struct r5conf *conf = mddev->private;
+
+ return r5l_start(conf->log);
+}
+
static struct md_personality raid6_personality =
{
.name = "raid6",
@@ -8371,6 +8378,7 @@ static struct md_personality raid6_personality =
.owner = THIS_MODULE,
.make_request = raid5_make_request,
.run = raid5_run,
+ .start = raid5_start,
.free = raid5_free,
.status = raid5_status,
.error_handler = raid5_error,
@@ -8395,6 +8403,7 @@ static struct md_personality raid5_personality =
.owner = THIS_MODULE,
.make_request = raid5_make_request,
.run = raid5_run,
+ .start = raid5_start,
.free = raid5_free,
.status = raid5_status,
.error_handler = raid5_error,
@@ -8420,6 +8429,7 @@ static struct md_personality raid4_personality =
.owner = THIS_MODULE,
.make_request = raid5_make_request,
.run = raid5_run,
+ .start = raid5_start,
.free = raid5_free,
.status = raid5_status,
.error_handler = raid5_error,
OpenPOWER on IntegriCloud