summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGuoqing Jiang <guoqing.jiang@cloud.ionos.com>2019-12-23 10:49:00 +0100
committerSong Liu <songliubraving@fb.com>2020-01-13 11:44:10 -0800
commit69b00b5bb23552d43e8bbed73ef6624604bb94a2 (patch)
tree6192d67dc8dab6d064c370666d990f22081c189b
parent4d26d32fe4dafd29e168addb7c11949a36e7e5f8 (diff)
downloadtalos-op-linux-69b00b5bb23552d43e8bbed73ef6624604bb94a2.tar.gz
talos-op-linux-69b00b5bb23552d43e8bbed73ef6624604bb94a2.zip
md: introduce a new struct for IO serialization
Obviously, IO serialization could cause the degradation of performance a lot. In order to reduce the degradation, so a rb interval tree is added in raid1 to speed up the check of collision. So, a rb root is needed in md_rdev, then abstract all the serialize related members to a new struct (serial_in_rdev), embed it into md_rdev. Of course, we need to free the struct if it is not needed anymore, so rdev/rdevs_uninit_serial are added accordingly. And they should be called when destroty memory pool or can't alloc memory. And we need to consider to call mddev_destroy_serial_pool in case serialize_policy/write-behind is disabled, bitmap is destroyed or in __md_stop_writes. Signed-off-by: Guoqing Jiang <guoqing.jiang@cloud.ionos.com> Signed-off-by: Song Liu <songliubraving@fb.com>
-rw-r--r--drivers/md/md-bitmap.c12
-rw-r--r--drivers/md/md.c80
-rw-r--r--drivers/md/md.h26
-rw-r--r--drivers/md/raid1.c61
4 files changed, 116 insertions, 63 deletions
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index 92f0d45946e8..e230052c2107 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -1789,10 +1789,8 @@ void md_bitmap_destroy(struct mddev *mddev)
return;
md_bitmap_wait_behind_writes(mddev);
- if (!mddev->serialize_policy) {
- mempool_destroy(mddev->serial_info_pool);
- mddev->serial_info_pool = NULL;
- }
+ if (!mddev->serialize_policy)
+ mddev_destroy_serial_pool(mddev, NULL, true);
mutex_lock(&mddev->bitmap_info.mutex);
spin_lock(&mddev->lock);
@@ -2478,10 +2476,8 @@ backlog_store(struct mddev *mddev, const char *buf, size_t len)
mddev->bitmap_info.max_write_behind = backlog;
if (!backlog && mddev->serial_info_pool) {
/* serial_info_pool is not needed if backlog is zero */
- if (!mddev->serialize_policy) {
- mempool_destroy(mddev->serial_info_pool);
- mddev->serial_info_pool = NULL;
- }
+ if (!mddev->serialize_policy)
+ mddev_destroy_serial_pool(mddev, NULL, false);
} else if (backlog && !mddev->serial_info_pool) {
/* serial_info_pool is needed since backlog is not zero */
struct md_rdev *rdev;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 788559f42d43..9c4e61c988ac 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -125,25 +125,59 @@ static inline int speed_max(struct mddev *mddev)
mddev->sync_speed_max : sysctl_speed_limit_max;
}
+static void rdev_uninit_serial(struct md_rdev *rdev)
+{
+ if (!test_and_clear_bit(CollisionCheck, &rdev->flags))
+ return;
+
+ kfree(rdev->serial);
+ rdev->serial = NULL;
+}
+
+static void rdevs_uninit_serial(struct mddev *mddev)
+{
+ struct md_rdev *rdev;
+
+ rdev_for_each(rdev, mddev)
+ rdev_uninit_serial(rdev);
+}
+
static int rdev_init_serial(struct md_rdev *rdev)
{
- spin_lock_init(&rdev->serial_list_lock);
- INIT_LIST_HEAD(&rdev->serial_list);
- init_waitqueue_head(&rdev->serial_io_wait);
+ struct serial_in_rdev *serial = NULL;
+
+ if (test_bit(CollisionCheck, &rdev->flags))
+ return 0;
+
+ serial = kmalloc(sizeof(struct serial_in_rdev), GFP_KERNEL);
+ if (!serial)
+ return -ENOMEM;
+
+ spin_lock_init(&serial->serial_lock);
+ serial->serial_rb = RB_ROOT_CACHED;
+ init_waitqueue_head(&serial->serial_io_wait);
+ rdev->serial = serial;
set_bit(CollisionCheck, &rdev->flags);
- return 1;
+ return 0;
}
-static void rdevs_init_serial(struct mddev *mddev)
+static int rdevs_init_serial(struct mddev *mddev)
{
struct md_rdev *rdev;
+ int ret = 0;
rdev_for_each(rdev, mddev) {
- if (test_bit(CollisionCheck, &rdev->flags))
- continue;
- rdev_init_serial(rdev);
+ ret = rdev_init_serial(rdev);
+ if (ret)
+ break;
}
+
+ /* Free all resources if pool is not existed */
+ if (ret && !mddev->serial_info_pool)
+ rdevs_uninit_serial(mddev);
+
+ return ret;
}
/*
@@ -166,6 +200,8 @@ static int rdev_need_serial(struct md_rdev *rdev)
void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
bool is_suspend)
{
+ int ret = 0;
+
if (rdev && !rdev_need_serial(rdev) &&
!test_bit(CollisionCheck, &rdev->flags))
return;
@@ -174,9 +210,11 @@ void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
mddev_suspend(mddev);
if (!rdev)
- rdevs_init_serial(mddev);
+ ret = rdevs_init_serial(mddev);
else
- rdev_init_serial(rdev);
+ ret = rdev_init_serial(rdev);
+ if (ret)
+ goto abort;
if (mddev->serial_info_pool == NULL) {
unsigned int noio_flag;
@@ -186,9 +224,13 @@ void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
sizeof(struct serial_info));
memalloc_noio_restore(noio_flag);
- if (!mddev->serial_info_pool)
+ if (!mddev->serial_info_pool) {
+ rdevs_uninit_serial(mddev);
pr_err("can't alloc memory pool for serialization\n");
+ }
}
+
+abort:
if (!is_suspend)
mddev_resume(mddev);
}
@@ -199,8 +241,8 @@ void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
* 2. when bitmap is destroyed while policy is not enabled.
* 3. for disable policy, the pool is destroyed only when no rdev needs it.
*/
-static void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
- bool is_suspend)
+void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
+ bool is_suspend)
{
if (rdev && !test_bit(CollisionCheck, &rdev->flags))
return;
@@ -213,8 +255,9 @@ static void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
mddev_suspend(mddev);
rdev_for_each(temp, mddev) {
if (!rdev) {
- if (!rdev_need_serial(temp))
- clear_bit(CollisionCheck, &temp->flags);
+ if (!mddev->serialize_policy ||
+ !rdev_need_serial(temp))
+ rdev_uninit_serial(temp);
else
num++;
} else if (temp != rdev &&
@@ -223,7 +266,7 @@ static void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
}
if (rdev)
- clear_bit(CollisionCheck, &rdev->flags);
+ rdev_uninit_serial(rdev);
if (num)
pr_info("The mempool could be used by other devices\n");
@@ -6117,8 +6160,9 @@ static void __md_stop_writes(struct mddev *mddev)
mddev->in_sync = 1;
md_update_sb(mddev, 1);
}
- mempool_destroy(mddev->serial_info_pool);
- mddev->serial_info_pool = NULL;
+ /* disable policy to guarantee rdevs free resources for serialization */
+ mddev->serialize_policy = 0;
+ mddev_destroy_serial_pool(mddev, NULL, true);
}
void md_stop_writes(struct mddev *mddev)
diff --git a/drivers/md/md.h b/drivers/md/md.h
index f51a3afaee1b..acd681939112 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -32,6 +32,16 @@
* be retried.
*/
#define MD_FAILFAST (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT)
+
+/*
+ * The struct embedded in rdev is used to serialize IO.
+ */
+struct serial_in_rdev {
+ struct rb_root_cached serial_rb;
+ spinlock_t serial_lock;
+ wait_queue_head_t serial_io_wait;
+};
+
/*
* MD's 'extended' device
*/
@@ -110,12 +120,7 @@ struct md_rdev {
* in superblock.
*/
- /*
- * The members for check collision of write IOs.
- */
- struct list_head serial_list;
- spinlock_t serial_list_lock;
- wait_queue_head_t serial_io_wait;
+ struct serial_in_rdev *serial; /* used for raid1 io serialization */
struct work_struct del_work; /* used for delayed sysfs removal */
@@ -266,9 +271,10 @@ enum mddev_sb_flags {
#define NR_SERIAL_INFOS 8
/* record current range of serialize IOs */
struct serial_info {
- sector_t lo;
- sector_t hi;
- struct list_head list;
+ struct rb_node node;
+ sector_t start; /* start sector of rb node */
+ sector_t last; /* end sector of rb node */
+ sector_t _subtree_last; /* highest sector in subtree of rb node */
};
struct mddev {
@@ -740,6 +746,8 @@ extern void md_update_sb(struct mddev *mddev, int force);
extern void md_kick_rdev_from_array(struct md_rdev * rdev);
extern void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
bool is_suspend);
+extern void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
+ bool is_suspend);
struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 3ad2f5a59d08..5c6a03747448 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -29,6 +29,7 @@
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/ratelimit.h>
+#include <linux/interval_tree_generic.h>
#include <trace/events/block.h>
@@ -50,55 +51,58 @@ static void lower_barrier(struct r1conf *conf, sector_t sector_nr);
#include "raid1-10.c"
+#define START(node) ((node)->start)
+#define LAST(node) ((node)->last)
+INTERVAL_TREE_DEFINE(struct serial_info, node, sector_t, _subtree_last,
+ START, LAST, static inline, raid1_rb);
+
static int check_and_add_serial(struct md_rdev *rdev, sector_t lo, sector_t hi)
{
- struct serial_info *wi, *temp_wi;
+ struct serial_info *si;
unsigned long flags;
int ret = 0;
struct mddev *mddev = rdev->mddev;
+ struct serial_in_rdev *serial = rdev->serial;
- wi = mempool_alloc(mddev->serial_info_pool, GFP_NOIO);
-
- spin_lock_irqsave(&rdev->serial_list_lock, flags);
- list_for_each_entry(temp_wi, &rdev->serial_list, list) {
- /* collision happened */
- if (hi > temp_wi->lo && lo < temp_wi->hi) {
- ret = -EBUSY;
- break;
- }
- }
+ si = mempool_alloc(mddev->serial_info_pool, GFP_NOIO);
+ spin_lock_irqsave(&serial->serial_lock, flags);
+ /* collision happened */
+ if (raid1_rb_iter_first(&serial->serial_rb, lo, hi))
+ ret = -EBUSY;
if (!ret) {
- wi->lo = lo;
- wi->hi = hi;
- list_add(&wi->list, &rdev->serial_list);
+ si->start = lo;
+ si->last = hi;
+ raid1_rb_insert(si, &serial->serial_rb);
} else
- mempool_free(wi, mddev->serial_info_pool);
- spin_unlock_irqrestore(&rdev->serial_list_lock, flags);
+ mempool_free(si, mddev->serial_info_pool);
+ spin_unlock_irqrestore(&serial->serial_lock, flags);
return ret;
}
static void remove_serial(struct md_rdev *rdev, sector_t lo, sector_t hi)
{
- struct serial_info *wi;
+ struct serial_info *si;
unsigned long flags;
int found = 0;
struct mddev *mddev = rdev->mddev;
-
- spin_lock_irqsave(&rdev->serial_list_lock, flags);
- list_for_each_entry(wi, &rdev->serial_list, list)
- if (hi == wi->hi && lo == wi->lo) {
- list_del(&wi->list);
- mempool_free(wi, mddev->serial_info_pool);
+ struct serial_in_rdev *serial = rdev->serial;
+
+ spin_lock_irqsave(&serial->serial_lock, flags);
+ for (si = raid1_rb_iter_first(&serial->serial_rb, lo, hi);
+ si; si = raid1_rb_iter_next(si, lo, hi)) {
+ if (si->start == lo && si->last == hi) {
+ raid1_rb_remove(si, &serial->serial_rb);
+ mempool_free(si, mddev->serial_info_pool);
found = 1;
break;
}
-
+ }
if (!found)
WARN(1, "The write IO is not recorded for serialization\n");
- spin_unlock_irqrestore(&rdev->serial_list_lock, flags);
- wake_up(&rdev->serial_io_wait);
+ spin_unlock_irqrestore(&serial->serial_lock, flags);
+ wake_up(&serial->serial_io_wait);
}
/*
@@ -1482,6 +1486,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
for (i = 0; i < disks; i++) {
struct bio *mbio = NULL;
struct md_rdev *rdev = conf->mirrors[i].rdev;
+ struct serial_in_rdev *serial = rdev->serial;
if (!r1_bio->bios[i])
continue;
@@ -1510,13 +1515,13 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
if (r1_bio->behind_master_bio) {
if (test_bit(CollisionCheck, &rdev->flags))
- wait_event(rdev->serial_io_wait,
+ wait_event(serial->serial_io_wait,
check_and_add_serial(rdev, lo, hi)
== 0);
if (test_bit(WriteMostly, &rdev->flags))
atomic_inc(&r1_bio->behind_remaining);
} else if (mddev->serialize_policy)
- wait_event(rdev->serial_io_wait,
+ wait_event(serial->serial_io_wait,
check_and_add_serial(rdev, lo, hi) == 0);
r1_bio->bios[i] = mbio;
OpenPOWER on IntegriCloud