summaryrefslogtreecommitdiffstats
path: root/drivers/md/md.h
diff options
context:
space:
mode:
authorGuoqing Jiang <gqjiang@suse.com>2019-06-19 17:30:46 +0800
committerSong Liu <songliubraving@fb.com>2019-06-20 16:35:59 -0700
commit3e148a3209792e04f63ec99701235c960765fc9a (patch)
tree2661881c5231d9ccf5ae1f92d078572dbdaf6850 /drivers/md/md.h
parent0ce353794b6c4dc88592b942e94b33cd1bf2ef54 (diff)
downloadtalos-op-linux-3e148a3209792e04f63ec99701235c960765fc9a.tar.gz
talos-op-linux-3e148a3209792e04f63ec99701235c960765fc9a.zip
md/raid1: fix potential data inconsistency issue with write behind device
For write-behind mode, we think write IO is complete once it has reached all the non-writemostly devices. It works fine for single queue devices. But for multiqueue device, if there are lots of IOs come from upper layer, then the write-behind device could issue those IOs to different queues, depends on the each queue's delay, so there is no guarantee that those IOs can arrive in order. To address the issue, we need to check the collision among write behind IOs, we can only continue without collision, otherwise wait for the completion of previous collisioned IO. And WBCollision is introduced for multiqueue device which is worked under write-behind mode. But this patch doesn't handle below cases which could have the data inconsistency issue as well, these cases will be handled in later patches. 1. modify max_write_behind by write backlog node. 2. add or remove array's bitmap dynamically. 3. the change of member disk. Reviewed-by: NeilBrown <neilb@suse.com> Signed-off-by: Guoqing Jiang <gqjiang@suse.com> Signed-off-by: Song Liu <songliubraving@fb.com>
Diffstat (limited to 'drivers/md/md.h')
-rw-r--r--drivers/md/md.h21
1 files changed, 21 insertions, 0 deletions
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 7c930c091193..d449d514cff9 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -109,6 +109,14 @@ struct md_rdev {
* for reporting to userspace and storing
* in superblock.
*/
+
+ /*
+ * The members for check collision of write behind IOs.
+ */
+ struct list_head wb_list;
+ spinlock_t wb_list_lock;
+ wait_queue_head_t wb_io_wait;
+
struct work_struct del_work; /* used for delayed sysfs removal */
struct kernfs_node *sysfs_state; /* handle for 'state'
@@ -193,6 +201,10 @@ enum flag_bits {
* it didn't fail, so don't use FailFast
* any more for metadata
*/
+ WBCollisionCheck, /*
+ * multiqueue device should check if there
+ * is collision between write behind bios.
+ */
};
static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
@@ -245,6 +257,14 @@ enum mddev_sb_flags {
MD_SB_NEED_REWRITE, /* metadata write needs to be repeated */
};
+#define NR_WB_INFOS 8
+/* record current range of write behind IOs */
+struct wb_info {
+ sector_t lo;
+ sector_t hi;
+ struct list_head list;
+};
+
struct mddev {
void *private;
struct md_personality *pers;
@@ -461,6 +481,7 @@ struct mddev {
*/
struct work_struct flush_work;
struct work_struct event_work; /* used by dm to report failure event */
+ mempool_t *wb_info_pool;
void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
struct md_cluster_info *cluster_info;
unsigned int good_device_nr; /* good device num within cluster raid */
OpenPOWER on IntegriCloud