From 06eb061f48594aa369f6e852b352410298b317a8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 7 Apr 2017 16:50:44 -0700 Subject: dm mpath: requeue after a small delay if blk_get_request() fails If blk_get_request() returns ENODEV then multipath_clone_and_map() causes a request to be requeued immediately. This can cause a kworker thread to spend 100% of the CPU time of a single core in __blk_mq_run_hw_queue() and also can cause device removal to never finish. Avoid this by only requeuing after a delay if blk_get_request() fails. Additionally, reduce the requeue delay. Cc: stable@vger.kernel.org # 4.9+ Signed-off-by: Bart Van Assche Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 7f223dbed49f..a4cc4d42117b 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -484,7 +484,6 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, struct request **__clone) { struct multipath *m = ti->private; - int r = DM_MAPIO_REQUEUE; size_t nr_bytes = blk_rq_bytes(rq); struct pgpath *pgpath; struct block_device *bdev; @@ -503,7 +502,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, } else if (test_bit(MPATHF_QUEUE_IO, &m->flags) || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) { pg_init_all_paths(m); - return r; + return DM_MAPIO_REQUEUE; } memset(mpio, 0, sizeof(*mpio)); @@ -517,7 +516,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, GFP_ATOMIC); if (IS_ERR(clone)) { /* EBUSY, ENODEV or EWOULDBLOCK: requeue */ - return r; + return DM_MAPIO_DELAY_REQUEUE; } clone->bio = clone->biotail = NULL; clone->rq_disk = bdev->bd_disk; -- cgit v1.2.1 From 89bfce763e43fa4897e0d3af6b29ed909df64cfd Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 27 Apr 2017 10:11:14 -0700 Subject: dm mpath: split and rename activate_path() to prepare for its expanded use activate_path() is renamed to activate_path_work() which now calls activate_or_offline_path(). activate_or_offline_path() will be used by the next commit. Signed-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Christoph Hellwig Cc: Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index a4cc4d42117b..4b891d9ff349 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -111,7 +111,8 @@ typedef int (*action_fn) (struct pgpath *pgpath); static struct workqueue_struct *kmultipathd, *kmpath_handlerd; static void trigger_event(struct work_struct *work); -static void activate_path(struct work_struct *work); +static void activate_or_offline_path(struct pgpath *pgpath); +static void activate_path_work(struct work_struct *work); static void process_queued_bios(struct work_struct *work); /*----------------------------------------------- @@ -136,7 +137,7 @@ static struct pgpath *alloc_pgpath(void) if (pgpath) { pgpath->is_active = true; - INIT_DELAYED_WORK(&pgpath->activate_path, activate_path); + INIT_DELAYED_WORK(&pgpath->activate_path, activate_path_work); } return pgpath; @@ -1436,10 +1437,8 @@ out: spin_unlock_irqrestore(&m->lock, flags); } -static void activate_path(struct work_struct *work) +static void activate_or_offline_path(struct pgpath *pgpath) { - struct pgpath *pgpath = - container_of(work, struct pgpath, activate_path.work); struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev); if (pgpath->is_active && !blk_queue_dying(q)) @@ -1448,6 +1447,14 @@ static void activate_path(struct work_struct *work) pg_init_done(pgpath, SCSI_DH_DEV_OFFLINED); } +static void activate_path_work(struct work_struct *work) +{ + struct pgpath *pgpath = + container_of(work, struct pgpath, activate_path.work); + + activate_or_offline_path(pgpath); +} + static int noretry_error(int error) { switch (error) { -- cgit v1.2.1 From 7083abbbfc4fa706ff72d27d33a5214881979336 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 27 Apr 2017 10:11:15 -0700 Subject: dm mpath: avoid that path removal can trigger an infinite loop If blk_get_request() fails, check whether the failure is due to a path being removed. If that is the case, fail the path by triggering a call to fail_path(). This avoids that the following scenario can be encountered while removing paths: * CPU usage of a kworker thread jumps to 100%. * Removing the DM device becomes impossible. Delay requeueing if blk_get_request() returns -EBUSY or -EWOULDBLOCK, and the queue is not dying, because in these cases immediate requeuing is inappropriate. Signed-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Christoph Hellwig Cc: Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 4b891d9ff349..f3c79f188747 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -489,6 +489,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, struct pgpath *pgpath; struct block_device *bdev; struct dm_mpath_io *mpio = get_mpio(map_context); + struct request_queue *q; struct request *clone; /* Do we need to select a new pgpath? */ @@ -511,12 +512,18 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, mpio->nr_bytes = nr_bytes; bdev = pgpath->path.dev->bdev; - - clone = blk_get_request(bdev_get_queue(bdev), - rq->cmd_flags | REQ_NOMERGE, - GFP_ATOMIC); + q = bdev_get_queue(bdev); + clone = blk_get_request(q, rq->cmd_flags | REQ_NOMERGE, GFP_ATOMIC); if (IS_ERR(clone)) { /* EBUSY, ENODEV or EWOULDBLOCK: requeue */ + bool queue_dying = blk_queue_dying(q); + DMERR_LIMIT("blk_get_request() returned %ld%s - requeuing", + PTR_ERR(clone), queue_dying ? " (path offline)" : ""); + if (queue_dying) { + atomic_inc(&m->pg_init_in_progress); + activate_or_offline_path(pgpath); + return DM_MAPIO_REQUEUE; + } return DM_MAPIO_DELAY_REQUEUE; } clone->bio = clone->biotail = NULL; -- cgit v1.2.1 From c1d7ecf7ca11d0edd3085262c8597203440d056c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 27 Apr 2017 10:11:16 -0700 Subject: dm mpath: delay requeuing while path initialization is in progress Requeuing a request immediately while path initialization is ongoing causes high CPU usage, something that is undesired. Hence delay requeuing while path initialization is in progress. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Cc: Christoph Hellwig Cc: Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index f3c79f188747..d85baffa3377 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -322,13 +322,16 @@ static int __pg_init_all_paths(struct multipath *m) return atomic_read(&m->pg_init_in_progress); } -static void pg_init_all_paths(struct multipath *m) +static int pg_init_all_paths(struct multipath *m) { + int ret; unsigned long flags; spin_lock_irqsave(&m->lock, flags); - __pg_init_all_paths(m); + ret = __pg_init_all_paths(m); spin_unlock_irqrestore(&m->lock, flags); + + return ret; } static void __switch_pg(struct multipath *m, struct priority_group *pg) @@ -503,7 +506,8 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, return -EIO; /* Failed */ } else if (test_bit(MPATHF_QUEUE_IO, &m->flags) || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) { - pg_init_all_paths(m); + if (pg_init_all_paths(m)) + return DM_MAPIO_DELAY_REQUEUE; return DM_MAPIO_REQUEUE; } -- cgit v1.2.1 From b194679fac3067e785f432109c69759aa75b038c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 27 Apr 2017 10:11:22 -0700 Subject: dm mpath: verify __pg_init_all_paths locking assumptions at runtime Verify at runtime that __pg_init_all_paths() is called with multipath.lock held if lockdep is enabled. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index d85baffa3377..730ec0be1afa 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -298,6 +298,8 @@ static int __pg_init_all_paths(struct multipath *m) struct pgpath *pgpath; unsigned long pg_init_delay = 0; + lockdep_assert_held(&m->lock); + if (atomic_read(&m->pg_init_in_progress) || test_bit(MPATHF_PG_INIT_DISABLED, &m->flags)) return 0; -- cgit v1.2.1 From 7e0d574f2683a2346c978613a72ff07afc89b17a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 27 Apr 2017 10:11:23 -0700 Subject: dm: introduce enum dm_queue_mode to cleanup related code Introduce an enumeration type for the queue mode. This patch does not change any functionality but makes the DM code easier to read. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 730ec0be1afa..8336332bd61f 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -90,7 +90,7 @@ struct multipath { atomic_t pg_init_in_progress; /* Only one pg_init allowed at once */ atomic_t pg_init_count; /* Number of times pg_init called */ - unsigned queue_mode; + enum dm_queue_mode queue_mode; struct mutex work_mutex; struct work_struct trigger_event; @@ -1700,6 +1700,9 @@ static void multipath_status(struct dm_target *ti, status_type_t type, case DM_TYPE_MQ_REQUEST_BASED: DMEMIT("queue_mode mq "); break; + default: + WARN_ON_ONCE(true); + break; } } } -- cgit v1.2.1 From ca5beb76c32af33e5be4e01c85c8bd5a067c4543 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 27 Apr 2017 10:11:24 -0700 Subject: dm mpath: micro-optimize the hot path relative to MPATHF_QUEUE_IF_NO_PATH Instead of checking MPATHF_QUEUE_IF_NO_PATH, MPATHF_SAVED_QUEUE_IF_NO_PATH and the no_flush flag to decide whether or not to push back a request (or bio) if there are no paths available, only clear MPATHF_QUEUE_IF_NO_PATH in queue_if_no_path() if no_flush has not been set. The result is that only a single bit has to be tested in the hot path to decide whether or not a request must be pushed back and also that m->lock does not have to be taken in the hot path. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 70 ++++++++------------------------------------------- 1 file changed, 11 insertions(+), 59 deletions(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 8336332bd61f..5cb1beccd1e2 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -441,47 +441,6 @@ failed: return NULL; } -/* - * Check whether bios must be queued in the device-mapper core rather - * than here in the target. - * - * If m->queue_if_no_path and m->saved_queue_if_no_path hold the - * same value then we are not between multipath_presuspend() - * and multipath_resume() calls and we have no need to check - * for the DMF_NOFLUSH_SUSPENDING flag. - */ -static bool __must_push_back(struct multipath *m) -{ - return ((test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) != - test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) && - dm_noflush_suspending(m->ti)); -} - -static bool must_push_back_rq(struct multipath *m) -{ - bool r; - unsigned long flags; - - spin_lock_irqsave(&m->lock, flags); - r = (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) || - __must_push_back(m)); - spin_unlock_irqrestore(&m->lock, flags); - - return r; -} - -static bool must_push_back_bio(struct multipath *m) -{ - bool r; - unsigned long flags; - - spin_lock_irqsave(&m->lock, flags); - r = __must_push_back(m); - spin_unlock_irqrestore(&m->lock, flags); - - return r; -} - /* * Map cloned requests (request-based multipath) */ @@ -503,7 +462,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, pgpath = choose_pgpath(m, nr_bytes); if (!pgpath) { - if (must_push_back_rq(m)) + if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) return DM_MAPIO_DELAY_REQUEUE; return -EIO; /* Failed */ } else if (test_bit(MPATHF_QUEUE_IO, &m->flags) || @@ -580,9 +539,9 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m } if (!pgpath) { - if (!must_push_back_bio(m)) - return -EIO; - return DM_MAPIO_REQUEUE; + if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) + return DM_MAPIO_REQUEUE; + return -EIO; } mpio->pgpath = pgpath; @@ -674,7 +633,7 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path, else clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags); } - if (queue_if_no_path) + if (queue_if_no_path || dm_noflush_suspending(m->ti)) set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags); else clear_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags); @@ -1519,12 +1478,9 @@ static int do_end_io(struct multipath *m, struct request *clone, if (mpio->pgpath) fail_path(mpio->pgpath); - if (!atomic_read(&m->nr_valid_paths)) { - if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { - if (!must_push_back_rq(m)) - r = -EIO; - } - } + if (atomic_read(&m->nr_valid_paths) == 0 && + !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) + r = -EIO; return r; } @@ -1565,13 +1521,9 @@ static int do_end_io_bio(struct multipath *m, struct bio *clone, if (mpio->pgpath) fail_path(mpio->pgpath); - if (!atomic_read(&m->nr_valid_paths)) { - if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { - if (!must_push_back_bio(m)) - return -EIO; - return DM_ENDIO_REQUEUE; - } - } + if (atomic_read(&m->nr_valid_paths) == 0 && + !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) + return -EIO; /* Queue for the daemon to resubmit */ dm_bio_restore(get_bio_details_from_bio(clone), clone); -- cgit v1.2.1 From 9a8ac3ae682e8760afebab71556a9a8d1b18f906 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 27 Apr 2017 10:11:25 -0700 Subject: dm mpath: cleanup QUEUE_IF_NO_PATH bit manipulation by introducing assign_bit() No functional change but makes the code easier to read. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 5cb1beccd1e2..cc529537c8ce 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -612,6 +612,14 @@ static void process_queued_bios(struct work_struct *work) blk_finish_plug(&plug); } +static void assign_bit(bool value, long nr, unsigned long *addr) +{ + if (value) + set_bit(nr, addr); + else + clear_bit(nr, addr); +} + /* * If we run out of usable paths, should we queue I/O or error it? */ @@ -621,23 +629,11 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path, unsigned long flags; spin_lock_irqsave(&m->lock, flags); - - if (save_old_value) { - if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) - set_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags); - else - clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags); - } else { - if (queue_if_no_path) - set_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags); - else - clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags); - } - if (queue_if_no_path || dm_noflush_suspending(m->ti)) - set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags); - else - clear_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags); - + assign_bit((save_old_value && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) || + (!save_old_value && queue_if_no_path), + MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags); + assign_bit(queue_if_no_path || dm_noflush_suspending(m->ti), + MPATHF_QUEUE_IF_NO_PATH, &m->flags); spin_unlock_irqrestore(&m->lock, flags); if (!queue_if_no_path) { @@ -1589,10 +1585,8 @@ static void multipath_resume(struct dm_target *ti) unsigned long flags; spin_lock_irqsave(&m->lock, flags); - if (test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) - set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags); - else - clear_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags); + assign_bit(test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags), + MPATHF_QUEUE_IF_NO_PATH, &m->flags); spin_unlock_irqrestore(&m->lock, flags); } -- cgit v1.2.1 From 86331f39a5935b092d3ea59446d416563ed05d16 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 27 Apr 2017 10:11:26 -0700 Subject: dm mpath: make it easier to detect unintended I/O request flushes I/O errors triggered by multipathd incorrectly not enabling the no-flush flag for DM_DEVICE_SUSPEND or DM_DEVICE_RESUME are hard to debug. Add more logging to make it easier to debug this. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index cc529537c8ce..fd7cdc4ce2a5 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -441,6 +441,23 @@ failed: return NULL; } +/* + * dm_report_EIO() is a macro instead of a function to make pr_debug() + * report the function name and line number of the function from which + * it has been invoked. + */ +#define dm_report_EIO(m) \ +({ \ + struct mapped_device *md = dm_table_get_md((m)->ti->table); \ + \ + pr_debug("%s: returning EIO; QIFNP = %d; SQIFNP = %d; DNFS = %d\n", \ + dm_device_name(md), \ + test_bit(MPATHF_QUEUE_IF_NO_PATH, &(m)->flags), \ + test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &(m)->flags), \ + dm_noflush_suspending((m)->ti)); \ + -EIO; \ +}) + /* * Map cloned requests (request-based multipath) */ @@ -464,7 +481,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, if (!pgpath) { if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) return DM_MAPIO_DELAY_REQUEUE; - return -EIO; /* Failed */ + return dm_report_EIO(m); /* Failed */ } else if (test_bit(MPATHF_QUEUE_IO, &m->flags) || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) { if (pg_init_all_paths(m)) @@ -541,7 +558,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m if (!pgpath) { if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) return DM_MAPIO_REQUEUE; - return -EIO; + return dm_report_EIO(m); } mpio->pgpath = pgpath; @@ -1476,7 +1493,7 @@ static int do_end_io(struct multipath *m, struct request *clone, if (atomic_read(&m->nr_valid_paths) == 0 && !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) - r = -EIO; + r = dm_report_EIO(m); return r; } @@ -1519,7 +1536,7 @@ static int do_end_io_bio(struct multipath *m, struct bio *clone, if (atomic_read(&m->nr_valid_paths) == 0 && !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) - return -EIO; + return dm_report_EIO(m); /* Queue for the daemon to resubmit */ dm_bio_restore(get_bio_details_from_bio(clone), clone); -- cgit v1.2.1