From 7e6358d244e4706fe612a77b9c36519a33600ac0 Mon Sep 17 00:00:00 2001 From: "monty_pavel@sina.com" Date: Sat, 25 Nov 2017 01:43:50 +0800 Subject: dm: fix various targets to dm_register_target after module __init resources created A NULL pointer is seen if two concurrent "vgchange -ay -K " processes race to load the dm-thin-pool module: PID: 25992 TASK: ffff883cd7d23500 CPU: 4 COMMAND: "vgchange" #0 [ffff883cd743d600] machine_kexec at ffffffff81038fa9 0000001 [ffff883cd743d660] crash_kexec at ffffffff810c5992 0000002 [ffff883cd743d730] oops_end at ffffffff81515c90 0000003 [ffff883cd743d760] no_context at ffffffff81049f1b 0000004 [ffff883cd743d7b0] __bad_area_nosemaphore at ffffffff8104a1a5 0000005 [ffff883cd743d800] bad_area at ffffffff8104a2ce 0000006 [ffff883cd743d830] __do_page_fault at ffffffff8104aa6f 0000007 [ffff883cd743d950] do_page_fault at ffffffff81517bae 0000008 [ffff883cd743d980] page_fault at ffffffff81514f95 [exception RIP: kmem_cache_alloc+108] RIP: ffffffff8116ef3c RSP: ffff883cd743da38 RFLAGS: 00010046 RAX: 0000000000000004 RBX: ffffffff81121b90 RCX: ffff881bf1e78cc0 RDX: 0000000000000000 RSI: 00000000000000d0 RDI: 0000000000000000 RBP: ffff883cd743da68 R8: ffff881bf1a4eb00 R9: 0000000080042000 R10: 0000000000002000 R11: 0000000000000000 R12: 00000000000000d0 R13: 0000000000000000 R14: 00000000000000d0 R15: 0000000000000246 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 0000009 [ffff883cd743da70] mempool_alloc_slab at ffffffff81121ba5 0000010 [ffff883cd743da80] mempool_create_node at ffffffff81122083 0000011 [ffff883cd743dad0] mempool_create at ffffffff811220f4 0000012 [ffff883cd743dae0] pool_ctr at ffffffffa08de049 [dm_thin_pool] 0000013 [ffff883cd743dbd0] dm_table_add_target at ffffffffa0005f2f [dm_mod] 0000014 [ffff883cd743dc30] table_load at ffffffffa0008ba9 [dm_mod] 0000015 [ffff883cd743dc90] ctl_ioctl at ffffffffa0009dc4 [dm_mod] The race results in a NULL pointer because: Process A (vgchange -ay -K): a. send DM_LIST_VERSIONS_CMD ioctl; b. pool_target not registered; c. modprobe dm_thin_pool and wait until end. Process B (vgchange -ay -K): a. send DM_LIST_VERSIONS_CMD ioctl; b. pool_target registered; c. table_load->dm_table_add_target->pool_ctr; d. _new_mapping_cache is NULL and panic. Note: 1. process A and process B are two concurrent processes. 2. pool_target can be detected by process B but _new_mapping_cache initialization has not ended. To fix dm-thin-pool, and other targets (cache, multipath, and snapshot) with the same problem, simply dm_register_target() after all resources created during module init (as labelled with __init) are finished. Cc: stable@vger.kernel.org Signed-off-by: monty Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index c8faa2b85842..35a2a2fa477f 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1957,13 +1957,6 @@ static int __init dm_multipath_init(void) { int r; - r = dm_register_target(&multipath_target); - if (r < 0) { - DMERR("request-based register failed %d", r); - r = -EINVAL; - goto bad_register_target; - } - kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0); if (!kmultipathd) { DMERR("failed to create workqueue kmpathd"); @@ -1985,13 +1978,20 @@ static int __init dm_multipath_init(void) goto bad_alloc_kmpath_handlerd; } + r = dm_register_target(&multipath_target); + if (r < 0) { + DMERR("request-based register failed %d", r); + r = -EINVAL; + goto bad_register_target; + } + return 0; +bad_register_target: + destroy_workqueue(kmpath_handlerd); bad_alloc_kmpath_handlerd: destroy_workqueue(kmultipathd); bad_alloc_kmultipathd: - dm_unregister_target(&multipath_target); -bad_register_target: return r; } -- cgit v1.2.3 From c1fd0abee0d52eb7e2871194b6c79d54792f515f Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 7 Dec 2017 22:42:27 -0500 Subject: dm mpath: fix bio-based multipath queue_if_no_path handling Commit ca5beb76 ("dm mpath: micro-optimize the hot path relative to MPATHF_QUEUE_IF_NO_PATH") caused bio-based DM-multipath to fail mptest's "test_02_sdev_delete". Restoring the logic that existed prior to commit ca5beb76 fixes this bio-based DM-multipath regression. Also verified all mptest tests pass with request-based DM-multipath. This commit effectively reverts commit ca5beb76 -- but it does so without reintroducing the need to take the m->lock spinlock in must_push_back_{rq,bio}. Fixes: ca5beb76 ("dm mpath: micro-optimize the hot path relative to MPATHF_QUEUE_IF_NO_PATH") Cc: stable@vger.kernel.org # 4.12+ Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 49 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 7 deletions(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 35a2a2fa477f..f7810cc869ac 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -457,6 +457,38 @@ do { \ dm_noflush_suspending((m)->ti)); \ } while (0) +/* + * Check whether bios must be queued in the device-mapper core rather + * than here in the target. + * + * If MPATHF_QUEUE_IF_NO_PATH and MPATHF_SAVED_QUEUE_IF_NO_PATH hold + * the same value then we are not between multipath_presuspend() + * and multipath_resume() calls and we have no need to check + * for the DMF_NOFLUSH_SUSPENDING flag. + */ +static bool __must_push_back(struct multipath *m, unsigned long flags) +{ + return ((test_bit(MPATHF_QUEUE_IF_NO_PATH, &flags) != + test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &flags)) && + dm_noflush_suspending(m->ti)); +} + +/* + * Following functions use READ_ONCE to get atomic access to + * all m->flags to avoid taking spinlock + */ +static bool must_push_back_rq(struct multipath *m) +{ + unsigned long flags = READ_ONCE(m->flags); + return test_bit(MPATHF_QUEUE_IF_NO_PATH, &flags) || __must_push_back(m, flags); +} + +static bool must_push_back_bio(struct multipath *m) +{ + unsigned long flags = READ_ONCE(m->flags); + return __must_push_back(m, flags); +} + /* * Map cloned requests (request-based multipath) */ @@ -478,7 +510,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, pgpath = choose_pgpath(m, nr_bytes); if (!pgpath) { - if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) + if (must_push_back_rq(m)) return DM_MAPIO_DELAY_REQUEUE; dm_report_EIO(m); /* Failed */ return DM_MAPIO_KILL; @@ -553,7 +585,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m } if (!pgpath) { - if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) + if (must_push_back_bio(m)) return DM_MAPIO_REQUEUE; dm_report_EIO(m); return DM_MAPIO_KILL; @@ -651,8 +683,7 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path, assign_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags, (save_old_value && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) || (!save_old_value && queue_if_no_path)); - assign_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags, - queue_if_no_path || dm_noflush_suspending(m->ti)); + assign_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags, queue_if_no_path); spin_unlock_irqrestore(&m->lock, flags); if (!queue_if_no_path) { @@ -1486,7 +1517,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone, fail_path(pgpath); if (atomic_read(&m->nr_valid_paths) == 0 && - !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { + !must_push_back_rq(m)) { if (error == BLK_STS_IOERR) dm_report_EIO(m); /* complete with the original error */ @@ -1521,8 +1552,12 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, if (atomic_read(&m->nr_valid_paths) == 0 && !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { - dm_report_EIO(m); - *error = BLK_STS_IOERR; + if (must_push_back_bio(m)) { + r = DM_ENDIO_REQUEUE; + } else { + dm_report_EIO(m); + *error = BLK_STS_IOERR; + } goto done; } -- cgit v1.2.3