diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/bio-integrity.c | 4 | ||||
-rw-r--r-- | block/bio.c | 77 | ||||
-rw-r--r-- | block/blk-core.c | 136 | ||||
-rw-r--r-- | block/blk-exec.c | 10 | ||||
-rw-r--r-- | block/blk-mq.c | 148 | ||||
-rw-r--r-- | block/blk.h | 5 | ||||
-rw-r--r-- | block/bounce.c | 3 | ||||
-rw-r--r-- | block/ioctl.c | 37 |
8 files changed, 208 insertions, 212 deletions
diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 5cbd5d9ea61d..0436c21db7f2 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -361,7 +361,7 @@ static void bio_integrity_verify_fn(struct work_struct *work) /* Restore original bio completion handler */ bio->bi_end_io = bip->bip_end_io; - bio_endio_nodec(bio, error); + bio_endio(bio, error); } /** @@ -388,7 +388,7 @@ void bio_integrity_endio(struct bio *bio, int error) */ if (error) { bio->bi_end_io = bip->bip_end_io; - bio_endio_nodec(bio, error); + bio_endio(bio, error); return; } diff --git a/block/bio.c b/block/bio.c index f66a4eae16ee..259197d97de1 100644 --- a/block/bio.c +++ b/block/bio.c @@ -270,8 +270,8 @@ void bio_init(struct bio *bio) { memset(bio, 0, sizeof(*bio)); bio->bi_flags = 1 << BIO_UPTODATE; - atomic_set(&bio->bi_remaining, 1); - atomic_set(&bio->bi_cnt, 1); + atomic_set(&bio->__bi_remaining, 1); + atomic_set(&bio->__bi_cnt, 1); } EXPORT_SYMBOL(bio_init); @@ -292,8 +292,8 @@ void bio_reset(struct bio *bio) __bio_free(bio); memset(bio, 0, BIO_RESET_BYTES); - bio->bi_flags = flags|(1 << BIO_UPTODATE); - atomic_set(&bio->bi_remaining, 1); + bio->bi_flags = flags | (1 << BIO_UPTODATE); + atomic_set(&bio->__bi_remaining, 1); } EXPORT_SYMBOL(bio_reset); @@ -303,6 +303,17 @@ static void bio_chain_endio(struct bio *bio, int error) bio_put(bio); } +/* + * Increment chain count for the bio. Make sure the CHAIN flag update + * is visible before the raised count. + */ +static inline void bio_inc_remaining(struct bio *bio) +{ + bio->bi_flags |= (1 << BIO_CHAIN); + smp_mb__before_atomic(); + atomic_inc(&bio->__bi_remaining); +} + /** * bio_chain - chain bio completions * @bio: the target bio @@ -320,7 +331,7 @@ void bio_chain(struct bio *bio, struct bio *parent) bio->bi_private = parent; bio->bi_end_io = bio_chain_endio; - atomic_inc(&parent->bi_remaining); + bio_inc_remaining(parent); } EXPORT_SYMBOL(bio_chain); @@ -524,13 +535,17 @@ EXPORT_SYMBOL(zero_fill_bio); **/ void bio_put(struct bio *bio) { - BIO_BUG_ON(!atomic_read(&bio->bi_cnt)); - - /* - * last put frees it - */ - if (atomic_dec_and_test(&bio->bi_cnt)) + if (!bio_flagged(bio, BIO_REFFED)) bio_free(bio); + else { + BIO_BUG_ON(!atomic_read(&bio->__bi_cnt)); + + /* + * last put frees it + */ + if (atomic_dec_and_test(&bio->__bi_cnt)) + bio_free(bio); + } } EXPORT_SYMBOL(bio_put); @@ -1741,6 +1756,25 @@ void bio_flush_dcache_pages(struct bio *bi) EXPORT_SYMBOL(bio_flush_dcache_pages); #endif +static inline bool bio_remaining_done(struct bio *bio) +{ + /* + * If we're not chaining, then ->__bi_remaining is always 1 and + * we always end io on the first invocation. + */ + if (!bio_flagged(bio, BIO_CHAIN)) + return true; + + BUG_ON(atomic_read(&bio->__bi_remaining) <= 0); + + if (atomic_dec_and_test(&bio->__bi_remaining)) { + clear_bit(BIO_CHAIN, &bio->bi_flags); + return true; + } + + return false; +} + /** * bio_endio - end I/O on a bio * @bio: bio @@ -1758,15 +1792,13 @@ EXPORT_SYMBOL(bio_flush_dcache_pages); void bio_endio(struct bio *bio, int error) { while (bio) { - BUG_ON(atomic_read(&bio->bi_remaining) <= 0); - if (error) clear_bit(BIO_UPTODATE, &bio->bi_flags); else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) error = -EIO; - if (!atomic_dec_and_test(&bio->bi_remaining)) - return; + if (unlikely(!bio_remaining_done(bio))) + break; /* * Need to have a real endio function for chained bios, @@ -1790,21 +1822,6 @@ void bio_endio(struct bio *bio, int error) EXPORT_SYMBOL(bio_endio); /** - * bio_endio_nodec - end I/O on a bio, without decrementing bi_remaining - * @bio: bio - * @error: error, if any - * - * For code that has saved and restored bi_end_io; thing hard before using this - * function, probably you should've cloned the entire bio. - **/ -void bio_endio_nodec(struct bio *bio, int error) -{ - atomic_inc(&bio->bi_remaining); - bio_endio(bio, error); -} -EXPORT_SYMBOL(bio_endio_nodec); - -/** * bio_split - split a bio * @bio: bio to split * @sectors: number of sectors to split from the front of @bio diff --git a/block/blk-core.c b/block/blk-core.c index 03b5f8d77f37..f6ab750060fe 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -117,7 +117,7 @@ EXPORT_SYMBOL(blk_rq_init); static void req_bio_endio(struct request *rq, struct bio *bio, unsigned int nbytes, int error) { - if (error) + if (error && !(rq->cmd_flags & REQ_CLONE)) clear_bit(BIO_UPTODATE, &bio->bi_flags); else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) error = -EIO; @@ -128,7 +128,8 @@ static void req_bio_endio(struct request *rq, struct bio *bio, bio_advance(bio, nbytes); /* don't actually finish bio if it's part of flush sequence */ - if (bio->bi_iter.bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) + if (bio->bi_iter.bi_size == 0 && + !(rq->cmd_flags & (REQ_FLUSH_SEQ|REQ_CLONE))) bio_endio(bio, error); } @@ -285,6 +286,7 @@ inline void __blk_run_queue_uncond(struct request_queue *q) q->request_fn(q); q->request_fn_active--; } +EXPORT_SYMBOL_GPL(__blk_run_queue_uncond); /** * __blk_run_queue - run a single device queue @@ -1525,7 +1527,8 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req, * Caller must ensure !blk_queue_nomerges(q) beforehand. */ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, - unsigned int *request_count) + unsigned int *request_count, + struct request **same_queue_rq) { struct blk_plug *plug; struct request *rq; @@ -1545,8 +1548,16 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, list_for_each_entry_reverse(rq, plug_list, queuelist) { int el_ret; - if (rq->q == q) + if (rq->q == q) { (*request_count)++; + /* + * Only blk-mq multiple hardware queues case checks the + * rq in the same queue, there should be only one such + * rq in a queue + **/ + if (same_queue_rq) + *same_queue_rq = rq; + } if (rq->q != q || !blk_rq_merge_ok(rq, bio)) continue; @@ -1611,7 +1622,7 @@ static void blk_queue_bio(struct request_queue *q, struct bio *bio) * any locks. */ if (!blk_queue_nomerges(q) && - blk_attempt_plug_merge(q, bio, &request_count)) + blk_attempt_plug_merge(q, bio, &request_count, NULL)) return; spin_lock_irq(q->queue_lock); @@ -1718,8 +1729,6 @@ static void handle_bad_sector(struct bio *bio) bio->bi_rw, (unsigned long long)bio_end_sector(bio), (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9)); - - set_bit(BIO_EOF, &bio->bi_flags); } #ifdef CONFIG_FAIL_MAKE_REQUEST @@ -2904,95 +2913,22 @@ int blk_lld_busy(struct request_queue *q) } EXPORT_SYMBOL_GPL(blk_lld_busy); -/** - * blk_rq_unprep_clone - Helper function to free all bios in a cloned request - * @rq: the clone request to be cleaned up - * - * Description: - * Free all bios in @rq for a cloned request. - */ -void blk_rq_unprep_clone(struct request *rq) -{ - struct bio *bio; - - while ((bio = rq->bio) != NULL) { - rq->bio = bio->bi_next; - - bio_put(bio); - } -} -EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); - -/* - * Copy attributes of the original request to the clone request. - * The actual data parts (e.g. ->cmd, ->sense) are not copied. - */ -static void __blk_rq_prep_clone(struct request *dst, struct request *src) +void blk_rq_prep_clone(struct request *dst, struct request *src) { dst->cpu = src->cpu; - dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; + dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK); + dst->cmd_flags |= REQ_NOMERGE | REQ_CLONE; dst->cmd_type = src->cmd_type; dst->__sector = blk_rq_pos(src); dst->__data_len = blk_rq_bytes(src); dst->nr_phys_segments = src->nr_phys_segments; dst->ioprio = src->ioprio; dst->extra_len = src->extra_len; -} - -/** - * blk_rq_prep_clone - Helper function to setup clone request - * @rq: the request to be setup - * @rq_src: original request to be cloned - * @bs: bio_set that bios for clone are allocated from - * @gfp_mask: memory allocation mask for bio - * @bio_ctr: setup function to be called for each clone bio. - * Returns %0 for success, non %0 for failure. - * @data: private data to be passed to @bio_ctr - * - * Description: - * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq. - * The actual data parts of @rq_src (e.g. ->cmd, ->sense) - * are not copied, and copying such parts is the caller's responsibility. - * Also, pages which the original bios are pointing to are not copied - * and the cloned bios just point same pages. - * So cloned bios must be completed before original bios, which means - * the caller must complete @rq before @rq_src. - */ -int blk_rq_prep_clone(struct request *rq, struct request *rq_src, - struct bio_set *bs, gfp_t gfp_mask, - int (*bio_ctr)(struct bio *, struct bio *, void *), - void *data) -{ - struct bio *bio, *bio_src; - - if (!bs) - bs = fs_bio_set; - - __rq_for_each_bio(bio_src, rq_src) { - bio = bio_clone_fast(bio_src, gfp_mask, bs); - if (!bio) - goto free_and_out; - - if (bio_ctr && bio_ctr(bio, bio_src, data)) - goto free_and_out; - - if (rq->bio) { - rq->biotail->bi_next = bio; - rq->biotail = bio; - } else - rq->bio = rq->biotail = bio; - } - - __blk_rq_prep_clone(rq, rq_src); - - return 0; - -free_and_out: - if (bio) - bio_put(bio); - blk_rq_unprep_clone(rq); - - return -ENOMEM; + dst->bio = src->bio; + dst->biotail = src->biotail; + dst->cmd = src->cmd; + dst->cmd_len = src->cmd_len; + dst->sense = src->sense; } EXPORT_SYMBOL_GPL(blk_rq_prep_clone); @@ -3034,21 +2970,20 @@ void blk_start_plug(struct blk_plug *plug) { struct task_struct *tsk = current; + /* + * If this is a nested plug, don't actually assign it. + */ + if (tsk->plug) + return; + INIT_LIST_HEAD(&plug->list); INIT_LIST_HEAD(&plug->mq_list); INIT_LIST_HEAD(&plug->cb_list); - /* - * If this is a nested plug, don't actually assign it. It will be - * flushed on its own. + * Store ordering should not be needed here, since a potential + * preempt will imply a full memory barrier */ - if (!tsk->plug) { - /* - * Store ordering should not be needed here, since a potential - * preempt will imply a full memory barrier - */ - tsk->plug = plug; - } + tsk->plug = plug; } EXPORT_SYMBOL(blk_start_plug); @@ -3195,10 +3130,11 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) void blk_finish_plug(struct blk_plug *plug) { + if (plug != current->plug) + return; blk_flush_plug_list(plug, false); - if (plug == current->plug) - current->plug = NULL; + current->plug = NULL; } EXPORT_SYMBOL(blk_finish_plug); diff --git a/block/blk-exec.c b/block/blk-exec.c index 9924725fa50d..3fec8a29d0fa 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -53,7 +53,6 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, rq_end_io_fn *done) { int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; - bool is_pm_resume; WARN_ON(irqs_disabled()); WARN_ON(rq->cmd_type == REQ_TYPE_FS); @@ -70,12 +69,6 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, return; } - /* - * need to check this before __blk_run_queue(), because rq can - * be freed before that returns. - */ - is_pm_resume = rq->cmd_type == REQ_TYPE_PM_RESUME; - spin_lock_irq(q->queue_lock); if (unlikely(blk_queue_dying(q))) { @@ -88,9 +81,6 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, __elv_add_request(q, rq, where); __blk_run_queue(q); - /* the queue is stopped so it won't be run */ - if (is_pm_resume) - __blk_run_queue_uncond(q); spin_unlock_irq(q->queue_lock); } EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); diff --git a/block/blk-mq.c b/block/blk-mq.c index e68b71b85a7e..87e901501de2 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -89,7 +89,8 @@ static int blk_mq_queue_enter(struct request_queue *q, gfp_t gfp) return -EBUSY; ret = wait_event_interruptible(q->mq_freeze_wq, - !q->mq_freeze_depth || blk_queue_dying(q)); + !atomic_read(&q->mq_freeze_depth) || + blk_queue_dying(q)); if (blk_queue_dying(q)) return -ENODEV; if (ret) @@ -112,13 +113,10 @@ static void blk_mq_usage_counter_release(struct percpu_ref *ref) void blk_mq_freeze_queue_start(struct request_queue *q) { - bool freeze; + int freeze_depth; - spin_lock_irq(q->queue_lock); - freeze = !q->mq_freeze_depth++; - spin_unlock_irq(q->queue_lock); - - if (freeze) { + freeze_depth = atomic_inc_return(&q->mq_freeze_depth); + if (freeze_depth == 1) { percpu_ref_kill(&q->mq_usage_counter); blk_mq_run_hw_queues(q, false); } @@ -143,13 +141,11 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue); void blk_mq_unfreeze_queue(struct request_queue *q) { - bool wake; + int freeze_depth; - spin_lock_irq(q->queue_lock); - wake = !--q->mq_freeze_depth; - WARN_ON_ONCE(q->mq_freeze_depth < 0); - spin_unlock_irq(q->queue_lock); - if (wake) { + freeze_depth = atomic_dec_return(&q->mq_freeze_depth); + WARN_ON_ONCE(freeze_depth < 0); + if (!freeze_depth) { percpu_ref_reinit(&q->mq_usage_counter); wake_up_all(&q->mq_freeze_wq); } @@ -1237,6 +1233,38 @@ static struct request *blk_mq_map_request(struct request_queue *q, return rq; } +static int blk_mq_direct_issue_request(struct request *rq) +{ + int ret; + struct request_queue *q = rq->q; + struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, + rq->mq_ctx->cpu); + struct blk_mq_queue_data bd = { + .rq = rq, + .list = NULL, + .last = 1 + }; + + /* + * For OK queue, we are done. For error, kill it. Any other + * error (busy), just add it to our list as we previously + * would have done + */ + ret = q->mq_ops->queue_rq(hctx, &bd); + if (ret == BLK_MQ_RQ_QUEUE_OK) + return 0; + else { + __blk_mq_requeue_request(rq); + + if (ret == BLK_MQ_RQ_QUEUE_ERROR) { + rq->errors = -EIO; + blk_mq_end_request(rq, rq->errors); + return 0; + } + return -1; + } +} + /* * Multiple hardware queue variant. This will not use per-process plugs, * but will attempt to bypass the hctx queueing if we can go straight to @@ -1248,6 +1276,9 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); struct blk_map_ctx data; struct request *rq; + unsigned int request_count = 0; + struct blk_plug *plug; + struct request *same_queue_rq = NULL; blk_queue_bounce(q, &bio); @@ -1256,6 +1287,10 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) return; } + if (!is_flush_fua && !blk_queue_nomerges(q) && + blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq)) + return; + rq = blk_mq_map_request(q, bio, &data); if (unlikely(!rq)) return; @@ -1266,38 +1301,42 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) goto run_queue; } + plug = current->plug; /* * If the driver supports defer issued based on 'last', then * queue it up like normal since we can potentially save some * CPU this way. */ - if (is_sync && !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) { - struct blk_mq_queue_data bd = { - .rq = rq, - .list = NULL, - .last = 1 - }; - int ret; + if (((plug && !blk_queue_nomerges(q)) || is_sync) && + !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) { + struct request *old_rq = NULL; blk_mq_bio_to_request(rq, bio); /* - * For OK queue, we are done. For error, kill it. Any other - * error (busy), just add it to our list as we previously - * would have done + * we do limited pluging. If bio can be merged, do merge. + * Otherwise the existing request in the plug list will be + * issued. So the plug list will have one request at most */ - ret = q->mq_ops->queue_rq(data.hctx, &bd); - if (ret == BLK_MQ_RQ_QUEUE_OK) - goto done; - else { - __blk_mq_requeue_request(rq); - - if (ret == BLK_MQ_RQ_QUEUE_ERROR) { - rq->errors = -EIO; - blk_mq_end_request(rq, rq->errors); - goto done; + if (plug) { + /* + * The plug list might get flushed before this. If that + * happens, same_queue_rq is invalid and plug list is empty + **/ + if (same_queue_rq && !list_empty(&plug->mq_list)) { + old_rq = same_queue_rq; + list_del_init(&old_rq->queuelist); } - } + list_add_tail(&rq->queuelist, &plug->mq_list); + } else /* is_sync */ + old_rq = rq; + blk_mq_put_ctx(data.ctx); + if (!old_rq) + return; + if (!blk_mq_direct_issue_request(old_rq)) + return; + blk_mq_insert_request(old_rq, false, true, true); + return; } if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { @@ -1310,7 +1349,6 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) run_queue: blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua); } -done: blk_mq_put_ctx(data.ctx); } @@ -1322,16 +1360,11 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio) { const int is_sync = rw_is_sync(bio->bi_rw); const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); - unsigned int use_plug, request_count = 0; + struct blk_plug *plug; + unsigned int request_count = 0; struct blk_map_ctx data; struct request *rq; - /* - * If we have multiple hardware queues, just go directly to - * one of those for sync IO. - */ - use_plug = !is_flush_fua && !is_sync; - blk_queue_bounce(q, &bio); if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { @@ -1339,8 +1372,8 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio) return; } - if (use_plug && !blk_queue_nomerges(q) && - blk_attempt_plug_merge(q, bio, &request_count)) + if (!is_flush_fua && !blk_queue_nomerges(q) && + blk_attempt_plug_merge(q, bio, &request_count, NULL)) return; rq = blk_mq_map_request(q, bio, &data); @@ -1358,21 +1391,18 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio) * utilize that to temporarily store requests until the task is * either done or scheduled away. */ - if (use_plug) { - struct blk_plug *plug = current->plug; - - if (plug) { - blk_mq_bio_to_request(rq, bio); - if (list_empty(&plug->mq_list)) - trace_block_plug(q); - else if (request_count >= BLK_MAX_REQUEST_COUNT) { - blk_flush_plug_list(plug, false); - trace_block_plug(q); - } - list_add_tail(&rq->queuelist, &plug->mq_list); - blk_mq_put_ctx(data.ctx); - return; + plug = current->plug; + if (plug) { + blk_mq_bio_to_request(rq, bio); + if (list_empty(&plug->mq_list)) + trace_block_plug(q); + else if (request_count >= BLK_MAX_REQUEST_COUNT) { + blk_flush_plug_list(plug, false); + trace_block_plug(q); } + list_add_tail(&rq->queuelist, &plug->mq_list); + blk_mq_put_ctx(data.ctx); + return; } if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { @@ -2052,7 +2082,7 @@ void blk_mq_free_queue(struct request_queue *q) /* Basically redo blk_mq_init_queue with queue frozen */ static void blk_mq_queue_reinit(struct request_queue *q) { - WARN_ON_ONCE(!q->mq_freeze_depth); + WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth)); blk_mq_sysfs_unregister(q); diff --git a/block/blk.h b/block/blk.h index 43b036185712..026d9594142b 100644 --- a/block/blk.h +++ b/block/blk.h @@ -78,7 +78,8 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req, bool bio_attempt_back_merge(struct request_queue *q, struct request *req, struct bio *bio); bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, - unsigned int *request_count); + unsigned int *request_count, + struct request **same_queue_rq); void blk_account_io_start(struct request *req, bool new_io); void blk_account_io_completion(struct request *req, unsigned int bytes); @@ -193,8 +194,6 @@ int blk_try_merge(struct request *rq, struct bio *bio); void blk_queue_congestion_threshold(struct request_queue *q); -void __blk_run_queue_uncond(struct request_queue *q); - int blk_dev_init(void); diff --git a/block/bounce.c b/block/bounce.c index ed9dd8067120..3ab0bce1c947 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -128,9 +128,6 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool, int err) struct bio_vec *bvec, *org_vec; int i; - if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags)) - set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags); - /* * free up bounce indirect pages used */ diff --git a/block/ioctl.c b/block/ioctl.c index 7d8befde2aca..8061eba42887 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -150,21 +150,48 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user } } -static int blkdev_reread_part(struct block_device *bdev) +/* + * This is an exported API for the block driver, and will not + * acquire bd_mutex. This API should be used in case that + * caller has held bd_mutex already. + */ +int __blkdev_reread_part(struct block_device *bdev) { struct gendisk *disk = bdev->bd_disk; - int res; if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if (!mutex_trylock(&bdev->bd_mutex)) - return -EBUSY; - res = rescan_partitions(disk, bdev); + + lockdep_assert_held(&bdev->bd_mutex); + + return rescan_partitions(disk, bdev); +} +EXPORT_SYMBOL(__blkdev_reread_part); + +/* + * This is an exported API for the block driver, and will + * try to acquire bd_mutex. If bd_mutex has been held already + * in current context, please call __blkdev_reread_part(). + * + * Make sure the held locks in current context aren't required + * in open()/close() handler and I/O path for avoiding ABBA deadlock: + * - bd_mutex is held before calling block driver's open/close + * handler + * - reading partition table may submit I/O to the block device + */ +int blkdev_reread_part(struct block_device *bdev) +{ + int res; + + mutex_lock(&bdev->bd_mutex); + res = __blkdev_reread_part(bdev); mutex_unlock(&bdev->bd_mutex); + return res; } +EXPORT_SYMBOL(blkdev_reread_part); static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, uint64_t len, int secure) |