diff options
Diffstat (limited to 'block/blk-mq.c')
-rw-r--r-- | block/blk-mq.c | 375 |
1 files changed, 214 insertions, 161 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c index 0835f4d8d42e..a12b1763508d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -30,6 +30,7 @@ #include <trace/events/block.h> #include <linux/blk-mq.h> +#include <linux/t10-pi.h> #include "blk.h" #include "blk-mq.h" #include "blk-mq-debugfs.h" @@ -44,12 +45,12 @@ static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb); static int blk_mq_poll_stats_bkt(const struct request *rq) { - int ddir, bytes, bucket; + int ddir, sectors, bucket; ddir = rq_data_dir(rq); - bytes = blk_rq_bytes(rq); + sectors = blk_rq_stats_sectors(rq); - bucket = ddir + 2*(ilog2(bytes) - 9); + bucket = ddir + 2 * ilog2(sectors); if (bucket < 0) return -1; @@ -92,7 +93,7 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx, struct mq_inflight { struct hd_struct *part; - unsigned int *inflight; + unsigned int inflight[2]; }; static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx, @@ -101,45 +102,29 @@ static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx, { struct mq_inflight *mi = priv; - /* - * index[0] counts the specific partition that was asked for. - */ if (rq->part == mi->part) - mi->inflight[0]++; + mi->inflight[rq_data_dir(rq)]++; return true; } unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part) { - unsigned inflight[2]; - struct mq_inflight mi = { .part = part, .inflight = inflight, }; + struct mq_inflight mi = { .part = part }; - inflight[0] = inflight[1] = 0; blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi); - return inflight[0]; -} - -static bool blk_mq_check_inflight_rw(struct blk_mq_hw_ctx *hctx, - struct request *rq, void *priv, - bool reserved) -{ - struct mq_inflight *mi = priv; - - if (rq->part == mi->part) - mi->inflight[rq_data_dir(rq)]++; - - return true; + return mi.inflight[0] + mi.inflight[1]; } void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part, unsigned int inflight[2]) { - struct mq_inflight mi = { .part = part, .inflight = inflight, }; + struct mq_inflight mi = { .part = part }; - inflight[0] = inflight[1] = 0; - blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight_rw, &mi); + blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi); + inflight[0] = mi.inflight[0]; + inflight[1] = mi.inflight[1]; } void blk_freeze_queue_start(struct request_queue *q) @@ -275,23 +260,17 @@ void blk_mq_wake_waiters(struct request_queue *q) blk_mq_tag_wakeup_all(hctx->tags, true); } -bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx) -{ - return blk_mq_has_free_tags(hctx->tags); -} -EXPORT_SYMBOL(blk_mq_can_queue); - /* - * Only need start/end time stamping if we have stats enabled, or using - * an IO scheduler. + * Only need start/end time stamping if we have iostat or + * blk stats enabled, or using an IO scheduler. */ static inline bool blk_mq_need_time_stamp(struct request *rq) { - return (rq->rq_flags & RQF_IO_STAT) || rq->q->elevator; + return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS)) || rq->q->elevator; } static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, - unsigned int tag, unsigned int op) + unsigned int tag, unsigned int op, u64 alloc_time_ns) { struct blk_mq_tags *tags = blk_mq_tags_from_data(data); struct request *rq = tags->static_rqs[tag]; @@ -325,11 +304,15 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, RB_CLEAR_NODE(&rq->rb_node); rq->rq_disk = NULL; rq->part = NULL; +#ifdef CONFIG_BLK_RQ_ALLOC_TIME + rq->alloc_time_ns = alloc_time_ns; +#endif if (blk_mq_need_time_stamp(rq)) rq->start_time_ns = ktime_get_ns(); else rq->start_time_ns = 0; rq->io_start_time_ns = 0; + rq->stats_sectors = 0; rq->nr_phys_segments = 0; #if defined(CONFIG_BLK_DEV_INTEGRITY) rq->nr_integrity_segments = 0; @@ -356,8 +339,14 @@ static struct request *blk_mq_get_request(struct request_queue *q, struct request *rq; unsigned int tag; bool clear_ctx_on_error = false; + u64 alloc_time_ns = 0; blk_queue_enter_live(q); + + /* alloc_time includes depth and tag waits */ + if (blk_queue_rq_alloc_time(q)) + alloc_time_ns = ktime_get_ns(); + data->q = q; if (likely(!data->ctx)) { data->ctx = blk_mq_get_ctx(q); @@ -393,7 +382,7 @@ static struct request *blk_mq_get_request(struct request_queue *q, return NULL; } - rq = blk_mq_rq_ctx_init(data, tag, data->cmd_flags); + rq = blk_mq_rq_ctx_init(data, tag, data->cmd_flags, alloc_time_ns); if (!op_is_flush(data->cmd_flags)) { rq->elv.icq = NULL; if (e && e->type->ops.prepare_request) { @@ -652,19 +641,14 @@ bool blk_mq_complete_request(struct request *rq) } EXPORT_SYMBOL(blk_mq_complete_request); -void blk_mq_complete_request_sync(struct request *rq) -{ - WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); - rq->q->mq_ops->complete(rq); -} -EXPORT_SYMBOL_GPL(blk_mq_complete_request_sync); - -int blk_mq_request_started(struct request *rq) -{ - return blk_mq_rq_state(rq) != MQ_RQ_IDLE; -} -EXPORT_SYMBOL_GPL(blk_mq_request_started); - +/** + * blk_mq_start_request - Start processing a request + * @rq: Pointer to request to be started + * + * Function used by device drivers to notify the block layer that a request + * is going to be processed now, so blk layer can do proper initializations + * such as starting the timeout timer. + */ void blk_mq_start_request(struct request *rq) { struct request_queue *q = rq->q; @@ -673,9 +657,7 @@ void blk_mq_start_request(struct request *rq) if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) { rq->io_start_time_ns = ktime_get_ns(); -#ifdef CONFIG_BLK_DEV_THROTTLING_LOW - rq->throtl_size = blk_rq_sectors(rq); -#endif + rq->stats_sectors = blk_rq_sectors(rq); rq->rq_flags |= RQF_STATS; rq_qos_issue(q, rq); } @@ -693,6 +675,11 @@ void blk_mq_start_request(struct request *rq) */ rq->nr_phys_segments++; } + +#ifdef CONFIG_BLK_DEV_INTEGRITY + if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE) + q->integrity.profile->prepare_fn(rq); +#endif } EXPORT_SYMBOL(blk_mq_start_request); @@ -905,7 +892,10 @@ static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, */ if (blk_mq_req_expired(rq, next)) blk_mq_rq_timed_out(rq, reserved); - if (refcount_dec_and_test(&rq->ref)) + + if (is_flush_rq(rq, hctx)) + rq->end_io(rq, 0); + else if (refcount_dec_and_test(&rq->ref)) __blk_mq_free_request(rq); return true; @@ -1048,7 +1038,7 @@ bool blk_mq_get_driver_tag(struct request *rq) bool shared; if (rq->tag != -1) - goto done; + return true; if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag)) data.flags |= BLK_MQ_REQ_RESERVED; @@ -1063,7 +1053,6 @@ bool blk_mq_get_driver_tag(struct request *rq) data.hctx->tags->rqs[rq->tag] = rq; } -done: return rq->tag != -1; } @@ -1346,6 +1335,12 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, return (queued + errors) != 0; } +/** + * __blk_mq_run_hw_queue - Run a hardware queue. + * @hctx: Pointer to the hardware queue to run. + * + * Send pending requests to the hardware. + */ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) { int srcu_idx; @@ -1443,6 +1438,15 @@ select_cpu: return next_cpu; } +/** + * __blk_mq_delay_run_hw_queue - Run (or schedule to run) a hardware queue. + * @hctx: Pointer to the hardware queue to run. + * @async: If we want to run the queue asynchronously. + * @msecs: Microseconds of delay to wait before running the queue. + * + * If !@async, try to run the queue now. Else, run the queue asynchronously and + * with a delay of @msecs. + */ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async, unsigned long msecs) { @@ -1464,13 +1468,29 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async, msecs_to_jiffies(msecs)); } +/** + * blk_mq_delay_run_hw_queue - Run a hardware queue asynchronously. + * @hctx: Pointer to the hardware queue to run. + * @msecs: Microseconds of delay to wait before running the queue. + * + * Run a hardware queue asynchronously with a delay of @msecs. + */ void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) { __blk_mq_delay_run_hw_queue(hctx, true, msecs); } EXPORT_SYMBOL(blk_mq_delay_run_hw_queue); -bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) +/** + * blk_mq_run_hw_queue - Start to run a hardware queue. + * @hctx: Pointer to the hardware queue to run. + * @async: If we want to run the queue asynchronously. + * + * Check if the request queue is not in a quiesced state and if there are + * pending requests to be sent. If this is true, run the queue to send requests + * to hardware. + */ +void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) { int srcu_idx; bool need_run; @@ -1488,15 +1508,16 @@ bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) blk_mq_hctx_has_pending(hctx); hctx_unlock(hctx, srcu_idx); - if (need_run) { + if (need_run) __blk_mq_delay_run_hw_queue(hctx, async, 0); - return true; - } - - return false; } EXPORT_SYMBOL(blk_mq_run_hw_queue); +/** + * blk_mq_run_hw_queue - Run all hardware queues in a request queue. + * @q: Pointer to the request queue to run. + * @async: If we want to run the queue asynchronously. + */ void blk_mq_run_hw_queues(struct request_queue *q, bool async) { struct blk_mq_hw_ctx *hctx; @@ -1648,7 +1669,11 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, blk_mq_hctx_mark_pending(hctx, ctx); } -/* +/** + * blk_mq_request_bypass_insert - Insert a request at dispatch list. + * @rq: Pointer to request to be inserted. + * @run_queue: If we should run the hardware queue after inserting the request. + * * Should only be used carefully, when the caller knows we want to * bypass a potential IO scheduler on the target device. */ @@ -1691,28 +1716,20 @@ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b) struct request *rqa = container_of(a, struct request, queuelist); struct request *rqb = container_of(b, struct request, queuelist); - if (rqa->mq_ctx < rqb->mq_ctx) - return -1; - else if (rqa->mq_ctx > rqb->mq_ctx) - return 1; - else if (rqa->mq_hctx < rqb->mq_hctx) - return -1; - else if (rqa->mq_hctx > rqb->mq_hctx) - return 1; + if (rqa->mq_ctx != rqb->mq_ctx) + return rqa->mq_ctx > rqb->mq_ctx; + if (rqa->mq_hctx != rqb->mq_hctx) + return rqa->mq_hctx > rqb->mq_hctx; return blk_rq_pos(rqa) > blk_rq_pos(rqb); } void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) { - struct blk_mq_hw_ctx *this_hctx; - struct blk_mq_ctx *this_ctx; - struct request_queue *this_q; - struct request *rq; LIST_HEAD(list); - LIST_HEAD(rq_list); - unsigned int depth; + if (list_empty(&plug->mq_list)) + return; list_splice_init(&plug->mq_list, &list); if (plug->rq_count > 2 && plug->multiple_queues) @@ -1720,42 +1737,27 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) plug->rq_count = 0; - this_q = NULL; - this_hctx = NULL; - this_ctx = NULL; - depth = 0; - - while (!list_empty(&list)) { - rq = list_entry_rq(list.next); - list_del_init(&rq->queuelist); - BUG_ON(!rq->q); - if (rq->mq_hctx != this_hctx || rq->mq_ctx != this_ctx) { - if (this_hctx) { - trace_block_unplug(this_q, depth, !from_schedule); - blk_mq_sched_insert_requests(this_hctx, this_ctx, - &rq_list, - from_schedule); - } - - this_q = rq->q; - this_ctx = rq->mq_ctx; - this_hctx = rq->mq_hctx; - depth = 0; + do { + struct list_head rq_list; + struct request *rq, *head_rq = list_entry_rq(list.next); + struct list_head *pos = &head_rq->queuelist; /* skip first */ + struct blk_mq_hw_ctx *this_hctx = head_rq->mq_hctx; + struct blk_mq_ctx *this_ctx = head_rq->mq_ctx; + unsigned int depth = 1; + + list_for_each_continue(pos, &list) { + rq = list_entry_rq(pos); + BUG_ON(!rq->q); + if (rq->mq_hctx != this_hctx || rq->mq_ctx != this_ctx) + break; + depth++; } - depth++; - list_add_tail(&rq->queuelist, &rq_list); - } - - /* - * If 'this_hctx' is set, we know we have entries to complete - * on 'rq_list'. Do those. - */ - if (this_hctx) { - trace_block_unplug(this_q, depth, !from_schedule); + list_cut_before(&rq_list, &list, pos); + trace_block_unplug(head_rq->q, depth, !from_schedule); blk_mq_sched_insert_requests(this_hctx, this_ctx, &rq_list, from_schedule); - } + } while(!list_empty(&list)); } static void blk_mq_bio_to_request(struct request *rq, struct bio *bio, @@ -1851,6 +1853,17 @@ insert: return BLK_STS_OK; } +/** + * blk_mq_try_issue_directly - Try to send a request directly to device driver. + * @hctx: Pointer of the associated hardware queue. + * @rq: Pointer to request to be sent. + * @cookie: Request queue cookie. + * + * If the device has enough resources to accept a new request now, send the + * request directly to device driver. Else, insert at hctx->dispatch queue, so + * we can try send it another time in the future. Requests inserted at this + * queue have higher priority. + */ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, struct request *rq, blk_qc_t *cookie) { @@ -1928,6 +1941,22 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq) } } +/** + * blk_mq_make_request - Create and send a request to block device. + * @q: Request queue pointer. + * @bio: Bio pointer. + * + * Builds up a request structure from @q and @bio and send to the device. The + * request may not be queued directly to hardware if: + * * This request can be merged with another one + * * We want to place request at plug queue for possible future merging + * * There is an IO scheduler active at this queue + * + * It will not queue the request if there is an error with the bio, or at the + * request creation. + * + * Returns: Request queue cookie. + */ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) { const int is_sync = op_is_sync(bio->bi_opf); @@ -1973,13 +2002,17 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) plug = blk_mq_plug(q, bio); if (unlikely(is_flush_fua)) { - /* bypass scheduler for flush rq */ + /* Bypass scheduler for flush requests */ blk_insert_flush(rq); blk_mq_run_hw_queue(data.hctx, true); - } else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs)) { + } else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs || + !blk_queue_nonrot(q))) { /* * Use plugging if we have a ->commit_rqs() hook as well, as * we know the driver uses bd->last in a smart fashion. + * + * Use normal plugging if this disk is slow HDD, as sequential + * IO may benefit a lot from plug merging. */ unsigned int request_count = plug->rq_count; struct request *last = NULL; @@ -1996,6 +2029,9 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) } blk_add_rq_to_plug(plug, rq); + } else if (q->elevator) { + /* Insert the request at the IO scheduler queue */ + blk_mq_sched_insert_request(rq, false, true, true); } else if (plug && !blk_queue_nomerges(q)) { /* * We do limited plugging. If the bio can be merged, do that. @@ -2019,10 +2055,15 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_mq_try_issue_directly(data.hctx, same_queue_rq, &cookie); } - } else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator && - !data.hctx->dispatch_busy)) { + } else if ((q->nr_hw_queues > 1 && is_sync) || + !data.hctx->dispatch_busy) { + /* + * There is no scheduler and we can try to send directly + * to the hardware. + */ blk_mq_try_issue_directly(data.hctx, rq, &cookie); } else { + /* Default case. */ blk_mq_sched_insert_request(rq, false, true, true); } @@ -2453,11 +2494,6 @@ static void blk_mq_map_swqueue(struct request_queue *q) struct blk_mq_ctx *ctx; struct blk_mq_tag_set *set = q->tag_set; - /* - * Avoid others reading imcomplete hctx->cpumask through sysfs - */ - mutex_lock(&q->sysfs_lock); - queue_for_each_hw_ctx(q, hctx, i) { cpumask_clear(hctx->cpumask); hctx->nr_ctx = 0; @@ -2518,8 +2554,6 @@ static void blk_mq_map_swqueue(struct request_queue *q) HCTX_TYPE_DEFAULT, i); } - mutex_unlock(&q->sysfs_lock); - queue_for_each_hw_ctx(q, hctx, i) { /* * If no software queues are mapped to this hardware queue, @@ -2688,7 +2722,11 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) if (!uninit_q) return ERR_PTR(-ENOMEM); - q = blk_mq_init_allocated_queue(set, uninit_q); + /* + * Initialize the queue without an elevator. device_add_disk() will do + * the initialization. + */ + q = blk_mq_init_allocated_queue(set, uninit_q, false); if (IS_ERR(q)) blk_cleanup_queue(uninit_q); @@ -2770,6 +2808,23 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, int i, j, end; struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx; + if (q->nr_hw_queues < set->nr_hw_queues) { + struct blk_mq_hw_ctx **new_hctxs; + + new_hctxs = kcalloc_node(set->nr_hw_queues, + sizeof(*new_hctxs), GFP_KERNEL, + set->numa_node); + if (!new_hctxs) + return; + if (hctxs) + memcpy(new_hctxs, hctxs, q->nr_hw_queues * + sizeof(*hctxs)); + q->queue_hw_ctx = new_hctxs; + q->nr_hw_queues = set->nr_hw_queues; + kfree(hctxs); + hctxs = new_hctxs; + } + /* protect against switching io scheduler */ mutex_lock(&q->sysfs_lock); for (i = 0; i < set->nr_hw_queues; i++) { @@ -2825,21 +2880,9 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, mutex_unlock(&q->sysfs_lock); } -/* - * Maximum number of hardware queues we support. For single sets, we'll never - * have more than the CPUs (software queues). For multiple sets, the tag_set - * user may have set ->nr_hw_queues larger. - */ -static unsigned int nr_hw_queues(struct blk_mq_tag_set *set) -{ - if (set->nr_maps == 1) - return nr_cpu_ids; - - return max(set->nr_hw_queues, nr_cpu_ids); -} - struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, - struct request_queue *q) + struct request_queue *q, + bool elevator_init) { /* mark the queue as mq asap */ q->mq_ops = set->ops; @@ -2856,12 +2899,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, /* init q->mq_kobj and sw queues' kobjects */ blk_mq_sysfs_init(q); - q->nr_queues = nr_hw_queues(set); - q->queue_hw_ctx = kcalloc_node(q->nr_queues, sizeof(*(q->queue_hw_ctx)), - GFP_KERNEL, set->numa_node); - if (!q->queue_hw_ctx) - goto err_sys_init; - INIT_LIST_HEAD(&q->unused_hctx_list); spin_lock_init(&q->unused_hctx_lock); @@ -2901,19 +2938,14 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, blk_mq_add_queue_tag_set(set, q); blk_mq_map_swqueue(q); - if (!(set->flags & BLK_MQ_F_NO_SCHED)) { - int ret; - - ret = elevator_init_mq(q); - if (ret) - return ERR_PTR(ret); - } + if (elevator_init) + elevator_init_mq(q); return q; err_hctxs: kfree(q->queue_hw_ctx); -err_sys_init: + q->nr_hw_queues = 0; blk_mq_sysfs_deinit(q); err_poll: blk_stat_free_callback(q->poll_cb); @@ -3014,6 +3046,29 @@ static int blk_mq_update_queue_map(struct blk_mq_tag_set *set) } } +static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set, + int cur_nr_hw_queues, int new_nr_hw_queues) +{ + struct blk_mq_tags **new_tags; + + if (cur_nr_hw_queues >= new_nr_hw_queues) + return 0; + + new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *), + GFP_KERNEL, set->numa_node); + if (!new_tags) + return -ENOMEM; + + if (set->tags) + memcpy(new_tags, set->tags, cur_nr_hw_queues * + sizeof(*set->tags)); + kfree(set->tags); + set->tags = new_tags; + set->nr_hw_queues = new_nr_hw_queues; + + return 0; +} + /* * Alloc a tag set to be associated with one or more request queues. * May fail with EINVAL for various error conditions. May adjust the @@ -3067,9 +3122,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids) set->nr_hw_queues = nr_cpu_ids; - set->tags = kcalloc_node(nr_hw_queues(set), sizeof(struct blk_mq_tags *), - GFP_KERNEL, set->numa_node); - if (!set->tags) + if (blk_mq_realloc_tag_set_tags(set, 0, set->nr_hw_queues) < 0) return -ENOMEM; ret = -ENOMEM; @@ -3110,7 +3163,7 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set) { int i, j; - for (i = 0; i < nr_hw_queues(set); i++) + for (i = 0; i < set->nr_hw_queues; i++) blk_mq_free_map_and_requests(set, i); for (j = 0; j < set->nr_maps; j++) { @@ -3255,10 +3308,6 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, list_for_each_entry(q, &set->tag_list, tag_set_list) blk_mq_freeze_queue(q); /* - * Sync with blk_mq_queue_tag_busy_iter. - */ - synchronize_rcu(); - /* * Switch IO scheduler to 'none', cleaning up the data associated * with the previous scheduler. We will switch back once we are done * updating the new sw to hw queue mappings. @@ -3272,6 +3321,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, blk_mq_sysfs_unregister(q); } + if (blk_mq_realloc_tag_set_tags(set, set->nr_hw_queues, nr_hw_queues) < + 0) + goto reregister; + prev_nr_hw_queues = set->nr_hw_queues; set->nr_hw_queues = nr_hw_queues; blk_mq_update_queue_map(set); @@ -3288,6 +3341,7 @@ fallback: blk_mq_map_swqueue(q); } +reregister: list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_sysfs_register(q); blk_mq_debugfs_register_hctxs(q); @@ -3411,15 +3465,14 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q, kt = nsecs; mode = HRTIMER_MODE_REL; - hrtimer_init_on_stack(&hs.timer, CLOCK_MONOTONIC, mode); + hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC, mode); hrtimer_set_expires(&hs.timer, kt); - hrtimer_init_sleeper(&hs, current); do { if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE) break; set_current_state(TASK_UNINTERRUPTIBLE); - hrtimer_start_expires(&hs.timer, mode); + hrtimer_sleeper_start_expires(&hs, mode); if (hs.task) io_schedule(); hrtimer_cancel(&hs.timer); |