Diffstat (limited to 'block/blk-mq.c')
-rw-r--r-- | block/blk-mq.c | 131
1 file changed, 79 insertions, 52 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 33c428530193..e68b71b85a7e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -33,7 +33,6 @@ static DEFINE_MUTEX(all_q_mutex);
 static LIST_HEAD(all_q_list);
 
 static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx);
-static void blk_mq_run_queues(struct request_queue *q);
 
 /*
  * Check if any of the ctx's have pending work in this hardware queue
@@ -42,7 +41,7 @@ static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx)
 {
 	unsigned int i;
 
-	for (i = 0; i < hctx->ctx_map.map_size; i++)
+	for (i = 0; i < hctx->ctx_map.size; i++)
 		if (hctx->ctx_map.map[i].word)
 			return true;
 
@@ -78,7 +77,7 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
 	clear_bit(CTX_TO_BIT(hctx, ctx), &bm->word);
 }
 
-static int blk_mq_queue_enter(struct request_queue *q)
+static int blk_mq_queue_enter(struct request_queue *q, gfp_t gfp)
 {
 	while (true) {
 		int ret;
@@ -86,6 +85,9 @@ static int blk_mq_queue_enter(struct request_queue *q)
 		if (percpu_ref_tryget_live(&q->mq_usage_counter))
 			return 0;
 
+		if (!(gfp & __GFP_WAIT))
+			return -EBUSY;
+
 		ret = wait_event_interruptible(q->mq_freeze_wq,
 				!q->mq_freeze_depth || blk_queue_dying(q));
 		if (blk_queue_dying(q))
@@ -118,7 +120,7 @@ void blk_mq_freeze_queue_start(struct request_queue *q)
 
 	if (freeze) {
 		percpu_ref_kill(&q->mq_usage_counter);
-		blk_mq_run_queues(q);
+		blk_mq_run_hw_queues(q, false);
 	}
 }
 EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start);
@@ -257,7 +259,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
 	struct blk_mq_alloc_data alloc_data;
 	int ret;
 
-	ret = blk_mq_queue_enter(q);
+	ret = blk_mq_queue_enter(q, gfp);
 	if (ret)
 		return ERR_PTR(ret);
 
@@ -675,8 +677,11 @@ static void blk_mq_rq_timer(unsigned long priv)
 		data.next = blk_rq_timeout(round_jiffies_up(data.next));
 		mod_timer(&q->timeout, data.next);
 	} else {
-		queue_for_each_hw_ctx(q, hctx, i)
-			blk_mq_tag_idle(hctx);
+		queue_for_each_hw_ctx(q, hctx, i) {
+			/* the hctx may be unmapped, so check it here */
+			if (blk_mq_hw_queue_mapped(hctx))
+				blk_mq_tag_idle(hctx);
+		}
 	}
 }
 
@@ -728,7 +733,7 @@ static void flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 	struct blk_mq_ctx *ctx;
 	int i;
 
-	for (i = 0; i < hctx->ctx_map.map_size; i++) {
+	for (i = 0; i < hctx->ctx_map.size; i++) {
 		struct blk_align_bitmap *bm = &hctx->ctx_map.map[i];
 		unsigned int off, bit;
 
@@ -853,6 +858,16 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 		spin_lock(&hctx->lock);
 		list_splice(&rq_list, &hctx->dispatch);
 		spin_unlock(&hctx->lock);
+		/*
+		 * the queue is expected stopped with BLK_MQ_RQ_QUEUE_BUSY, but
+		 * it's possible the queue is stopped and restarted again
+		 * before this. Queue restart will dispatch requests. And since
+		 * requests in rq_list aren't added into hctx->dispatch yet,
+		 * the requests in rq_list might get lost.
+		 *
+		 * blk_mq_run_hw_queue() already checks the STOPPED bit
+		 **/
+		blk_mq_run_hw_queue(hctx, true);
 	}
 }
 
@@ -904,7 +919,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 			&hctx->run_work, 0);
 }
 
-static void blk_mq_run_queues(struct request_queue *q)
+void blk_mq_run_hw_queues(struct request_queue *q, bool async)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int i;
@@ -915,9 +930,10 @@ static void blk_mq_run_queues(struct request_queue *q)
 		    test_bit(BLK_MQ_S_STOPPED, &hctx->state))
 			continue;
 
-		blk_mq_run_hw_queue(hctx, false);
+		blk_mq_run_hw_queue(hctx, async);
 	}
 }
+EXPORT_SYMBOL(blk_mq_run_hw_queues);
 
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
 {
@@ -1186,7 +1202,7 @@ static struct request *blk_mq_map_request(struct request_queue *q,
 	int rw = bio_data_dir(bio);
 	struct blk_mq_alloc_data alloc_data;
 
-	if (unlikely(blk_mq_queue_enter(q))) {
+	if (unlikely(blk_mq_queue_enter(q, GFP_KERNEL))) {
 		bio_endio(bio, -EIO);
 		return NULL;
 	}
@@ -1517,8 +1533,6 @@ static int blk_mq_alloc_bitmap(struct blk_mq_ctxmap *bitmap, int node)
 	if (!bitmap->map)
 		return -ENOMEM;
 
-	bitmap->map_size = num_maps;
-
 	total = nr_cpu_ids;
 	for (i = 0; i < num_maps; i++) {
 		bitmap->map[i].depth = min(total, bitmap->bits_per_word);
@@ -1570,22 +1584,6 @@ static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
 	return NOTIFY_OK;
 }
 
-static int blk_mq_hctx_cpu_online(struct blk_mq_hw_ctx *hctx, int cpu)
-{
-	struct request_queue *q = hctx->queue;
-	struct blk_mq_tag_set *set = q->tag_set;
-
-	if (set->tags[hctx->queue_num])
-		return NOTIFY_OK;
-
-	set->tags[hctx->queue_num] = blk_mq_init_rq_map(set, hctx->queue_num);
-	if (!set->tags[hctx->queue_num])
-		return NOTIFY_STOP;
-
-	hctx->tags = set->tags[hctx->queue_num];
-	return NOTIFY_OK;
-}
-
 static int blk_mq_hctx_notify(void *data, unsigned long action,
 			      unsigned int cpu)
 {
@@ -1593,8 +1591,11 @@ static int blk_mq_hctx_notify(void *data, unsigned long action,
 
 	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
 		return blk_mq_hctx_cpu_offline(hctx, cpu);
-	else if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
-		return blk_mq_hctx_cpu_online(hctx, cpu);
+
+	/*
+	 * In case of CPU online, tags may be reallocated
+	 * in blk_mq_map_swqueue() after mapping is updated.
+	 */
 
 	return NOTIFY_OK;
 }
@@ -1759,8 +1760,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 			continue;
 
 		hctx = q->mq_ops->map_queue(q, i);
-		cpumask_set_cpu(i, hctx->cpumask);
-		hctx->nr_ctx++;
 
 		/*
 		 * Set local node, IFF we have more than one hw queue. If
@@ -1776,6 +1775,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 	unsigned int i;
 	struct blk_mq_hw_ctx *hctx;
 	struct blk_mq_ctx *ctx;
+	struct blk_mq_tag_set *set = q->tag_set;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		cpumask_clear(hctx->cpumask);
@@ -1797,21 +1797,34 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 	}
 
 	queue_for_each_hw_ctx(q, hctx, i) {
+		struct blk_mq_ctxmap *map = &hctx->ctx_map;
+
 		/*
 		 * If no software queues are mapped to this hardware queue,
 		 * disable it and free the request entries.
 		 */
 		if (!hctx->nr_ctx) {
-			struct blk_mq_tag_set *set = q->tag_set;
-
 			if (set->tags[i]) {
 				blk_mq_free_rq_map(set, set->tags[i], i);
 				set->tags[i] = NULL;
-				hctx->tags = NULL;
 			}
+			hctx->tags = NULL;
 			continue;
 		}
 
+		/* unmapped hw queue can be remapped after CPU topo changed */
+		if (!set->tags[i])
+			set->tags[i] = blk_mq_init_rq_map(set, i);
+		hctx->tags = set->tags[i];
+		WARN_ON(!hctx->tags);
+
+		/*
+		 * Set the map size to the number of mapped software queues.
+		 * This is more accurate and more efficient than looping
+		 * over all possibly mapped software queues.
+		 */
+		map->size = DIV_ROUND_UP(hctx->nr_ctx, map->bits_per_word);
+
 		/*
 		 * Initialize batch roundrobin counts
 		 */
@@ -1889,9 +1902,25 @@ void blk_mq_release(struct request_queue *q)
 
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 {
+	struct request_queue *uninit_q, *q;
+
+	uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);
+	if (!uninit_q)
+		return ERR_PTR(-ENOMEM);
+
+	q = blk_mq_init_allocated_queue(set, uninit_q);
+	if (IS_ERR(q))
+		blk_cleanup_queue(uninit_q);
+
+	return q;
+}
+EXPORT_SYMBOL(blk_mq_init_queue);
+
+struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
+						  struct request_queue *q)
+{
 	struct blk_mq_hw_ctx **hctxs;
 	struct blk_mq_ctx __percpu *ctx;
-	struct request_queue *q;
 	unsigned int *map;
 	int i;
 
@@ -1926,20 +1955,16 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 		hctxs[i]->queue_num = i;
 	}
 
-	q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);
-	if (!q)
-		goto err_hctxs;
-
 	/*
 	 * Init percpu_ref in atomic mode so that it's faster to shutdown.
 	 * See blk_register_queue() for details.
 	 */
 	if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release,
 			    PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
-		goto err_mq_usage;
+		goto err_hctxs;
 
 	setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
-	blk_queue_rq_timeout(q, 30000);
+	blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30000);
 
 	q->nr_queues = nr_cpu_ids;
 	q->nr_hw_queues = set->nr_hw_queues;
@@ -1965,9 +1990,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	else
 		blk_queue_make_request(q, blk_sq_make_request);
 
-	if (set->timeout)
-		blk_queue_rq_timeout(q, set->timeout);
-
 	/*
 	 * Do this after blk_queue_make_request() overrides it...
 	 */
@@ -1979,7 +2001,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	blk_mq_init_cpu_queues(q, set->nr_hw_queues);
 
 	if (blk_mq_init_hw_queues(q, set))
-		goto err_mq_usage;
+		goto err_hctxs;
 
 	mutex_lock(&all_q_mutex);
 	list_add_tail(&q->all_q_node, &all_q_list);
@@ -1991,8 +2013,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 
 	return q;
 
-err_mq_usage:
-	blk_cleanup_queue(q);
 err_hctxs:
 	kfree(map);
 	for (i = 0; i < set->nr_hw_queues; i++) {
@@ -2007,7 +2027,7 @@ err_percpu:
 	free_percpu(ctx);
 	return ERR_PTR(-ENOMEM);
 }
-EXPORT_SYMBOL(blk_mq_init_queue);
+EXPORT_SYMBOL(blk_mq_init_allocated_queue);
 
 void blk_mq_free_queue(struct request_queue *q)
 {
@@ -2075,9 +2095,16 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
 	 */
 	list_for_each_entry(q, &all_q_list, all_q_node)
 		blk_mq_freeze_queue_start(q);
-	list_for_each_entry(q, &all_q_list, all_q_node)
+	list_for_each_entry(q, &all_q_list, all_q_node) {
 		blk_mq_freeze_queue_wait(q);
 
+		/*
+		 * timeout handler can't touch hw queue during the
+		 * reinitialization
+		 */
+		del_timer_sync(&q->timeout);
+	}
+
 	list_for_each_entry(q, &all_q_list, all_q_node)
 		blk_mq_queue_reinit(q);
 
@@ -2159,7 +2186,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 	if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN)
 		return -EINVAL;
 
-	if (!set->nr_hw_queues || !set->ops->queue_rq || !set->ops->map_queue)
+	if (!set->ops->queue_rq || !set->ops->map_queue)
 		return -EINVAL;
 
 	if (set->queue_depth > BLK_MQ_MAX_DEPTH) {
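For context, the caller-side pattern this patch enables is visible in the new blk_mq_init_queue() wrapper above: allocate a bare queue, then hand it to blk_mq_init_allocated_queue(), and use the newly exported blk_mq_run_hw_queues() to restart dispatch. Below is a minimal, hedged sketch of such a caller; the my_* identifiers and the pre-filled tag set are hypothetical, and only the blk-mq/block-layer calls come from this diff or the existing API of that era.

/*
 * Illustrative sketch only: a hypothetical driver using the interfaces
 * added or exported by this patch (blk_mq_init_allocated_queue() and
 * blk_mq_run_hw_queues()). The my_* names are made up; the tag set is
 * assumed to have been set up with blk_mq_alloc_tag_set() elsewhere.
 */
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/err.h>

static struct blk_mq_tag_set my_tag_set;	/* assumed already initialized */

static struct request_queue *my_create_queue(int node)
{
	struct request_queue *uninit_q, *q;

	/* Allocate the bare queue first, as the new blk_mq_init_queue() does. */
	uninit_q = blk_alloc_queue_node(GFP_KERNEL, node);
	if (!uninit_q)
		return ERR_PTR(-ENOMEM);

	/* Attach blk-mq state (hctxs, ctxs, timeout) to the pre-allocated queue. */
	q = blk_mq_init_allocated_queue(&my_tag_set, uninit_q);
	if (IS_ERR(q))
		blk_cleanup_queue(uninit_q);

	return q;
}

static void my_kick_queue(struct request_queue *q)
{
	/*
	 * Run all hardware queues asynchronously; stopped hctxs are skipped,
	 * as the now-exported blk_mq_run_hw_queues() above shows.
	 */
	blk_mq_run_hw_queues(q, true);
}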