diff options
Diffstat (limited to 'block/blk-core.c')
-rw-r--r-- | block/blk-core.c | 281 |
1 files changed, 188 insertions, 93 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index 1f61b74867e4..3c923a7aeb56 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -29,11 +29,13 @@ #include <linux/fault-inject.h> #include <linux/list_sort.h> #include <linux/delay.h> +#include <linux/ratelimit.h> #define CREATE_TRACE_POINTS #include <trace/events/block.h> #include "blk.h" +#include "blk-cgroup.h" EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); @@ -280,7 +282,7 @@ EXPORT_SYMBOL(blk_stop_queue); * * This function does not cancel any asynchronous activity arising * out of elevator or throttling code. That would require elevaotor_exit() - * and blk_throtl_exit() to be called with queue lock initialized. + * and blkcg_exit_queue() to be called with queue lock initialized. * */ void blk_sync_queue(struct request_queue *q) @@ -365,17 +367,23 @@ void blk_drain_queue(struct request_queue *q, bool drain_all) spin_lock_irq(q->queue_lock); - elv_drain_elevator(q); - if (drain_all) - blk_throtl_drain(q); + /* + * The caller might be trying to drain @q before its + * elevator is initialized. + */ + if (q->elevator) + elv_drain_elevator(q); + + blkcg_drain_queue(q); /* * This function might be called on a queue which failed - * driver init after queue creation. Some drivers - * (e.g. fd) get unhappy in such cases. Kick queue iff - * dispatch queue has something on it. + * driver init after queue creation or is not yet fully + * active yet. Some drivers (e.g. fd and loop) get unhappy + * in such cases. Kick queue iff dispatch queue has + * something on it and @q has request_fn set. */ - if (!list_empty(&q->queue_head)) + if (!list_empty(&q->queue_head) && q->request_fn) __blk_run_queue(q); drain |= q->rq.elvpriv; @@ -403,6 +411,49 @@ void blk_drain_queue(struct request_queue *q, bool drain_all) } /** + * blk_queue_bypass_start - enter queue bypass mode + * @q: queue of interest + * + * In bypass mode, only the dispatch FIFO queue of @q is used. 
This + * function makes @q enter bypass mode and drains all requests which were + * throttled or issued before. On return, it's guaranteed that no request + * is being throttled or has ELVPRIV set and blk_queue_bypass() %true + * inside queue or RCU read lock. + */ +void blk_queue_bypass_start(struct request_queue *q) +{ + bool drain; + + spin_lock_irq(q->queue_lock); + drain = !q->bypass_depth++; + queue_flag_set(QUEUE_FLAG_BYPASS, q); + spin_unlock_irq(q->queue_lock); + + if (drain) { + blk_drain_queue(q, false); + /* ensure blk_queue_bypass() is %true inside RCU read lock */ + synchronize_rcu(); + } +} +EXPORT_SYMBOL_GPL(blk_queue_bypass_start); + +/** + * blk_queue_bypass_end - leave queue bypass mode + * @q: queue of interest + * + * Leave bypass mode and restore the normal queueing behavior. + */ +void blk_queue_bypass_end(struct request_queue *q) +{ + spin_lock_irq(q->queue_lock); + if (!--q->bypass_depth) + queue_flag_clear(QUEUE_FLAG_BYPASS, q); + WARN_ON_ONCE(q->bypass_depth < 0); + spin_unlock_irq(q->queue_lock); +} +EXPORT_SYMBOL_GPL(blk_queue_bypass_end); + +/** * blk_cleanup_queue - shutdown a request queue * @q: request queue to shutdown * @@ -418,6 +469,19 @@ void blk_cleanup_queue(struct request_queue *q) queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); spin_lock_irq(lock); + + /* + * Dead queue is permanently in bypass mode till released. Note + * that, unlike blk_queue_bypass_start(), we aren't performing + * synchronize_rcu() after entering bypass mode to avoid the delay + * as some drivers create and destroy a lot of queues while + * probing. This is still safe because blk_release_queue() will be + * called only after the queue refcnt drops to zero and nothing, + * RCU or not, would be traversing the queue by then. 
+ */ + q->bypass_depth++; + queue_flag_set(QUEUE_FLAG_BYPASS, q); + queue_flag_set(QUEUE_FLAG_NOMERGES, q); queue_flag_set(QUEUE_FLAG_NOXMERGES, q); queue_flag_set(QUEUE_FLAG_DEAD, q); @@ -428,13 +492,8 @@ void blk_cleanup_queue(struct request_queue *q) spin_unlock_irq(lock); mutex_unlock(&q->sysfs_lock); - /* - * Drain all requests queued before DEAD marking. The caller might - * be trying to tear down @q before its elevator is initialized, in - * which case we don't want to call into draining. - */ - if (q->elevator) - blk_drain_queue(q, true); + /* drain all requests queued before DEAD marking */ + blk_drain_queue(q, true); /* @q won't process any more request, flush async actions */ del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); @@ -498,14 +557,15 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) if (err) goto fail_id; - if (blk_throtl_init(q)) - goto fail_id; - setup_timer(&q->backing_dev_info.laptop_mode_wb_timer, laptop_mode_timer_fn, (unsigned long) q); setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); + INIT_LIST_HEAD(&q->queue_head); INIT_LIST_HEAD(&q->timeout_list); INIT_LIST_HEAD(&q->icq_list); +#ifdef CONFIG_BLK_CGROUP + INIT_LIST_HEAD(&q->blkg_list); +#endif INIT_LIST_HEAD(&q->flush_queue[0]); INIT_LIST_HEAD(&q->flush_queue[1]); INIT_LIST_HEAD(&q->flush_data_in_flight); @@ -522,6 +582,18 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) */ q->queue_lock = &q->__queue_lock; + /* + * A queue starts its life with bypass turned on to avoid + * unnecessary bypass on/off overhead and nasty surprises during + * init. The initial bypass will be finished at the end of + * blk_init_allocated_queue(). 
+ */ + q->bypass_depth = 1; + __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); + + if (blkcg_init_queue(q)) + goto fail_id; + return q; fail_id: @@ -614,15 +686,15 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, q->sg_reserved_size = INT_MAX; - /* - * all done - */ - if (!elevator_init(q, NULL)) { - blk_queue_congestion_threshold(q); - return q; - } + /* init elevator */ + if (elevator_init(q, NULL)) + return NULL; - return NULL; + blk_queue_congestion_threshold(q); + + /* all done, end the initial bypass */ + blk_queue_bypass_end(q); + return q; } EXPORT_SYMBOL(blk_init_allocated_queue); @@ -648,33 +720,6 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq) mempool_free(rq, q->rq.rq_pool); } -static struct request * -blk_alloc_request(struct request_queue *q, struct io_cq *icq, - unsigned int flags, gfp_t gfp_mask) -{ - struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); - - if (!rq) - return NULL; - - blk_rq_init(q, rq); - - rq->cmd_flags = flags | REQ_ALLOCED; - - if (flags & REQ_ELVPRIV) { - rq->elv.icq = icq; - if (unlikely(elv_set_request(q, rq, gfp_mask))) { - mempool_free(rq, q->rq.rq_pool); - return NULL; - } - /* @rq->elv.icq holds on to io_context until @rq is freed */ - if (icq) - get_io_context(icq->ioc); - } - - return rq; -} - /* * ioc_batching returns true if the ioc is a valid batching request and * should be given priority access to a request. @@ -763,6 +808,22 @@ static bool blk_rq_should_init_elevator(struct bio *bio) } /** + * rq_ioc - determine io_context for request allocation + * @bio: request being allocated is for this bio (can be %NULL) + * + * Determine io_context to use for request allocation for @bio. May return + * %NULL if %current->io_context doesn't exist. 
+ */ +static struct io_context *rq_ioc(struct bio *bio) +{ +#ifdef CONFIG_BLK_CGROUP + if (bio && bio->bi_ioc) + return bio->bi_ioc; +#endif + return current->io_context; +} + +/** * get_request - get a free request * @q: request_queue to allocate request from * @rw_flags: RW and SYNC flags @@ -779,7 +840,7 @@ static bool blk_rq_should_init_elevator(struct bio *bio) static struct request *get_request(struct request_queue *q, int rw_flags, struct bio *bio, gfp_t gfp_mask) { - struct request *rq = NULL; + struct request *rq; struct request_list *rl = &q->rq; struct elevator_type *et; struct io_context *ioc; @@ -789,7 +850,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags, int may_queue; retry: et = q->elevator->type; - ioc = current->io_context; + ioc = rq_ioc(bio); if (unlikely(blk_queue_dead(q))) return NULL; @@ -808,7 +869,7 @@ retry: */ if (!ioc && !retried) { spin_unlock_irq(q->queue_lock); - create_io_context(current, gfp_mask, q->node); + create_io_context(gfp_mask, q->node); spin_lock_irq(q->queue_lock); retried = true; goto retry; @@ -831,7 +892,7 @@ retry: * process is not a "batcher", and not * exempted by the IO scheduler */ - goto out; + return NULL; } } } @@ -844,7 +905,7 @@ retry: * allocated with any setting of ->nr_requests */ if (rl->count[is_sync] >= (3 * q->nr_requests / 2)) - goto out; + return NULL; rl->count[is_sync]++; rl->starved[is_sync] = 0; @@ -859,8 +920,7 @@ retry: * Also, lookup icq while holding queue_lock. If it doesn't exist, * it will be created after releasing queue_lock. 
*/ - if (blk_rq_should_init_elevator(bio) && - !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) { + if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) { rw_flags |= REQ_ELVPRIV; rl->elvpriv++; if (et->icq_cache && ioc) @@ -871,41 +931,36 @@ retry: rw_flags |= REQ_IO_STAT; spin_unlock_irq(q->queue_lock); - /* create icq if missing */ - if ((rw_flags & REQ_ELVPRIV) && unlikely(et->icq_cache && !icq)) { - icq = ioc_create_icq(q, gfp_mask); - if (!icq) - goto fail_icq; - } - - rq = blk_alloc_request(q, icq, rw_flags, gfp_mask); + /* allocate and init request */ + rq = mempool_alloc(q->rq.rq_pool, gfp_mask); + if (!rq) + goto fail_alloc; -fail_icq: - if (unlikely(!rq)) { - /* - * Allocation failed presumably due to memory. Undo anything - * we might have messed up. - * - * Allocating task should really be put onto the front of the - * wait queue, but this is pretty rare. - */ - spin_lock_irq(q->queue_lock); - freed_request(q, rw_flags); + blk_rq_init(q, rq); + rq->cmd_flags = rw_flags | REQ_ALLOCED; + + /* init elvpriv */ + if (rw_flags & REQ_ELVPRIV) { + if (unlikely(et->icq_cache && !icq)) { + create_io_context(gfp_mask, q->node); + ioc = rq_ioc(bio); + if (!ioc) + goto fail_elvpriv; + + icq = ioc_create_icq(ioc, q, gfp_mask); + if (!icq) + goto fail_elvpriv; + } - /* - * in the very unlikely event that allocation failed and no - * requests for this direction was pending, mark us starved - * so that freeing of a request in the other direction will - * notice us. another possible fix would be to split the - * rq mempool into READ and WRITE - */ -rq_starved: - if (unlikely(rl->count[is_sync] == 0)) - rl->starved[is_sync] = 1; + rq->elv.icq = icq; + if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) + goto fail_elvpriv; - goto out; + /* @rq->elv.icq holds io_context until @rq is freed */ + if (icq) + get_io_context(icq->ioc); } - +out: /* * ioc may be NULL here, and ioc_batching will be false. 
That's * OK, if the queue is under the request limit then requests need @@ -916,8 +971,48 @@ rq_starved: ioc->nr_batch_requests--; trace_block_getrq(q, bio, rw_flags & 1); -out: return rq; + +fail_elvpriv: + /* + * elvpriv init failed. ioc, icq and elvpriv aren't mempool backed + * and may fail indefinitely under memory pressure and thus + * shouldn't stall IO. Treat this request as !elvpriv. This will + * disturb iosched and blkcg but weird is better than dead. + */ + printk_ratelimited(KERN_WARNING "%s: request aux data allocation failed, iosched may be disturbed\n", + dev_name(q->backing_dev_info.dev)); + + rq->cmd_flags &= ~REQ_ELVPRIV; + rq->elv.icq = NULL; + + spin_lock_irq(q->queue_lock); + rl->elvpriv--; + spin_unlock_irq(q->queue_lock); + goto out; + +fail_alloc: + /* + * Allocation failed presumably due to memory. Undo anything we + * might have messed up. + * + * Allocating task should really be put onto the front of the wait + * queue, but this is pretty rare. + */ + spin_lock_irq(q->queue_lock); + freed_request(q, rw_flags); + + /* + * in the very unlikely event that allocation failed and no + * requests for this direction was pending, mark us starved so that + * freeing of a request in the other direction will notice + * us. another possible fix would be to split the rq mempool into + * READ and WRITE + */ +rq_starved: + if (unlikely(rl->count[is_sync] == 0)) + rl->starved[is_sync] = 1; + return NULL; } /** @@ -961,7 +1056,7 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags, * up to a big batch of them for a small period time. * See ioc_batching, ioc_set_batching */ - create_io_context(current, GFP_NOIO, q->node); + create_io_context(GFP_NOIO, q->node); ioc_set_batching(q, current->io_context); spin_lock_irq(q->queue_lock); |