Diffstat (limited to 'block/blk-core.c')
-rw-r--r-- | block/blk-core.c | 80
1 file changed, 62 insertions, 18 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index d0cc6e14d2f0..089e890ab208 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -34,8 +34,10 @@
 #include <linux/ratelimit.h>
 #include <linux/pm_runtime.h>
 #include <linux/blk-cgroup.h>
+#include <linux/t10-pi.h>
 #include <linux/debugfs.h>
 #include <linux/bpf.h>
+#include <linux/psi.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/block.h>
@@ -129,6 +131,10 @@ static const char *const blk_op_name[] = {
 	REQ_OP_NAME(DISCARD),
 	REQ_OP_NAME(SECURE_ERASE),
 	REQ_OP_NAME(ZONE_RESET),
+	REQ_OP_NAME(ZONE_RESET_ALL),
+	REQ_OP_NAME(ZONE_OPEN),
+	REQ_OP_NAME(ZONE_CLOSE),
+	REQ_OP_NAME(ZONE_FINISH),
 	REQ_OP_NAME(WRITE_SAME),
 	REQ_OP_NAME(WRITE_ZEROES),
 	REQ_OP_NAME(SCSI_IN),
@@ -333,18 +339,19 @@ EXPORT_SYMBOL_GPL(blk_set_queue_dying);
  */
 void blk_cleanup_queue(struct request_queue *q)
 {
+	WARN_ON_ONCE(blk_queue_registered(q));
+
 	/* mark @q DYING, no new request or merges will be allowed afterwards */
-	mutex_lock(&q->sysfs_lock);
 	blk_set_queue_dying(q);
 
 	blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
 	blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
 	blk_queue_flag_set(QUEUE_FLAG_DYING, q);
-	mutex_unlock(&q->sysfs_lock);
 
 	/*
 	 * Drain all requests queued before DYING marking. Set DEAD flag to
-	 * prevent that q->request_fn() gets invoked after draining finished.
+	 * prevent that blk_mq_run_hw_queues() accesses the hardware queues
+	 * after draining finished.
 	 */
 	blk_freeze_queue(q);
@@ -479,7 +486,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	if (!q)
 		return NULL;
 
-	INIT_LIST_HEAD(&q->queue_head);
 	q->last_merge = NULL;
 
 	q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
@@ -518,6 +524,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	mutex_init(&q->blk_trace_mutex);
 #endif
 	mutex_init(&q->sysfs_lock);
+	mutex_init(&q->sysfs_dir_lock);
 	spin_lock_init(&q->queue_lock);
 
 	init_waitqueue_head(&q->mq_freeze_wq);
@@ -601,6 +608,7 @@ bool bio_attempt_back_merge(struct request *req, struct bio *bio,
 		return false;
 
 	trace_block_bio_backmerge(req->q, req, bio);
+	rq_qos_merge(req->q, req, bio);
 
 	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
 		blk_rq_set_mixed_merge(req);
@@ -622,6 +630,7 @@ bool bio_attempt_front_merge(struct request *req, struct bio *bio,
 		return false;
 
 	trace_block_bio_frontmerge(req->q, req, bio);
+	rq_qos_merge(req->q, req, bio);
 
 	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
 		blk_rq_set_mixed_merge(req);
@@ -647,6 +656,8 @@ bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
 	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
 		goto no_merge;
 
+	rq_qos_merge(q, req, bio);
+
 	req->biotail->bi_next = bio;
 	req->biotail = bio;
 	req->__data_len += bio->bi_iter.bi_size;
@@ -840,11 +851,7 @@ static inline int blk_partition_remap(struct bio *bio)
 	if (unlikely(bio_check_ro(bio, p)))
 		goto out;
 
-	/*
-	 * Zone reset does not include bi_size so bio_sectors() is always 0.
-	 * Include a test for the reset op code and perform the remap if needed.
-	 */
-	if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) {
+	if (bio_sectors(bio)) {
 		if (bio_check_eod(bio, part_nr_sects_read(p)))
 			goto out;
 		bio->bi_iter.bi_sector += p->start_sect;
@@ -878,11 +885,14 @@ generic_make_request_checks(struct bio *bio)
 	}
 
 	/*
-	 * For a REQ_NOWAIT based request, return -EOPNOTSUPP
-	 * if queue is not a request based queue.
+	 * Non-mq queues do not honor REQ_NOWAIT, so complete a bio
+	 * with BLK_STS_AGAIN status in order to catch -EAGAIN and
+	 * to give a chance to the caller to repeat request gracefully.
 	 */
-	if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_mq(q))
-		goto not_supported;
+	if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_mq(q)) {
+		status = BLK_STS_AGAIN;
+		goto end_io;
+	}
 
 	if (should_fail_bio(bio))
 		goto end_io;
@@ -928,9 +938,16 @@ generic_make_request_checks(struct bio *bio)
 			goto not_supported;
 		break;
 	case REQ_OP_ZONE_RESET:
+	case REQ_OP_ZONE_OPEN:
+	case REQ_OP_ZONE_CLOSE:
+	case REQ_OP_ZONE_FINISH:
 		if (!blk_queue_is_zoned(q))
 			goto not_supported;
 		break;
+	case REQ_OP_ZONE_RESET_ALL:
+		if (!blk_queue_is_zoned(q) || !blk_queue_zone_resetall(q))
+			goto not_supported;
+		break;
 	case REQ_OP_WRITE_ZEROES:
 		if (!q->limits.max_write_zeroes_sectors)
 			goto not_supported;
@@ -1128,6 +1145,10 @@ EXPORT_SYMBOL_GPL(direct_make_request);
  */
 blk_qc_t submit_bio(struct bio *bio)
 {
+	bool workingset_read = false;
+	unsigned long pflags;
+	blk_qc_t ret;
+
 	if (blkcg_punt_bio_submit(bio))
 		return BLK_QC_T_NONE;
 
@@ -1146,6 +1167,8 @@ blk_qc_t submit_bio(struct bio *bio)
 		if (op_is_write(bio_op(bio))) {
 			count_vm_events(PGPGOUT, count);
 		} else {
+			if (bio_flagged(bio, BIO_WORKINGSET))
+				workingset_read = true;
 			task_io_account_read(bio->bi_iter.bi_size);
 			count_vm_events(PGPGIN, count);
 		}
@@ -1160,7 +1183,21 @@ blk_qc_t submit_bio(struct bio *bio)
 		}
 	}
 
-	return generic_make_request(bio);
+	/*
+	 * If we're reading data that is part of the userspace
+	 * workingset, count submission time as memory stall. When the
+	 * device is congested, or the submitting cgroup IO-throttled,
+	 * submission can be a significant part of overall IO time.
+	 */
+	if (workingset_read)
+		psi_memstall_enter(&pflags);
+
+	ret = generic_make_request(bio);
+
+	if (workingset_read)
+		psi_memstall_leave(&pflags);
+
+	return ret;
 }
 EXPORT_SYMBOL(submit_bio);
 
@@ -1276,7 +1313,7 @@ EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
 
 void blk_account_io_completion(struct request *req, unsigned int bytes)
 {
-	if (blk_do_io_stat(req)) {
+	if (req->part && blk_do_io_stat(req)) {
 		const int sgrp = op_stat_group(req_op(req));
 		struct hd_struct *part;
 
@@ -1294,7 +1331,8 @@ void blk_account_io_done(struct request *req, u64 now)
 	 * normal IO on queueing nor completion. Accounting the
 	 * containing request is enough.
 	 */
-	if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
+	if (req->part && blk_do_io_stat(req) &&
+	    !(req->rq_flags & RQF_FLUSH_SEQ)) {
 		const int sgrp = op_stat_group(req_op(req));
 		struct hd_struct *part;
 
@@ -1405,6 +1443,12 @@ bool blk_update_request(struct request *req, blk_status_t error,
 	if (!req->bio)
 		return false;
 
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+	if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
+	    error == BLK_STS_OK)
+		req->q->integrity.profile->complete_fn(req, nr_bytes);
+#endif
+
 	if (unlikely(error && !blk_rq_is_passthrough(req) &&
 		     !(req->rq_flags & RQF_QUIET)))
 		print_req_error(req, error, __func__);
@@ -1752,9 +1796,9 @@ int __init blk_dev_init(void)
 {
 	BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS));
 	BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
-			FIELD_SIZEOF(struct request, cmd_flags));
+			sizeof_field(struct request, cmd_flags));
 	BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
-			FIELD_SIZEOF(struct bio, bi_opf));
+			sizeof_field(struct bio, bi_opf));
 
 	/* used for unplugging and affects IO latency/throughput - HIGHPRI */
 	kblockd_workqueue = alloc_workqueue("kblockd",
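
For context, the largest behavioral addition in this diff is the PSI memstall accounting that now brackets generic_make_request() in submit_bio(). The sketch below is a minimal, hypothetical reuse of that same bracketing pattern, not code from the patch: the helper name my_submit_workingset_read() is invented for illustration, while psi_memstall_enter()/psi_memstall_leave(), bio_flagged(), BIO_WORKINGSET, op_is_write() and generic_make_request() are the in-tree interfaces the patch itself relies on.

/* Illustrative sketch only -- not part of the patch above. */
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/psi.h>

static blk_qc_t my_submit_workingset_read(struct bio *bio)
{
	unsigned long pflags;
	blk_qc_t ret;
	/*
	 * Only reads of pages that belong to the userspace workingset
	 * count as a memory stall; writes never do.
	 */
	bool stall = !op_is_write(bio_op(bio)) &&
		     bio_flagged(bio, BIO_WORKINGSET);

	/* Open the stall window for the duration of the submission... */
	if (stall)
		psi_memstall_enter(&pflags);

	ret = generic_make_request(bio);

	/* ...and close it once the bio has been handed to the queue. */
	if (stall)
		psi_memstall_leave(&pflags);

	return ret;
}

Note that the window deliberately covers submission only: when the device is congested or the submitting cgroup is IO-throttled, queueing itself can dominate overall IO time, while time spent waiting for completion is accounted wherever the task actually blocks.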