Diffstat (limited to 'block/blk-core.c')
-rw-r--r--  block/blk-core.c  80
1 file changed, 62 insertions(+), 18 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index d0cc6e14d2f0..089e890ab208 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -34,8 +34,10 @@
#include <linux/ratelimit.h>
#include <linux/pm_runtime.h>
#include <linux/blk-cgroup.h>
+#include <linux/t10-pi.h>
#include <linux/debugfs.h>
#include <linux/bpf.h>
+#include <linux/psi.h>
#define CREATE_TRACE_POINTS
#include <trace/events/block.h>
@@ -129,6 +131,10 @@ static const char *const blk_op_name[] = {
REQ_OP_NAME(DISCARD),
REQ_OP_NAME(SECURE_ERASE),
REQ_OP_NAME(ZONE_RESET),
+ REQ_OP_NAME(ZONE_RESET_ALL),
+ REQ_OP_NAME(ZONE_OPEN),
+ REQ_OP_NAME(ZONE_CLOSE),
+ REQ_OP_NAME(ZONE_FINISH),
REQ_OP_NAME(WRITE_SAME),
REQ_OP_NAME(WRITE_ZEROES),
REQ_OP_NAME(SCSI_IN),
@@ -333,18 +339,19 @@ EXPORT_SYMBOL_GPL(blk_set_queue_dying);
*/
void blk_cleanup_queue(struct request_queue *q)
{
+ WARN_ON_ONCE(blk_queue_registered(q));
+
/* mark @q DYING, no new request or merges will be allowed afterwards */
- mutex_lock(&q->sysfs_lock);
blk_set_queue_dying(q);
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
blk_queue_flag_set(QUEUE_FLAG_DYING, q);
- mutex_unlock(&q->sysfs_lock);
/*
* Drain all requests queued before DYING marking. Set DEAD flag to
- * prevent that q->request_fn() gets invoked after draining finished.
+ * prevent that blk_mq_run_hw_queues() accesses the hardware queues
+ * after draining finished.
*/
blk_freeze_queue(q);
@@ -479,7 +486,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
if (!q)
return NULL;
- INIT_LIST_HEAD(&q->queue_head);
q->last_merge = NULL;
q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
@@ -518,6 +524,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
mutex_init(&q->blk_trace_mutex);
#endif
mutex_init(&q->sysfs_lock);
+ mutex_init(&q->sysfs_dir_lock);
spin_lock_init(&q->queue_lock);
init_waitqueue_head(&q->mq_freeze_wq);
@@ -601,6 +608,7 @@ bool bio_attempt_back_merge(struct request *req, struct bio *bio,
return false;
trace_block_bio_backmerge(req->q, req, bio);
+ rq_qos_merge(req->q, req, bio);
if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
blk_rq_set_mixed_merge(req);
@@ -622,6 +630,7 @@ bool bio_attempt_front_merge(struct request *req, struct bio *bio,
return false;
trace_block_bio_frontmerge(req->q, req, bio);
+ rq_qos_merge(req->q, req, bio);
if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
blk_rq_set_mixed_merge(req);
@@ -647,6 +656,8 @@ bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
blk_rq_get_max_sectors(req, blk_rq_pos(req)))
goto no_merge;
+ rq_qos_merge(q, req, bio);
+
req->biotail->bi_next = bio;
req->biotail = bio;
req->__data_len += bio->bi_iter.bi_size;
@@ -840,11 +851,7 @@ static inline int blk_partition_remap(struct bio *bio)
if (unlikely(bio_check_ro(bio, p)))
goto out;
- /*
- * Zone reset does not include bi_size so bio_sectors() is always 0.
- * Include a test for the reset op code and perform the remap if needed.
- */
- if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) {
+ if (bio_sectors(bio)) {
if (bio_check_eod(bio, part_nr_sects_read(p)))
goto out;
bio->bi_iter.bi_sector += p->start_sect;
@@ -878,11 +885,14 @@ generic_make_request_checks(struct bio *bio)
}
/*
- * For a REQ_NOWAIT based request, return -EOPNOTSUPP
- * if queue is not a request based queue.
+ * Non-mq queues do not honor REQ_NOWAIT, so complete a bio
+ * with BLK_STS_AGAIN status in order to catch -EAGAIN and
+ * to give a chance to the caller to repeat request gracefully.
*/
- if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_mq(q))
- goto not_supported;
+ if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_mq(q)) {
+ status = BLK_STS_AGAIN;
+ goto end_io;
+ }
if (should_fail_bio(bio))
goto end_io;
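
With this hunk, a REQ_NOWAIT bio on a non-mq queue now completes with BLK_STS_AGAIN, which reaches userspace as -EAGAIN instead of -EOPNOTSUPP, so the caller can repeat the request gracefully. A minimal userspace sketch of that retry pattern using preadv2() with RWF_NOWAIT; the file path and the simple retry loop are illustrative assumptions, not part of this patch:

    /* Sketch: non-blocking read that retries when the kernel reports EAGAIN. */
    #define _GNU_SOURCE
    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/uio.h>
    #include <unistd.h>

    int main(void)
    {
        char buf[4096];
        struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
        int fd = open("/tmp/testfile", O_RDONLY);   /* path is an assumption */
        ssize_t ret;

        if (fd < 0)
            return 1;

        do {
            /* RWF_NOWAIT: do not block; EAGAIN means "submit again later". */
            ret = preadv2(fd, &iov, 1, 0, RWF_NOWAIT);
        } while (ret < 0 && errno == EAGAIN);       /* a real caller would back off */

        if (ret < 0)
            perror("preadv2");
        close(fd);
        return ret < 0;
    }
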
@@ -928,9 +938,16 @@ generic_make_request_checks(struct bio *bio)
goto not_supported;
break;
case REQ_OP_ZONE_RESET:
+ case REQ_OP_ZONE_OPEN:
+ case REQ_OP_ZONE_CLOSE:
+ case REQ_OP_ZONE_FINISH:
if (!blk_queue_is_zoned(q))
goto not_supported;
break;
+ case REQ_OP_ZONE_RESET_ALL:
+ if (!blk_queue_is_zoned(q) || !blk_queue_zone_resetall(q))
+ goto not_supported;
+ break;
case REQ_OP_WRITE_ZEROES:
if (!q->limits.max_write_zeroes_sectors)
goto not_supported;
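
The new REQ_OP_ZONE_OPEN/CLOSE/FINISH and REQ_OP_ZONE_RESET_ALL opcodes back the zone-management ioctls for zoned block devices. A hedged userspace sketch of exercising one of them via BLKOPENZONE from <linux/blkzoned.h> (present on kernels that ship this series); the device path and zone size are assumptions:

    /* Sketch: explicitly open the first zone of a zoned block device. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/blkzoned.h>

    int main(void)
    {
        /* Zone size in 512-byte sectors; both values are assumptions. */
        struct blk_zone_range range = { .sector = 0, .nr_sectors = 524288 };
        int fd = open("/dev/nullb0", O_RDWR);       /* device path is an assumption */

        if (fd < 0)
            return 1;

        /* BLKOPENZONE/BLKCLOSEZONE/BLKFINISHZONE map onto the new REQ_OP_ZONE_* opcodes. */
        if (ioctl(fd, BLKOPENZONE, &range) < 0)
            perror("BLKOPENZONE");

        close(fd);
        return 0;
    }
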
@@ -1128,6 +1145,10 @@ EXPORT_SYMBOL_GPL(direct_make_request);
*/
blk_qc_t submit_bio(struct bio *bio)
{
+ bool workingset_read = false;
+ unsigned long pflags;
+ blk_qc_t ret;
+
if (blkcg_punt_bio_submit(bio))
return BLK_QC_T_NONE;
@@ -1146,6 +1167,8 @@ blk_qc_t submit_bio(struct bio *bio)
if (op_is_write(bio_op(bio))) {
count_vm_events(PGPGOUT, count);
} else {
+ if (bio_flagged(bio, BIO_WORKINGSET))
+ workingset_read = true;
task_io_account_read(bio->bi_iter.bi_size);
count_vm_events(PGPGIN, count);
}
@@ -1160,7 +1183,21 @@ blk_qc_t submit_bio(struct bio *bio)
}
}
- return generic_make_request(bio);
+ /*
+ * If we're reading data that is part of the userspace
+ * workingset, count submission time as memory stall. When the
+ * device is congested, or the submitting cgroup IO-throttled,
+ * submission can be a significant part of overall IO time.
+ */
+ if (workingset_read)
+ psi_memstall_enter(&pflags);
+
+ ret = generic_make_request(bio);
+
+ if (workingset_read)
+ psi_memstall_leave(&pflags);
+
+ return ret;
}
EXPORT_SYMBOL(submit_bio);
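
The psi_memstall_enter()/psi_memstall_leave() pair above adds bio submission time for workingset reads to the pressure-stall accounting that userspace reads from /proc/pressure (CONFIG_PSI). A small sketch that dumps /proc/pressure/memory, where these stalls are aggregated; the sample line in the comment is illustrative:

    /* Sketch: dump the memory pressure-stall counters that memstall sections feed. */
    #include <stdio.h>

    int main(void)
    {
        char line[256];
        FILE *f = fopen("/proc/pressure/memory", "r");

        if (!f)
            return 1;   /* CONFIG_PSI disabled or pre-4.20 kernel */

        /* Lines look like: "some avg10=0.12 avg60=0.05 avg300=0.01 total=12345" */
        while (fgets(line, sizeof(line), f))
            fputs(line, stdout);

        fclose(f);
        return 0;
    }
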
@@ -1276,7 +1313,7 @@ EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
void blk_account_io_completion(struct request *req, unsigned int bytes)
{
- if (blk_do_io_stat(req)) {
+ if (req->part && blk_do_io_stat(req)) {
const int sgrp = op_stat_group(req_op(req));
struct hd_struct *part;
@@ -1294,7 +1331,8 @@ void blk_account_io_done(struct request *req, u64 now)
* normal IO on queueing nor completion. Accounting the
* containing request is enough.
*/
- if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
+ if (req->part && blk_do_io_stat(req) &&
+ !(req->rq_flags & RQF_FLUSH_SEQ)) {
const int sgrp = op_stat_group(req_op(req));
struct hd_struct *part;
@@ -1405,6 +1443,12 @@ bool blk_update_request(struct request *req, blk_status_t error,
if (!req->bio)
return false;
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+ if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
+ error == BLK_STS_OK)
+ req->q->integrity.profile->complete_fn(req, nr_bytes);
+#endif
+
if (unlikely(error && !blk_rq_is_passthrough(req) &&
!(req->rq_flags & RQF_QUIET)))
print_req_error(req, error, __func__);
@@ -1752,9 +1796,9 @@ int __init blk_dev_init(void)
{
BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS));
BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
- FIELD_SIZEOF(struct request, cmd_flags));
+ sizeof_field(struct request, cmd_flags));
BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
- FIELD_SIZEOF(struct bio, bi_opf));
+ sizeof_field(struct bio, bi_opf));
/* used for unplugging and affects IO latency/throughput - HIGHPRI */
kblockd_workqueue = alloc_workqueue("kblockd",
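
FIELD_SIZEOF() is replaced by sizeof_field(); both expand to sizeof(((TYPE *)0)->MEMBER). A userspace approximation of the compile-time checks above, using C11 _Static_assert; the stand-in struct and bit-count macros are assumptions, not kernel definitions:

    /* Sketch: the sizeof_field() idiom behind the BUILD_BUG_ON() checks above. */
    #include <stdint.h>
    #include <stdio.h>

    /* Same definition as the kernel's sizeof_field(TYPE, MEMBER). */
    #define sizeof_field(TYPE, MEMBER) sizeof(((TYPE *)0)->MEMBER)

    /* Hypothetical stand-in for struct request; only the flags word matters here. */
    struct example_request {
        uint32_t cmd_flags;
    };

    /* Compile-time check: op bits plus flag bits must fit in the flags member. */
    #define EXAMPLE_OP_BITS   8
    #define EXAMPLE_FLAG_BITS 24
    _Static_assert(EXAMPLE_OP_BITS + EXAMPLE_FLAG_BITS <=
                   8 * sizeof_field(struct example_request, cmd_flags),
                   "flag bits do not fit in cmd_flags");

    int main(void)
    {
        printf("cmd_flags is %zu bytes\n",
               sizeof_field(struct example_request, cmd_flags));
        return 0;
    }
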