author     Linus Torvalds <torvalds@linux-foundation.org>    2015-11-10 17:23:49 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>    2015-11-10 17:23:49 -0800
commit     3419b45039c6b799c974a8019361c045e7ca232c (patch)
tree       36a63602036cc50f34fadcbd5d5d8fca94e44297 /block
parent     01504f5e9e071f1dde1062e3be15f54d4555308f (diff)
parent     c1c534609fe8a859f9c8108a5591e6e8a97e34d1 (diff)
Merge branch 'for-4.4/io-poll' of git://git.kernel.dk/linux-block
Pull block IO poll support from Jens Axboe:
"Various groups have been doing experimentation around IO polling for
(really) fast devices. The code has been reviewed and has been
sitting on the side for a few releases, but this is now good enough
for coordinated benchmarking and further experimentation.
Currently O_DIRECT sync read/write are supported. A framework is in
the works that allows scalable stats tracking so we can auto-tune
this. And we'll add libaio support as well soon. For now, it's an
opt-in feature for test purposes"
* 'for-4.4/io-poll' of git://git.kernel.dk/linux-block:
direct-io: be sure to assign dio->bio_bdev for both paths
directio: add block polling support
NVMe: add blk polling support
block: add block polling support
blk-mq: return tag/queue combo in the make_request_fn handlers
block: change ->make_request_fn() and users to return a queue cookie
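As the pull message notes, polling is opt-in and its first consumer is the synchronous O_DIRECT read/write path. For orientation, here is a minimal userspace sketch of the I/O pattern this targets; it is illustrative only — the device name and 4096-byte transfer are placeholders, and the sysfs toggle mentioned in the comment is the io_poll queue attribute added by the blk-sysfs.c hunk further down.

/*
 * Hypothetical example: a synchronous O_DIRECT read, the pattern the
 * polled completion path targets. "/dev/nvme0n1" is a placeholder.
 * Polling is enabled per queue via the io_poll attribute added in this
 * series, e.g.  echo 1 > /sys/block/nvme0n1/queue/io_poll
 * after which the sync direct-io path can spin in blk_poll() instead of
 * sleeping for the completion interrupt.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
        void *buf;
        int fd;

        /* O_DIRECT needs a block-aligned buffer and transfer size. */
        if (posix_memalign(&buf, 4096, 4096))
                return 1;

        fd = open("/dev/nvme0n1", O_RDONLY | O_DIRECT);
        if (fd < 0) {
                perror("open");
                return 1;
        }

        if (pread(fd, buf, 4096, 0) != 4096)
                perror("pread");

        close(fd);
        free(buf);
        return 0;
}

Per-hardware-queue polling statistics (the invoked=/success= counters added in blk-mq-sysfs.c) are exposed as a read-only io_poll attribute under each hardware context's mq sysfs directory.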
Diffstat (limited to 'block')
-rw-r--r--  block/blk-core.c     | 67
-rw-r--r--  block/blk-mq-sysfs.c | 10
-rw-r--r--  block/blk-mq.c       | 59
-rw-r--r--  block/blk-sysfs.c    | 35
4 files changed, 138 insertions, 33 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 590cca21c24a..2bbf08cd2900 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -809,7 +809,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 }
 EXPORT_SYMBOL(blk_init_queue_node);
 
-static void blk_queue_bio(struct request_queue *q, struct bio *bio);
+static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio);
 
 struct request_queue *
 blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
@@ -1678,7 +1678,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
         blk_rq_bio_prep(req->q, req, bio);
 }
 
-static void blk_queue_bio(struct request_queue *q, struct bio *bio)
+static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 {
         const bool sync = !!(bio->bi_rw & REQ_SYNC);
         struct blk_plug *plug;
@@ -1698,7 +1698,7 @@ static void blk_queue_bio(struct request_queue *q, struct bio *bio)
         if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
                 bio->bi_error = -EIO;
                 bio_endio(bio);
-                return;
+                return BLK_QC_T_NONE;
         }
 
         if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
@@ -1713,7 +1713,7 @@ static void blk_queue_bio(struct request_queue *q, struct bio *bio)
          */
         if (!blk_queue_nomerges(q)) {
                 if (blk_attempt_plug_merge(q, bio, &request_count, NULL))
-                        return;
+                        return BLK_QC_T_NONE;
         } else
                 request_count = blk_plug_queued_count(q);
 
@@ -1791,6 +1791,8 @@ get_rq:
 out_unlock:
                 spin_unlock_irq(q->queue_lock);
         }
+
+        return BLK_QC_T_NONE;
 }
 
 /*
@@ -1996,12 +1998,13 @@ end_io:
  * a lower device by calling into generic_make_request recursively, which
  * means the bio should NOT be touched after the call to ->make_request_fn.
  */
-void generic_make_request(struct bio *bio)
+blk_qc_t generic_make_request(struct bio *bio)
 {
         struct bio_list bio_list_on_stack;
+        blk_qc_t ret = BLK_QC_T_NONE;
 
         if (!generic_make_request_checks(bio))
-                return;
+                goto out;
 
         /*
          * We only want one ->make_request_fn to be active at a time, else
@@ -2015,7 +2018,7 @@ void generic_make_request(struct bio *bio)
          */
         if (current->bio_list) {
                 bio_list_add(current->bio_list, bio);
-                return;
+                goto out;
         }
 
         /* following loop may be a bit non-obvious, and so deserves some
@@ -2040,7 +2043,7 @@ void generic_make_request(struct bio *bio)
 
                 if (likely(blk_queue_enter(q, __GFP_DIRECT_RECLAIM) == 0)) {
 
-                        q->make_request_fn(q, bio);
+                        ret = q->make_request_fn(q, bio);
 
                         blk_queue_exit(q);
 
@@ -2053,6 +2056,9 @@ void generic_make_request(struct bio *bio)
                 }
         } while (bio);
         current->bio_list = NULL; /* deactivate */
+
+out:
+        return ret;
 }
 EXPORT_SYMBOL(generic_make_request);
 
@@ -2066,7 +2072,7 @@ EXPORT_SYMBOL(generic_make_request);
  * interfaces; @bio must be presetup and ready for I/O.
  *
  */
-void submit_bio(int rw, struct bio *bio)
+blk_qc_t submit_bio(int rw, struct bio *bio)
 {
         bio->bi_rw |= rw;
 
@@ -2100,7 +2106,7 @@ void submit_bio(int rw, struct bio *bio)
                 }
         }
 
-        generic_make_request(bio);
+        return generic_make_request(bio);
 }
 EXPORT_SYMBOL(submit_bio);
 
@@ -3306,6 +3312,47 @@ void blk_finish_plug(struct blk_plug *plug)
 }
 EXPORT_SYMBOL(blk_finish_plug);
 
+bool blk_poll(struct request_queue *q, blk_qc_t cookie)
+{
+        struct blk_plug *plug;
+        long state;
+
+        if (!q->mq_ops || !q->mq_ops->poll || !blk_qc_t_valid(cookie) ||
+            !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
+                return false;
+
+        plug = current->plug;
+        if (plug)
+                blk_flush_plug_list(plug, false);
+
+        state = current->state;
+        while (!need_resched()) {
+                unsigned int queue_num = blk_qc_t_to_queue_num(cookie);
+                struct blk_mq_hw_ctx *hctx = q->queue_hw_ctx[queue_num];
+                int ret;
+
+                hctx->poll_invoked++;
+
+                ret = q->mq_ops->poll(hctx, blk_qc_t_to_tag(cookie));
+                if (ret > 0) {
+                        hctx->poll_success++;
+                        set_current_state(TASK_RUNNING);
+                        return true;
+                }
+
+                if (signal_pending_state(state, current))
+                        set_current_state(TASK_RUNNING);
+
+                if (current->state == TASK_RUNNING)
+                        return true;
+                if (ret < 0)
+                        break;
+                cpu_relax();
+        }
+
+        return false;
+}
+
 #ifdef CONFIG_PM
 /**
  * blk_pm_runtime_init - Block layer runtime PM initialization routine
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 6f57a110289c..1cf18784c5cf 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -174,6 +174,11 @@ static ssize_t blk_mq_sysfs_rq_list_show(struct blk_mq_ctx *ctx, char *page)
         return ret;
 }
 
+static ssize_t blk_mq_hw_sysfs_poll_show(struct blk_mq_hw_ctx *hctx, char *page)
+{
+        return sprintf(page, "invoked=%lu, success=%lu\n", hctx->poll_invoked, hctx->poll_success);
+}
+
 static ssize_t blk_mq_hw_sysfs_queued_show(struct blk_mq_hw_ctx *hctx,
                                            char *page)
 {
@@ -295,6 +300,10 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = {
         .attr = {.name = "cpu_list", .mode = S_IRUGO },
         .show = blk_mq_hw_sysfs_cpus_show,
 };
+static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_poll = {
+        .attr = {.name = "io_poll", .mode = S_IRUGO },
+        .show = blk_mq_hw_sysfs_poll_show,
+};
 
 static struct attribute *default_hw_ctx_attrs[] = {
         &blk_mq_hw_sysfs_queued.attr,
@@ -304,6 +313,7 @@ static struct attribute *default_hw_ctx_attrs[] = {
         &blk_mq_hw_sysfs_tags.attr,
         &blk_mq_hw_sysfs_cpus.attr,
         &blk_mq_hw_sysfs_active.attr,
+        &blk_mq_hw_sysfs_poll.attr,
         NULL,
 };
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 694f8703f83c..86bd5b25288e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1198,7 +1198,7 @@ static struct request *blk_mq_map_request(struct request_queue *q,
         return rq;
 }
 
-static int blk_mq_direct_issue_request(struct request *rq)
+static int blk_mq_direct_issue_request(struct request *rq, blk_qc_t *cookie)
 {
         int ret;
         struct request_queue *q = rq->q;
@@ -1209,6 +1209,7 @@ static int blk_mq_direct_issue_request(struct request *rq)
                 .list = NULL,
                 .last = 1
         };
+        blk_qc_t new_cookie = blk_tag_to_qc_t(rq->tag, hctx->queue_num);
 
         /*
          * For OK queue, we are done. For error, kill it. Any other
@@ -1216,18 +1217,21 @@ static int blk_mq_direct_issue_request(struct request *rq)
          * would have done
          */
         ret = q->mq_ops->queue_rq(hctx, &bd);
-        if (ret == BLK_MQ_RQ_QUEUE_OK)
+        if (ret == BLK_MQ_RQ_QUEUE_OK) {
+                *cookie = new_cookie;
                 return 0;
-        else {
-                __blk_mq_requeue_request(rq);
+        }
 
-                if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
-                        rq->errors = -EIO;
-                        blk_mq_end_request(rq, rq->errors);
-                        return 0;
-                }
-                return -1;
+        __blk_mq_requeue_request(rq);
+
+        if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
+                *cookie = BLK_QC_T_NONE;
+                rq->errors = -EIO;
+                blk_mq_end_request(rq, rq->errors);
+                return 0;
         }
+
+        return -1;
 }
 
 /*
@@ -1235,7 +1239,7 @@
  * but will attempt to bypass the hctx queueing if we can go straight to
  * hardware for SYNC IO.
  */
-static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 {
         const int is_sync = rw_is_sync(bio->bi_rw);
         const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA);
@@ -1244,12 +1248,13 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
         unsigned int request_count = 0;
         struct blk_plug *plug;
         struct request *same_queue_rq = NULL;
+        blk_qc_t cookie;
 
         blk_queue_bounce(q, &bio);
 
         if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
                 bio_io_error(bio);
-                return;
+                return BLK_QC_T_NONE;
         }
 
         blk_queue_split(q, &bio, q->bio_split);
@@ -1257,13 +1262,15 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
         if (!is_flush_fua && !blk_queue_nomerges(q)) {
                 if (blk_attempt_plug_merge(q, bio, &request_count,
                                            &same_queue_rq))
-                        return;
+                        return BLK_QC_T_NONE;
         } else
                 request_count = blk_plug_queued_count(q);
 
         rq = blk_mq_map_request(q, bio, &data);
         if (unlikely(!rq))
-                return;
+                return BLK_QC_T_NONE;
+
+        cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num);
 
         if (unlikely(is_flush_fua)) {
                 blk_mq_bio_to_request(rq, bio);
@@ -1302,11 +1309,11 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
                 old_rq = rq;
                 blk_mq_put_ctx(data.ctx);
                 if (!old_rq)
-                        return;
-                if (!blk_mq_direct_issue_request(old_rq))
-                        return;
+                        goto done;
+                if (!blk_mq_direct_issue_request(old_rq, &cookie))
+                        goto done;
                 blk_mq_insert_request(old_rq, false, true, true);
-                return;
+                goto done;
         }
 
         if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
@@ -1320,13 +1327,15 @@ run_queue:
                 blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
         }
         blk_mq_put_ctx(data.ctx);
+done:
+        return cookie;
 }
 
 /*
  * Single hardware queue variant. This will attempt to use any per-process
  * plug for merging and IO deferral.
  */
-static void blk_sq_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 {
         const int is_sync = rw_is_sync(bio->bi_rw);
         const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA);
@@ -1334,23 +1343,26 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio)
         unsigned int request_count = 0;
         struct blk_map_ctx data;
         struct request *rq;
+        blk_qc_t cookie;
 
         blk_queue_bounce(q, &bio);
 
         if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
                 bio_io_error(bio);
-                return;
+                return BLK_QC_T_NONE;
         }
 
         blk_queue_split(q, &bio, q->bio_split);
 
         if (!is_flush_fua && !blk_queue_nomerges(q) &&
             blk_attempt_plug_merge(q, bio, &request_count, NULL))
-                return;
+                return BLK_QC_T_NONE;
 
         rq = blk_mq_map_request(q, bio, &data);
         if (unlikely(!rq))
-                return;
+                return BLK_QC_T_NONE;
+
+        cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num);
 
         if (unlikely(is_flush_fua)) {
                 blk_mq_bio_to_request(rq, bio);
@@ -1374,7 +1386,7 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio)
                 }
                 list_add_tail(&rq->queuelist, &plug->mq_list);
                 blk_mq_put_ctx(data.ctx);
-                return;
+                return cookie;
         }
 
         if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
@@ -1389,6 +1401,7 @@ run_queue:
         }
 
         blk_mq_put_ctx(data.ctx);
+        return cookie;
 }
 
 /*
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 31849e328b45..565b8dac5782 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -317,6 +317,34 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
         return ret;
 }
 
+static ssize_t queue_poll_show(struct request_queue *q, char *page)
+{
+        return queue_var_show(test_bit(QUEUE_FLAG_POLL, &q->queue_flags), page);
+}
+
+static ssize_t queue_poll_store(struct request_queue *q, const char *page,
+                                size_t count)
+{
+        unsigned long poll_on;
+        ssize_t ret;
+
+        if (!q->mq_ops || !q->mq_ops->poll)
+                return -EINVAL;
+
+        ret = queue_var_store(&poll_on, page, count);
+        if (ret < 0)
+                return ret;
+
+        spin_lock_irq(q->queue_lock);
+        if (poll_on)
+                queue_flag_set(QUEUE_FLAG_POLL, q);
+        else
+                queue_flag_clear(QUEUE_FLAG_POLL, q);
+        spin_unlock_irq(q->queue_lock);
+
+        return ret;
+}
+
 static struct queue_sysfs_entry queue_requests_entry = {
         .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
         .show = queue_requests_show,
@@ -442,6 +470,12 @@ static struct queue_sysfs_entry queue_random_entry = {
         .store = queue_store_random,
 };
 
+static struct queue_sysfs_entry queue_poll_entry = {
+        .attr = {.name = "io_poll", .mode = S_IRUGO | S_IWUSR },
+        .show = queue_poll_show,
+        .store = queue_poll_store,
+};
+
 static struct attribute *default_attrs[] = {
         &queue_requests_entry.attr,
         &queue_ra_entry.attr,
@@ -466,6 +500,7 @@ static struct attribute *default_attrs[] = {
         &queue_rq_affinity_entry.attr,
         &queue_iostats_entry.attr,
         &queue_random_entry.attr,
+        &queue_poll_entry.attr,
         NULL,
 };
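Taken together, the block-layer side of the series works like this: every ->make_request_fn() now returns a blk_qc_t cookie encoding the hardware queue and tag of the request it queued, generic_make_request()/submit_bio() pass that cookie back to the submitter, and the submitter may busy-wait in blk_poll() until its I/O completes. The consumer wired up in this pull is the direct-io code (not shown above, since the diffstat is limited to 'block'). The sketch below is an illustrative, hypothetical submitter rather than that code: only submit_bio(), blk_poll() and bdev_get_queue() are taken from the tree, and the completion plumbing is assumed to be set up by the caller via bio->bi_end_io.

/*
 * Illustrative polled submitter (not the direct-io code from this
 * series). Assumes the caller pre-built @bio with a bi_end_io handler
 * that calls complete(done) when the I/O finishes.
 */
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/completion.h>
#include <linux/fs.h>

static void submit_and_poll(struct block_device *bdev, struct bio *bio,
                            struct completion *done)
{
        struct request_queue *q = bdev_get_queue(bdev);
        blk_qc_t cookie;

        /* submit_bio() now hands back the queue/tag cookie for this bio. */
        cookie = submit_bio(READ | REQ_SYNC, bio);

        while (!completion_done(done)) {
                /*
                 * blk_poll() returns false when polling is unsupported or
                 * the queue's io_poll flag is off; fall back to sleeping
                 * until the interrupt-driven completion fires.
                 */
                if (!blk_poll(q, cookie)) {
                        wait_for_completion_io(done);
                        break;
                }
        }
}

The split between the cookie plumbing ("block: change ->make_request_fn() ...", "blk-mq: return tag/queue combo ...") and the poll loop itself ("block: add block polling support") mirrors the commit list above: the cookie identifies exactly which hardware context to poll, so blk_poll() can drive completions for the right queue without taking an interrupt.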