summaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
authorJens Axboe <jens.axboe@oracle.com>2008-09-14 05:55:09 -0700
committerJens Axboe <jens.axboe@oracle.com>2008-10-09 08:56:13 +0200
commit242f9dcb8ba6f68fcd217a119a7648a4f69290e9 (patch)
tree1bfe245ffbc50d204d76665cd8f90d85100f86a1 /block
parent608aeef17a91747d6303de4df5e2c2e6899a95e8 (diff)
downloadtalos-obmc-linux-242f9dcb8ba6f68fcd217a119a7648a4f69290e9.tar.gz
talos-obmc-linux-242f9dcb8ba6f68fcd217a119a7648a4f69290e9.zip
block: unify request timeout handling
Right now SCSI and others do their own command timeout handling. Move those bits to the block layer. Instead of having a timer per command, we try to be a bit more clever and simply have one per-queue. This avoids the overhead of having to tear down and setup a timer for each command, so it will result in a lot less timer fiddling. Signed-off-by: Mike Anderson <andmike@linux.vnet.ibm.com> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'block')
-rw-r--r--block/Makefile4
-rw-r--r--block/blk-core.c7
-rw-r--r--block/blk-settings.c12
-rw-r--r--block/blk-softirq.c30
-rw-r--r--block/blk-timeout.c155
-rw-r--r--block/blk.h24
-rw-r--r--block/elevator.c8
7 files changed, 226 insertions, 14 deletions
diff --git a/block/Makefile b/block/Makefile
index 0da976ce67dd..bfe73049f939 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -4,8 +4,8 @@
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
- blk-exec.o blk-merge.o blk-softirq.o ioctl.o genhd.o \
- scsi_ioctl.o cmd-filter.o
+ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
+ ioctl.o genhd.o scsi_ioctl.o cmd-filter.o
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
diff --git a/block/blk-core.c b/block/blk-core.c
index f25eb9786d94..d768a8ddc173 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -110,6 +110,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
memset(rq, 0, sizeof(*rq));
INIT_LIST_HEAD(&rq->queuelist);
+ INIT_LIST_HEAD(&rq->timeout_list);
rq->cpu = -1;
rq->q = q;
rq->sector = rq->hard_sector = (sector_t) -1;
@@ -490,6 +491,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
}
init_timer(&q->unplug_timer);
+ setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
+ INIT_LIST_HEAD(&q->timeout_list);
kobject_init(&q->kobj, &blk_queue_ktype);
@@ -897,6 +900,8 @@ EXPORT_SYMBOL(blk_start_queueing);
*/
void blk_requeue_request(struct request_queue *q, struct request *rq)
{
+ blk_delete_timer(rq);
+ blk_clear_rq_complete(rq);
blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
if (blk_rq_tagged(rq))
@@ -1650,6 +1655,8 @@ static void end_that_request_last(struct request *req, int error)
{
struct gendisk *disk = req->rq_disk;
+ blk_delete_timer(req);
+
if (blk_rq_tagged(req))
blk_queue_end_tag(req->q, req);
diff --git a/block/blk-settings.c b/block/blk-settings.c
index d70692badcdb..1d0330d0b40a 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -77,6 +77,18 @@ void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn)
}
EXPORT_SYMBOL(blk_queue_softirq_done);
+void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
+{
+ q->rq_timeout = timeout;
+}
+EXPORT_SYMBOL_GPL(blk_queue_rq_timeout);
+
+void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn)
+{
+ q->rq_timed_out_fn = fn;
+}
+EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out);
+
/**
* blk_queue_make_request - define an alternate make_request function for a device
* @q: the request queue for the device to be affected
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 3a1af551191e..7ab344afb16f 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -101,18 +101,7 @@ static struct notifier_block __cpuinitdata blk_cpu_notifier = {
.notifier_call = blk_cpu_notify,
};
-/**
- * blk_complete_request - end I/O on a request
- * @req: the request being processed
- *
- * Description:
- * Ends all I/O on a request. It does not handle partial completions,
- * unless the driver actually implements this in its completion callback
- * through requeueing. The actual completion happens out-of-order,
- * through a softirq handler. The user must have registered a completion
- * callback through blk_queue_softirq_done().
- **/
-void blk_complete_request(struct request *req)
+void __blk_complete_request(struct request *req)
{
struct request_queue *q = req->q;
unsigned long flags;
@@ -151,6 +140,23 @@ do_local:
local_irq_restore(flags);
}
+
+/**
+ * blk_complete_request - end I/O on a request
+ * @req: the request being processed
+ *
+ * Description:
+ * Ends all I/O on a request. It does not handle partial completions,
+ * unless the driver actually implements this in its completion callback
+ * through requeueing. The actual completion happens out-of-order,
+ * through a softirq handler. The user must have registered a completion
+ * callback through blk_queue_softirq_done().
+ **/
+void blk_complete_request(struct request *req)
+{
+ if (!blk_mark_rq_complete(req))
+ __blk_complete_request(req);
+}
EXPORT_SYMBOL(blk_complete_request);
__init int blk_softirq_init(void)
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
new file mode 100644
index 000000000000..b36d07bf0afb
--- /dev/null
+++ b/block/blk-timeout.c
@@ -0,0 +1,155 @@
+/*
+ * Functions related to generic timeout handling of requests.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/blkdev.h>
+
+#include "blk.h"
+
+/*
+ * blk_delete_timer - Delete/cancel timer for a given function.
+ * @req: request that we are canceling timer for
+ *
+ */
+void blk_delete_timer(struct request *req)
+{
+ struct request_queue *q = req->q;
+
+ /*
+ * Nothing to detach
+ */
+ if (!q->rq_timed_out_fn || !req->deadline)
+ return;
+
+ list_del_init(&req->timeout_list);
+
+ if (list_empty(&q->timeout_list))
+ del_timer(&q->timeout);
+}
+
+static void blk_rq_timed_out(struct request *req)
+{
+ struct request_queue *q = req->q;
+ enum blk_eh_timer_return ret;
+
+ ret = q->rq_timed_out_fn(req);
+ switch (ret) {
+ case BLK_EH_HANDLED:
+ __blk_complete_request(req);
+ break;
+ case BLK_EH_RESET_TIMER:
+ blk_clear_rq_complete(req);
+ blk_add_timer(req);
+ break;
+ case BLK_EH_NOT_HANDLED:
+ /*
+ * LLD handles this for now but in the future
+ * we can send a request msg to abort the command
+ * and we can move more of the generic scsi eh code to
+ * the blk layer.
+ */
+ break;
+ default:
+ printk(KERN_ERR "block: bad eh return: %d\n", ret);
+ break;
+ }
+}
+
+void blk_rq_timed_out_timer(unsigned long data)
+{
+ struct request_queue *q = (struct request_queue *) data;
+ unsigned long flags, uninitialized_var(next), next_set = 0;
+ struct request *rq, *tmp;
+
+ spin_lock_irqsave(q->queue_lock, flags);
+
+ list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list) {
+ if (time_after_eq(jiffies, rq->deadline)) {
+ list_del_init(&rq->timeout_list);
+
+ /*
+ * Check if we raced with end io completion
+ */
+ if (blk_mark_rq_complete(rq))
+ continue;
+ blk_rq_timed_out(rq);
+ }
+ if (!next_set) {
+ next = rq->deadline;
+ next_set = 1;
+ } else if (time_after(next, rq->deadline))
+ next = rq->deadline;
+ }
+
+ if (next_set && !list_empty(&q->timeout_list))
+ mod_timer(&q->timeout, round_jiffies(next));
+
+ spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+/**
+ * blk_abort_request -- Request request recovery for the specified command
+ * @req: pointer to the request of interest
+ *
+ * This function requests that the block layer start recovery for the
+ * request by deleting the timer and calling the q's timeout function.
+ * LLDDs who implement their own error recovery MAY ignore the timeout
+ * event if they generated blk_abort_req. Must hold queue lock.
+ */
+void blk_abort_request(struct request *req)
+{
+ blk_delete_timer(req);
+ blk_rq_timed_out(req);
+}
+EXPORT_SYMBOL_GPL(blk_abort_request);
+
+/**
+ * blk_add_timer - Start timeout timer for a single request
+ * @req: request that is about to start running.
+ *
+ * Notes:
+ * Each request has its own timer, and as it is added to the queue, we
+ * set up the timer. When the request completes, we cancel the timer.
+ */
+void blk_add_timer(struct request *req)
+{
+ struct request_queue *q = req->q;
+ unsigned long expiry;
+
+ if (!q->rq_timed_out_fn)
+ return;
+
+ BUG_ON(!list_empty(&req->timeout_list));
+ BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags));
+
+ if (req->timeout)
+ req->deadline = jiffies + req->timeout;
+ else {
+ req->deadline = jiffies + q->rq_timeout;
+ /*
+ * Some LLDs, like scsi, peek at the timeout to prevent
+ * a command from being retried forever.
+ */
+ req->timeout = q->rq_timeout;
+ }
+ list_add_tail(&req->timeout_list, &q->timeout_list);
+
+ /*
+ * If the timer isn't already pending or this timeout is earlier
+ * than an existing one, modify the timer. Round to next nearest
+ * second.
+ */
+ expiry = round_jiffies(req->deadline);
+
+ /*
+ * We use ->deadline == 0 to detect whether a timer was added or
+ * not, so just increase to next jiffy for that specific case
+ */
+ if (unlikely(!req->deadline))
+ req->deadline = 1;
+
+ if (!timer_pending(&q->timeout) ||
+ time_before(expiry, q->timeout.expires))
+ mod_timer(&q->timeout, expiry);
+}
diff --git a/block/blk.h b/block/blk.h
index de74254cb916..a4f4a50aefaa 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -17,6 +17,30 @@ void __blk_queue_free_tags(struct request_queue *q);
void blk_unplug_work(struct work_struct *work);
void blk_unplug_timeout(unsigned long data);
+void blk_rq_timed_out_timer(unsigned long data);
+void blk_delete_timer(struct request *);
+void blk_add_timer(struct request *);
+
+/*
+ * Internal atomic flags for request handling
+ */
+enum rq_atomic_flags {
+ REQ_ATOM_COMPLETE = 0,
+};
+
+/*
+ * EH timer and IO completion will both attempt to 'grab' the request, make
+ * sure that only one of them suceeds
+ */
+static inline int blk_mark_rq_complete(struct request *rq)
+{
+ return test_and_set_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);
+}
+
+static inline void blk_clear_rq_complete(struct request *rq)
+{
+ clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);
+}
struct io_context *current_io_context(gfp_t gfp_flags, int node);
diff --git a/block/elevator.c b/block/elevator.c
index 8e3fc3afc77b..a91fc59edd01 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -36,6 +36,8 @@
#include <linux/hash.h>
#include <linux/uaccess.h>
+#include "blk.h"
+
static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);
@@ -771,6 +773,12 @@ struct request *elv_next_request(struct request_queue *q)
*/
rq->cmd_flags |= REQ_STARTED;
blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
+
+ /*
+ * We are now handing the request to the hardware,
+ * add the timeout handler
+ */
+ blk_add_timer(rq);
}
if (!q->boundary_rq || q->boundary_rq == rq) {
OpenPOWER on IntegriCloud