summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPhilipp Reisner <philipp.reisner@linbit.com>2011-06-29 14:06:08 +0200
committerPhilipp Reisner <philipp.reisner@linbit.com>2012-05-09 15:10:45 +0200
commitdfa8bedbfe881caf6676704ab1aae18dfe8e430a (patch)
tree416102d8cd59c6be45b072c2ce123c6cb8f63f52
parent02ee8f95fadf7c94b3d28df436a095152f6392b2 (diff)
downloadblackbird-op-linux-dfa8bedbfe881caf6676704ab1aae18dfe8e430a.tar.gz
blackbird-op-linux-dfa8bedbfe881caf6676704ab1aae18dfe8e430a.zip
drbd: Implemented the disk-timeout option
When the disk-timeout is active, and it expires for a single request, we consider the local disk as D_FAILED. Note: With this change, I made both timeout based state transitions HARD state transitions. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
-rw-r--r--drivers/block/drbd/drbd_main.c5
-rw-r--r--drivers/block/drbd/drbd_receiver.c2
-rw-r--r--drivers/block/drbd/drbd_req.c32
-rw-r--r--include/linux/drbd_limits.h5
-rw-r--r--include/linux/drbd_nl.h1
5 files changed, 31 insertions, 14 deletions
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index bc8a8a7556da..4bd636524dd1 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1404,6 +1404,9 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
/* Here we have the actions that are performed after a
state change. This function might sleep */
+ if (os.disk <= D_NEGOTIATING && ns.disk > D_NEGOTIATING)
+ mod_timer(&mdev->request_timer, jiffies + HZ);
+
nsm.i = -1;
if (ns.susp_nod) {
if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
@@ -3318,6 +3321,8 @@ static void drbd_delete_device(unsigned int minor)
if (!mdev)
return;
+ del_timer_sync(&mdev->request_timer);
+
/* paranoia asserts */
if (mdev->open_cnt != 0)
dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt,
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index e7ed0aa93a16..a85bbe1bbc2b 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -3803,8 +3803,6 @@ static void drbd_disconnect(struct drbd_conf *mdev)
atomic_set(&mdev->rs_pending_cnt, 0);
wake_up(&mdev->misc_wait);
- del_timer(&mdev->request_timer);
-
/* make sure syncer is stopped and w_resume_next_sg queued */
del_timer_sync(&mdev->resync_timer);
resync_timer_fn((unsigned long)mdev);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 1a8aac4b0c2f..ef145e33a647 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1208,13 +1208,19 @@ void request_timer_fn(unsigned long data)
struct drbd_conf *mdev = (struct drbd_conf *) data;
struct drbd_request *req; /* oldest request */
struct list_head *le;
- unsigned long et = 0; /* effective timeout = ko_count * timeout */
+ unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
if (get_net_conf(mdev)) {
- et = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count;
+ ent = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count;
put_net_conf(mdev);
}
- if (!et || mdev->state.conn < C_WF_REPORT_PARAMS)
+ if (get_ldev(mdev)) {
+ dt = mdev->ldev->dc.disk_timeout * HZ / 10;
+ put_ldev(mdev);
+ }
+ et = min_not_zero(dt, ent);
+
+ if (!et || (mdev->state.conn < C_WF_REPORT_PARAMS && mdev->state.disk <= D_FAILED))
return; /* Recurring timer stopped */
spin_lock_irq(&mdev->req_lock);
@@ -1227,17 +1233,19 @@ void request_timer_fn(unsigned long data)
le = le->prev;
req = list_entry(le, struct drbd_request, tl_requests);
- if (time_is_before_eq_jiffies(req->start_time + et)) {
- if (req->rq_state & RQ_NET_PENDING) {
+ if (ent && req->rq_state & RQ_NET_PENDING) {
+ if (time_is_before_eq_jiffies(req->start_time + ent)) {
dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
- _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL);
- } else {
- dev_warn(DEV, "Local backing block device frozen?\n");
- mod_timer(&mdev->request_timer, jiffies + et);
+ _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
}
- } else {
- mod_timer(&mdev->request_timer, req->start_time + et);
}
-
+ if (dt && req->rq_state & RQ_LOCAL_PENDING) {
+ if (time_is_before_eq_jiffies(req->start_time + dt)) {
+ dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
+ __drbd_chk_io_error(mdev, 1);
+ }
+ }
+ nt = (time_is_before_eq_jiffies(req->start_time + et) ? jiffies : req->start_time) + et;
spin_unlock_irq(&mdev->req_lock);
+ mod_timer(&mdev->request_timer, nt);
}
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 928c84dfaf42..fb670bf603f7 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -48,6 +48,11 @@
#define DRBD_TIMEOUT_MAX 600
#define DRBD_TIMEOUT_DEF 60 /* 6 seconds */
+ /* If backing disk takes longer than disk_timeout, mark the disk as failed */
+#define DRBD_DISK_TIMEOUT_MIN 0 /* 0 = disabled */
+#define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */
+#define DRBD_DISK_TIMEOUT_DEF 0 /* disabled */
+
/* active connection retries when C_WF_CONNECTION */
#define DRBD_CONNECT_INT_MIN 1
#define DRBD_CONNECT_INT_MAX 120
diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h
index 7203c9ead233..a8706f08ab36 100644
--- a/include/linux/drbd_nl.h
+++ b/include/linux/drbd_nl.h
@@ -31,6 +31,7 @@ NL_PACKET(disk_conf, 3,
NL_INTEGER( 56, T_MAY_IGNORE, max_bio_bvecs)
NL_BIT( 57, T_MAY_IGNORE, no_disk_barrier)
NL_BIT( 58, T_MAY_IGNORE, no_disk_drain)
+ NL_INTEGER( 89, T_MAY_IGNORE, disk_timeout)
)
NL_PACKET(detach, 4,
OpenPOWER on IntegriCloud