nvme: add keep-alive support

Periodic keep-alive is a mandatory feature in NVMe over Fabrics, and optional in NVMe 1.2.1 for PCIe. This patch adds periodic keep-alive sent from the host to verify that the controller is still responsive and vice-versa. The keep-alive timeout is user-defined (with keep_alive_tmo connection parameter) and defaults to 5 seconds. In order to avoid a race condition where the host sends a keep-alive competing with the target side keep-alive timeout expiration, the host adds a grace period of 10 seconds when publishing the keep-alive timeout to the target. In case a keep-alive failed (or timed out), a transport specific error recovery kicks in. For now only NVMe over Fabrics is wired up to support keep alive, but we can add PCIe support easily once controllers actually supporting it become available. Signed-off-by: Sagi Grimberg <sagi@grimberg.me> Reviewed-by: Steve Wise <swise@chelsio.com> Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Keith Busch <keith.busch@intel.com> Signed-off-by: Jens Axboe <axboe@fb.com>
author: Sagi Grimberg <sagi@grimberg.me> 2016-06-13 16:45:28 +0200
committer: Jens Axboe <axboe@fb.com> 2016-07-05 11:28:20 -0600
commit: 038bd4cb6766c69b5b9c77507f389cc718a36842 (patch)
tree: f551977e651401dadf30bb47bc9756df5f922901 /drivers/nvme/host/fabrics.c
parent: 7b89eae29eec4dd9259acd82ed57bec8d9e430ca (diff)
download: talos-obmc-linux-038bd4cb6766c69b5b9c77507f389cc718a36842.tar.gz
talos-obmc-linux-038bd4cb6766c69b5b9c77507f389cc718a36842.zip
1 files changed, 32 insertions, 1 deletions
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index efa86d0e4c9f..b86b6379ef0c 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -360,6 +360,12 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
 	cmd.connect.fctype = nvme_fabrics_type_connect;
 	cmd.connect.qid = 0;
 	cmd.connect.sqsize = cpu_to_le16(ctrl->sqsize);
+	/*
+	 * Set keep-alive timeout in seconds granularity (ms * 1000)
+	 * and add a grace period for controller kato enforcement
+	 */
+	cmd.connect.kato = ctrl->opts->discovery_nqn ? 0 :
+		cpu_to_le32((ctrl->kato + NVME_KATO_GRACE) * 1000);
 
 	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (!data)
@@ -499,6 +505,7 @@ static const match_table_t opt_tokens = {
 	{ NVMF_OPT_NR_IO_QUEUES,	"nr_io_queues=%d"	},
 	{ NVMF_OPT_TL_RETRY_COUNT,	"tl_retry_count=%d"	},
 	{ NVMF_OPT_RECONNECT_DELAY,	"reconnect_delay=%d"	},
+	{ NVMF_OPT_KATO,		"keep_alive_tmo=%d"	},
 	{ NVMF_OPT_HOSTNQN,		"hostnqn=%s"		},
 	{ NVMF_OPT_ERR,			NULL			}
 };
@@ -610,6 +617,28 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 			}
 			opts->tl_retry_count = token;
 			break;
+		case NVMF_OPT_KATO:
+			if (match_int(args, &token)) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			if (opts->discovery_nqn) {
+				pr_err("Discovery controllers cannot accept keep_alive_tmo != 0\n");
+				ret = -EINVAL;
+				goto out;
+			}
+
+			if (token < 0) {
+				pr_err("Invalid keep_alive_tmo %d\n", token);
+				ret = -EINVAL;
+				goto out;
+			} else if (token == 0) {
+				/* Allowed for debug */
+				pr_warn("keep_alive_tmo 0 won't execute keep alives!!!\n");
+			}
+			opts->kato = token;
+			break;
 		case NVMF_OPT_HOSTNQN:
 			if (opts->host) {
 				pr_err("hostnqn already user-assigned: %s\n",
@@ -661,6 +690,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 	}
 
 out:
+	if (!opts->discovery_nqn && !opts->kato)
+		opts->kato = NVME_DEFAULT_KATO;
 	kfree(options);
 	return ret;
 }
@@ -717,7 +748,7 @@ EXPORT_SYMBOL_GPL(nvmf_free_options);
 
 #define NVMF_REQUIRED_OPTS	(NVMF_OPT_TRANSPORT | NVMF_OPT_NQN)
 #define NVMF_ALLOWED_OPTS	(NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
-				 NVMF_OPT_HOSTNQN)
+				 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN)
 
 static struct nvme_ctrl *
 nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
author	Sagi Grimberg <sagi@grimberg.me>	2016-06-13 16:45:28 +0200
committer	Jens Axboe <axboe@fb.com>	2016-07-05 11:28:20 -0600
commit	038bd4cb6766c69b5b9c77507f389cc718a36842 (patch)
tree	f551977e651401dadf30bb47bc9756df5f922901 /drivers/nvme/host/fabrics.c
parent	7b89eae29eec4dd9259acd82ed57bec8d9e430ca (diff)
download	talos-obmc-linux-038bd4cb6766c69b5b9c77507f389cc718a36842.tar.gz talos-obmc-linux-038bd4cb6766c69b5b9c77507f389cc718a36842.zip