Diffstat (limited to 'drivers/nvme/target')
-rw-r--r--   drivers/nvme/target/Kconfig      |   2
-rw-r--r--   drivers/nvme/target/admin-cmd.c  |  94
-rw-r--r--   drivers/nvme/target/core.c       |   4
-rw-r--r--   drivers/nvme/target/io-cmd.c     |   3
-rw-r--r--   drivers/nvme/target/loop.c       |  10
-rw-r--r--   drivers/nvme/target/nvmet.h      |   1
-rw-r--r--   drivers/nvme/target/rdma.c       | 107
7 files changed, 178 insertions(+), 43 deletions(-)
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
index a5c31cbeb481..3a5b9d0576cb 100644
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -15,8 +15,8 @@ config NVME_TARGET
 
 config NVME_TARGET_LOOP
 	tristate "NVMe loopback device support"
-	depends on BLK_DEV_NVME
 	depends on NVME_TARGET
+	select NVME_CORE
 	select NVME_FABRICS
 	select SG_POOL
 	help
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 2fac17a5ad53..7ab9c9381b98 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -13,8 +13,8 @@
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
-#include <linux/random.h>
 #include <generated/utsrelease.h>
+#include <asm/unaligned.h>
 #include "nvmet.h"
 
 u32 nvmet_get_log_page_len(struct nvme_command *cmd)
@@ -30,8 +30,84 @@ u32 nvmet_get_log_page_len(struct nvme_command *cmd)
 	return len;
 }
 
+static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
+		struct nvme_smart_log *slog)
+{
+	u16 status;
+	struct nvmet_ns *ns;
+	u64 host_reads, host_writes, data_units_read, data_units_written;
+
+	status = NVME_SC_SUCCESS;
+	ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->get_log_page.nsid);
+	if (!ns) {
+		status = NVME_SC_INVALID_NS;
+		pr_err("nvmet: could not find namespace id : %d\n",
+				le32_to_cpu(req->cmd->get_log_page.nsid));
+		goto out;
+	}
+
+	host_reads = part_stat_read(ns->bdev->bd_part, ios[READ]);
+	data_units_read = part_stat_read(ns->bdev->bd_part, sectors[READ]);
+	host_writes = part_stat_read(ns->bdev->bd_part, ios[WRITE]);
+	data_units_written = part_stat_read(ns->bdev->bd_part, sectors[WRITE]);
+
+	put_unaligned_le64(host_reads, &slog->host_reads[0]);
+	put_unaligned_le64(data_units_read, &slog->data_units_read[0]);
+	put_unaligned_le64(host_writes, &slog->host_writes[0]);
+	put_unaligned_le64(data_units_written, &slog->data_units_written[0]);
+	nvmet_put_namespace(ns);
+out:
+	return status;
+}
+
+static u16 nvmet_get_smart_log_all(struct nvmet_req *req,
+		struct nvme_smart_log *slog)
+{
+	u16 status;
+	u64 host_reads = 0, host_writes = 0;
+	u64 data_units_read = 0, data_units_written = 0;
+	struct nvmet_ns *ns;
+	struct nvmet_ctrl *ctrl;
+
+	status = NVME_SC_SUCCESS;
+	ctrl = req->sq->ctrl;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
+		host_reads += part_stat_read(ns->bdev->bd_part, ios[READ]);
+		data_units_read +=
+			part_stat_read(ns->bdev->bd_part, sectors[READ]);
+		host_writes += part_stat_read(ns->bdev->bd_part, ios[WRITE]);
+		data_units_written +=
+			part_stat_read(ns->bdev->bd_part, sectors[WRITE]);
+	}
+	rcu_read_unlock();
+
+	put_unaligned_le64(host_reads, &slog->host_reads[0]);
+	put_unaligned_le64(data_units_read, &slog->data_units_read[0]);
+	put_unaligned_le64(host_writes, &slog->host_writes[0]);
+	put_unaligned_le64(data_units_written, &slog->data_units_written[0]);
+
+	return status;
+}
+
+static u16 nvmet_get_smart_log(struct nvmet_req *req,
+		struct nvme_smart_log *slog)
+{
+	u16 status;
+
+	WARN_ON(req == NULL || slog == NULL);
+	if (req->cmd->get_log_page.nsid == 0xFFFFFFFF)
+		status = nvmet_get_smart_log_all(req, slog);
+	else
+		status = nvmet_get_smart_log_nsid(req, slog);
+	return status;
+}
+
 static void nvmet_execute_get_log_page(struct nvmet_req *req)
 {
+	struct nvme_smart_log *smart_log;
 	size_t data_len = nvmet_get_log_page_len(req->cmd);
 	void *buf;
 	u16 status = 0;
@@ -60,6 +136,16 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
 		 * available (e.g. units or commands read/written) those aren't
 		 * persistent over power loss.
 		 */
+		if (data_len != sizeof(*smart_log)) {
+			status = NVME_SC_INTERNAL;
+			goto err;
+		}
+		smart_log = buf;
+		status = nvmet_get_smart_log(req, smart_log);
+		if (status) {
+			memset(buf, '\0', data_len);
+			goto err;
+		}
 		break;
 	case 0x03:
 		/*
@@ -74,6 +160,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
 
 	status = nvmet_copy_to_sgl(req, 0, buf, data_len);
 
+err:
 	kfree(buf);
 out:
 	nvmet_req_complete(req, status);
@@ -83,7 +170,6 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 {
 	struct nvmet_ctrl *ctrl = req->sq->ctrl;
 	struct nvme_id_ctrl *id;
-	u64 serial;
 	u16 status = 0;
 
 	id = kzalloc(sizeof(*id), GFP_KERNEL);
@@ -96,10 +182,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 	id->vid = 0;
 	id->ssvid = 0;
 
-	/* generate a random serial number as our controllers are ephemeral: */
-	get_random_bytes(&serial, sizeof(serial));
 	memset(id->sn, ' ', sizeof(id->sn));
-	snprintf(id->sn, sizeof(id->sn), "%llx", serial);
+	snprintf(id->sn, sizeof(id->sn), "%llx", ctrl->serial);
 
 	memset(id->mn, ' ', sizeof(id->mn));
 	strncpy((char *)id->mn, "Linux", sizeof(id->mn));
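A note on the admin-cmd.c hunk above: struct nvme_smart_log stores each counter as a 16-byte little-endian field (a __u8 array), which is why the native u64 totals are written with put_unaligned_le64() rather than assigned directly. A minimal sketch of that pattern, with a hypothetical struct standing in for the real log layout:

#include <linux/types.h>
#include <asm/unaligned.h>

/* hypothetical stand-in for one 128-bit SMART log counter */
struct example_smart_counter {
	__u8 bytes[16];
};

static void example_fill_counter(struct example_smart_counter *c, u64 val)
{
	/*
	 * Store the 64-bit total into the low 8 bytes as little-endian,
	 * independent of host byte order; the high 8 bytes are expected
	 * to be pre-zeroed (the log buffer is kzalloc'd).
	 */
	put_unaligned_le64(val, &c->bytes[0]);
}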
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 8a891ca53367..6559d5afa7bf 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -13,6 +13,7 @@
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
+#include <linux/random.h>
 #include "nvmet.h"
 
 static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
@@ -728,6 +729,9 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
 	memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
 	memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
 
+	/* generate a random serial number as our controllers are ephemeral: */
+	get_random_bytes(&ctrl->serial, sizeof(ctrl->serial));
+
 	kref_init(&ctrl->ref);
 	ctrl->subsys = subsys;
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
index 2cd069b691ae..4a96c2049b7b 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -58,6 +58,7 @@ static void nvmet_execute_rw(struct nvmet_req *req)
 
 	if (req->cmd->rw.opcode == nvme_cmd_write) {
 		op = REQ_OP_WRITE;
+		op_flags = WRITE_ODIRECT;
 		if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
 			op_flags |= REQ_FUA;
 	} else {
@@ -205,7 +206,7 @@ int nvmet_parse_io_cmd(struct nvmet_req *req)
 		return 0;
 	case nvme_cmd_dsm:
 		req->execute = nvmet_execute_dsm;
-		req->data_len = le32_to_cpu(cmd->dsm.nr) *
+		req->data_len = le32_to_cpu(cmd->dsm.nr + 1) *
 			sizeof(struct nvme_dsm_range);
 		return 0;
 	default:
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 94e782987cc9..d5df77d686b2 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -273,7 +273,6 @@ static int nvme_loop_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 static struct blk_mq_ops nvme_loop_mq_ops = {
 	.queue_rq	= nvme_loop_queue_rq,
 	.complete	= nvme_loop_complete_rq,
-	.map_queue	= blk_mq_map_queue,
 	.init_request	= nvme_loop_init_request,
 	.init_hctx	= nvme_loop_init_hctx,
 	.timeout	= nvme_loop_timeout,
@@ -282,7 +281,6 @@
 static struct blk_mq_ops nvme_loop_admin_mq_ops = {
 	.queue_rq	= nvme_loop_queue_rq,
 	.complete	= nvme_loop_complete_rq,
-	.map_queue	= blk_mq_map_queue,
 	.init_request	= nvme_loop_init_admin_request,
 	.init_hctx	= nvme_loop_init_admin_hctx,
 	.timeout	= nvme_loop_timeout,
@@ -414,9 +412,8 @@ static void nvme_loop_del_ctrl_work(struct work_struct *work)
 	struct nvme_loop_ctrl *ctrl = container_of(work,
 				struct nvme_loop_ctrl, delete_work);
 
-	nvme_remove_namespaces(&ctrl->ctrl);
-	nvme_loop_shutdown_ctrl(ctrl);
 	nvme_uninit_ctrl(&ctrl->ctrl);
+	nvme_loop_shutdown_ctrl(ctrl);
 	nvme_put_ctrl(&ctrl->ctrl);
 }
@@ -501,7 +498,6 @@ out_free_queues:
 	nvme_loop_destroy_admin_queue(ctrl);
 out_disable:
 	dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
-	nvme_remove_namespaces(&ctrl->ctrl);
 	nvme_uninit_ctrl(&ctrl->ctrl);
 	nvme_put_ctrl(&ctrl->ctrl);
 }
@@ -558,7 +554,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
 
 	memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
 	ctrl->tag_set.ops = &nvme_loop_mq_ops;
-	ctrl->tag_set.queue_depth = ctrl->ctrl.sqsize;
+	ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
 	ctrl->tag_set.reserved_tags = 1; /* fabric connect */
 	ctrl->tag_set.numa_node = NUMA_NO_NODE;
 	ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
@@ -622,7 +618,7 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
 
 	ret = -ENOMEM;
-	ctrl->ctrl.sqsize = opts->queue_size;
+	ctrl->ctrl.sqsize = opts->queue_size - 1;
 	ctrl->ctrl.kato = opts->kato;
 
 	ctrl->queues = kcalloc(opts->nr_io_queues + 1, sizeof(*ctrl->queues),
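Two conversions in the io-cmd.c and loop.c hunks above follow NVMe's 0's based convention (a stored value of N means N + 1): dsm.nr carries the range count minus one, and the fabrics SQSIZE field is the queue size minus one, which is why the tag set keeps the full opts->queue_size while ctrl->ctrl.sqsize gets queue_size - 1. A hedged sketch with hypothetical helper names; note that the endianness swap belongs before the +1 arithmetic:

#include <linux/types.h>
#include <asm/byteorder.h>

/* DSM: a wire value of N means N + 1 ranges follow */
static inline u32 example_dsm_range_count(__le32 nr)
{
	return le32_to_cpu(nr) + 1;	/* swap first, then add */
}

/* fabrics SQSIZE: a queue of queue_size entries is advertised as size - 1 */
static inline u16 example_sqsize(u16 queue_size)
{
	return queue_size - 1;
}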
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 57dd6d834c28..76b6eedccaf9 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -113,6 +113,7 @@ struct nvmet_ctrl {
 	struct mutex		lock;
 	u64			cap;
+	u64			serial;
 	u32			cc;
 	u32			csts;
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index e06d504bdf0c..f8d23999e0f2 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -77,6 +77,7 @@ enum nvmet_rdma_queue_state {
 	NVMET_RDMA_Q_CONNECTING,
 	NVMET_RDMA_Q_LIVE,
 	NVMET_RDMA_Q_DISCONNECTING,
+	NVMET_RDMA_IN_DEVICE_REMOVAL,
 };
 
 struct nvmet_rdma_queue {
@@ -615,15 +616,10 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
 	if (!len)
 		return 0;
 
-	/* use the already allocated data buffer if possible */
-	if (len <= NVMET_RDMA_INLINE_DATA_SIZE && rsp->queue->host_qid) {
-		nvmet_rdma_use_inline_sg(rsp, len, 0);
-	} else {
-		status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt,
-				len);
-		if (status)
-			return status;
-	}
+	status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt,
+			len);
+	if (status)
+		return status;
 
 	ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
 			rsp->req.sg, rsp->req.sg_cnt, 0, addr, key,
@@ -852,7 +848,7 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
 	ndev->device = cm_id->device;
 	kref_init(&ndev->ref);
 
-	ndev->pd = ib_alloc_pd(ndev->device);
+	ndev->pd = ib_alloc_pd(ndev->device, 0);
 	if (IS_ERR(ndev->pd))
 		goto out_free_dev;
 
@@ -982,9 +978,13 @@ static void nvmet_rdma_release_queue_work(struct work_struct *w)
 		container_of(w, struct nvmet_rdma_queue, release_work);
 	struct rdma_cm_id *cm_id = queue->cm_id;
 	struct nvmet_rdma_device *dev = queue->dev;
+	enum nvmet_rdma_queue_state state = queue->state;
 
 	nvmet_rdma_free_queue(queue);
-	rdma_destroy_id(cm_id);
+
+	if (state != NVMET_RDMA_IN_DEVICE_REMOVAL)
+		rdma_destroy_id(cm_id);
+
 	kref_put(&dev->ref, nvmet_rdma_free_dev);
 }
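The release_queue_work hunk above snapshots queue->state before nvmet_rdma_free_queue() tears the queue down, and skips rdma_destroy_id() when the queue died via device removal, since the CM core then destroys the id itself. An illustrative restatement of that copy-before-free pattern (not the literal patch code):

static void example_release(struct nvmet_rdma_queue *queue)
{
	struct rdma_cm_id *cm_id = queue->cm_id;
	enum nvmet_rdma_queue_state state = queue->state;

	nvmet_rdma_free_queue(queue);	/* queue must not be touched after this */

	/* on device removal, rdma_cm destroys the id itself (handler returned 1) */
	if (state != NVMET_RDMA_IN_DEVICE_REMOVAL)
		rdma_destroy_id(cm_id);
}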
@@ -1004,10 +1004,10 @@ nvmet_rdma_parse_cm_connect_req(struct rdma_conn_param *conn,
 	queue->host_qid = le16_to_cpu(req->qid);
 
 	/*
-	 * req->hsqsize corresponds to our recv queue size
+	 * req->hsqsize corresponds to our recv queue size plus 1
 	 * req->hrqsize corresponds to our send queue size
 	 */
-	queue->recv_queue_size = le16_to_cpu(req->hsqsize);
+	queue->recv_queue_size = le16_to_cpu(req->hsqsize) + 1;
 	queue->send_queue_size = le16_to_cpu(req->hrqsize);
 
 	if (!queue->host_qid && queue->recv_queue_size > NVMF_AQ_DEPTH)
@@ -1233,8 +1233,9 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
 	switch (queue->state) {
 	case NVMET_RDMA_Q_CONNECTING:
 	case NVMET_RDMA_Q_LIVE:
-		disconnect = true;
 		queue->state = NVMET_RDMA_Q_DISCONNECTING;
+	case NVMET_RDMA_IN_DEVICE_REMOVAL:
+		disconnect = true;
 		break;
 	case NVMET_RDMA_Q_DISCONNECTING:
 		break;
@@ -1272,6 +1273,62 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
 	schedule_work(&queue->release_work);
 }
 
+/**
+ * nvmet_rdma_device_removal() - Handle RDMA device removal
+ * @cm_id:	rdma_cm id (for a listener, its context is the nvmet port)
+ * @queue:	nvmet rdma queue (cm id qp_context), NULL for a listener
+ *
+ * DEVICE_REMOVAL event notifies us that the RDMA device is about
+ * to unplug, so we should take care of destroying our RDMA resources.
+ * This event will be generated for each allocated cm_id.
+ *
+ * Note that this event can be generated on a normal queue cm_id
+ * and/or a device bound listener cm_id (where in this case
+ * queue will be null).
+ *
+ * We claim ownership on destroying the cm_id. For queues we move
+ * the queue state to NVMET_RDMA_IN_DEVICE_REMOVAL and for port
+ * we nullify the priv to prevent double cm_id destruction, destroying
+ * the cm_id implicitly by returning a non-zero rc from the callback.
+ */
+static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
+		struct nvmet_rdma_queue *queue)
+{
+	unsigned long flags;
+
+	if (!queue) {
+		struct nvmet_port *port = cm_id->context;
+
+		/*
+		 * This is a listener cm_id. Make sure that
+		 * future remove_port won't invoke a double
+		 * cm_id destroy. Use atomic xchg to make sure
+		 * we don't compete with remove_port.
+		 */
+		if (xchg(&port->priv, NULL) != cm_id)
+			return 0;
+	} else {
+		/*
+		 * This is a queue cm_id. Make sure that
+		 * release queue will not destroy the cm_id
+		 * and schedule all ctrl queues removal (only
+		 * if the queue is not disconnecting already).
+		 */
+		spin_lock_irqsave(&queue->state_lock, flags);
+		if (queue->state != NVMET_RDMA_Q_DISCONNECTING)
+			queue->state = NVMET_RDMA_IN_DEVICE_REMOVAL;
+		spin_unlock_irqrestore(&queue->state_lock, flags);
+		nvmet_rdma_queue_disconnect(queue);
+		flush_scheduled_work();
+	}
+
+	/*
+	 * We need to return 1 so that the core will destroy
+	 * its own ID. What a great API design..
+	 */
+	return 1;
+}
+
 static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
 		struct rdma_cm_event *event)
 {
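The listener branch above and nvmet_rdma_remove_port() further down can race to destroy the same listener cm_id; the atomic xchg() on port->priv guarantees exactly one winner. A minimal sketch of the hand-off, with a hypothetical helper name:

#include <linux/atomic.h>

/* hypothetical helper: whoever swaps port->priv to NULL first owns the
 * teardown; the loser sees a mismatch (or NULL) and must not destroy it. */
static bool example_claim_listener(struct nvmet_port *port,
		struct rdma_cm_id *cm_id)
{
	return xchg(&port->priv, NULL) == cm_id;
}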
@@ -1294,20 +1351,11 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
 		break;
 	case RDMA_CM_EVENT_ADDR_CHANGE:
 	case RDMA_CM_EVENT_DISCONNECTED:
-	case RDMA_CM_EVENT_DEVICE_REMOVAL:
 	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
-		/*
-		 * We can get the device removal callback even for a
-		 * CM ID that we aren't actually using. In that case
-		 * the context pointer is NULL, so we shouldn't try
-		 * to disconnect a non-existing queue. But we also
-		 * need to return 1 so that the core will destroy
-		 * it's own ID. What a great API design..
-		 */
-		if (queue)
-			nvmet_rdma_queue_disconnect(queue);
-		else
-			ret = 1;
+		nvmet_rdma_queue_disconnect(queue);
+		break;
+	case RDMA_CM_EVENT_DEVICE_REMOVAL:
+		ret = nvmet_rdma_device_removal(cm_id, queue);
 		break;
 	case RDMA_CM_EVENT_REJECTED:
 	case RDMA_CM_EVENT_UNREACHABLE:
@@ -1396,9 +1444,10 @@ out_destroy_id:
 
 static void nvmet_rdma_remove_port(struct nvmet_port *port)
 {
-	struct rdma_cm_id *cm_id = port->priv;
+	struct rdma_cm_id *cm_id = xchg(&port->priv, NULL);
 
-	rdma_destroy_id(cm_id);
+	if (cm_id)
+		rdma_destroy_id(cm_id);
 }
 
 static struct nvmet_fabrics_ops nvmet_rdma_ops = {
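For reference, the ret = 1 path relies on the documented rdma_cm contract that a non-zero return from the event handler makes the CM core destroy the cm_id itself. A simplified, hypothetical handler showing just that contract:

#include <rdma/rdma_cm.h>

/* hypothetical minimal handler: a non-zero return asks the CM core to
 * destroy cm_id, so the handler must not call rdma_destroy_id() on it. */
static int example_cm_handler(struct rdma_cm_id *cm_id,
		struct rdma_cm_event *event)
{
	if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
		return 1;	/* CM core frees cm_id after we return */
	return 0;
}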