summaryrefslogtreecommitdiffstats
path: root/drivers/nvme
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme')
-rw-r--r--drivers/nvme/host/Kconfig12
-rw-r--r--drivers/nvme/host/Makefile1
-rw-r--r--drivers/nvme/host/core.c502
-rw-r--r--drivers/nvme/host/fabrics.c38
-rw-r--r--drivers/nvme/host/fabrics.h6
-rw-r--r--drivers/nvme/host/fc.c160
-rw-r--r--drivers/nvme/host/hwmon.c256
-rw-r--r--drivers/nvme/host/lightnvm.c45
-rw-r--r--drivers/nvme/host/multipath.c33
-rw-r--r--drivers/nvme/host/nvme.h87
-rw-r--r--drivers/nvme/host/pci.c294
-rw-r--r--drivers/nvme/host/rdma.c124
-rw-r--r--drivers/nvme/host/tcp.c172
-rw-r--r--drivers/nvme/host/trace.c18
-rw-r--r--drivers/nvme/target/admin-cmd.c165
-rw-r--r--drivers/nvme/target/core.c100
-rw-r--r--drivers/nvme/target/discovery.c74
-rw-r--r--drivers/nvme/target/fabrics-cmd.c30
-rw-r--r--drivers/nvme/target/fc.c31
-rw-r--r--drivers/nvme/target/fcloop.c1
-rw-r--r--drivers/nvme/target/io-cmd-bdev.c59
-rw-r--r--drivers/nvme/target/io-cmd-file.c20
-rw-r--r--drivers/nvme/target/loop.c47
-rw-r--r--drivers/nvme/target/nvmet.h11
-rw-r--r--drivers/nvme/target/rdma.c8
-rw-r--r--drivers/nvme/target/tcp.c26
-rw-r--r--drivers/nvme/target/trace.c18
27 files changed, 1643 insertions, 695 deletions
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index ec43ac9199e2..b9358db83e96 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
config NVME_CORE
tristate
+ select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY
config BLK_DEV_NVME
tristate "NVM Express block device"
@@ -23,6 +24,16 @@ config NVME_MULTIPATH
/dev/nvmeXnY device will show up for each NVMe namespaces,
even if it is accessible through multiple controllers.
+config NVME_HWMON
+ bool "NVMe hardware monitoring"
+ depends on (NVME_CORE=y && HWMON=y) || (NVME_CORE=m && HWMON)
+ help
+ This provides support for NVMe hardware monitoring. If enabled,
+ a hardware monitoring device will be created for each NVMe drive
+ in the system.
+
+ If unsure, say N.
+
config NVME_FABRICS
tristate
@@ -64,6 +75,7 @@ config NVME_TCP
depends on INET
depends on BLK_DEV_NVME
select NVME_FABRICS
+ select CRYPTO_CRC32C
help
This provides support for the NVMe over Fabrics protocol using
the TCP transport. This allows you to use remote block devices
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index 8a4b671c5f0c..fc7b26be692d 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -14,6 +14,7 @@ nvme-core-$(CONFIG_TRACING) += trace.o
nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
nvme-core-$(CONFIG_NVM) += lightnvm.o
nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o
+nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o
nvme-y += pci.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index c258a1ce4b28..ada59df642d2 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -22,12 +22,12 @@
#include <linux/pm_qos.h>
#include <asm/unaligned.h>
-#define CREATE_TRACE_POINTS
-#include "trace.h"
-
#include "nvme.h"
#include "fabrics.h"
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
#define NVME_MINORS (1U << MINORBITS)
unsigned int admin_timeout = 60;
@@ -66,8 +66,8 @@ MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
* nvme_reset_wq - hosts nvme reset works
* nvme_delete_wq - hosts nvme delete works
*
- * nvme_wq will host works such are scan, aen handling, fw activation,
- * keep-alive error recovery, periodic reconnects etc. nvme_reset_wq
+ * nvme_wq will host works such as scan, aen handling, fw activation,
+ * keep-alive, periodic reconnects etc. nvme_reset_wq
* runs reset works which also flush works hosted on nvme_wq for
* serialization purposes. nvme_delete_wq host controller deletion
* works which flush reset works for serialization.
@@ -81,7 +81,6 @@ EXPORT_SYMBOL_GPL(nvme_reset_wq);
struct workqueue_struct *nvme_delete_wq;
EXPORT_SYMBOL_GPL(nvme_delete_wq);
-static DEFINE_IDA(nvme_subsystems_ida);
static LIST_HEAD(nvme_subsystems);
static DEFINE_MUTEX(nvme_subsystems_lock);
@@ -103,10 +102,13 @@ static void nvme_set_queue_dying(struct nvme_ns *ns)
*/
if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
return;
- revalidate_disk(ns->disk);
blk_set_queue_dying(ns->queue);
/* Forcibly unquiesce queues to avoid blocking dispatch */
blk_mq_unquiesce_queue(ns->queue);
+ /*
+ * Revalidate after unblocking dispatchers that may be holding bd_butex
+ */
+ revalidate_disk(ns->disk);
}
static void nvme_queue_scan(struct nvme_ctrl *ctrl)
@@ -114,10 +116,26 @@ static void nvme_queue_scan(struct nvme_ctrl *ctrl)
/*
* Only new queue scan work when admin and IO queues are both alive
*/
- if (ctrl->state == NVME_CTRL_LIVE)
+ if (ctrl->state == NVME_CTRL_LIVE && ctrl->tagset)
queue_work(nvme_wq, &ctrl->scan_work);
}
+/*
+ * Use this function to proceed with scheduling reset_work for a controller
+ * that had previously been set to the resetting state. This is intended for
+ * code paths that can't be interrupted by other reset attempts. A hot removal
+ * may prevent this from succeeding.
+ */
+int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
+{
+ if (ctrl->state != NVME_CTRL_RESETTING)
+ return -EBUSY;
+ if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
+ return -EBUSY;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nvme_try_sched_reset);
+
int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
{
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
@@ -135,8 +153,7 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
ret = nvme_reset_ctrl(ctrl);
if (!ret) {
flush_work(&ctrl->reset_work);
- if (ctrl->state != NVME_CTRL_LIVE &&
- ctrl->state != NVME_CTRL_ADMIN_ONLY)
+ if (ctrl->state != NVME_CTRL_LIVE)
ret = -ENETRESET;
}
@@ -197,14 +214,16 @@ static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple);
}
-static blk_status_t nvme_error_status(struct request *req)
+static blk_status_t nvme_error_status(u16 status)
{
- switch (nvme_req(req)->status & 0x7ff) {
+ switch (status & 0x7ff) {
case NVME_SC_SUCCESS:
return BLK_STS_OK;
case NVME_SC_CAP_EXCEEDED:
return BLK_STS_NOSPC;
case NVME_SC_LBA_RANGE:
+ case NVME_SC_CMD_INTERRUPTED:
+ case NVME_SC_NS_NOT_READY:
return BLK_STS_TARGET;
case NVME_SC_BAD_ATTRIBUTES:
case NVME_SC_ONCS_NOT_SUPPORTED:
@@ -226,6 +245,8 @@ static blk_status_t nvme_error_status(struct request *req)
return BLK_STS_PROTECTION;
case NVME_SC_RESERVATION_CONFLICT:
return BLK_STS_NEXUS;
+ case NVME_SC_HOST_PATH_ERROR:
+ return BLK_STS_TRANSPORT;
default:
return BLK_STS_IOERR;
}
@@ -260,10 +281,12 @@ static void nvme_retry_req(struct request *req)
void nvme_complete_rq(struct request *req)
{
- blk_status_t status = nvme_error_status(req);
+ blk_status_t status = nvme_error_status(nvme_req(req)->status);
trace_nvme_complete_rq(req);
+ nvme_cleanup_cmd(req);
+
if (nvme_req(req)->ctrl->kas)
nvme_req(req)->ctrl->comp_seen = true;
@@ -279,6 +302,8 @@ void nvme_complete_rq(struct request *req)
return;
}
}
+
+ nvme_trace_bio_complete(req, status);
blk_mq_end_request(req, status);
}
EXPORT_SYMBOL_GPL(nvme_complete_rq);
@@ -288,8 +313,12 @@ bool nvme_cancel_request(struct request *req, void *data, bool reserved)
dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device,
"Cancelling I/O %d", req->tag);
- nvme_req(req)->status = NVME_SC_ABORT_REQ;
- blk_mq_complete_request_sync(req);
+ /* don't abort one completed request */
+ if (blk_mq_request_completed(req))
+ return true;
+
+ nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD;
+ blk_mq_complete_request(req);
return true;
}
EXPORT_SYMBOL_GPL(nvme_cancel_request);
@@ -305,15 +334,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
old_state = ctrl->state;
switch (new_state) {
- case NVME_CTRL_ADMIN_ONLY:
- switch (old_state) {
- case NVME_CTRL_CONNECTING:
- changed = true;
- /* FALLTHRU */
- default:
- break;
- }
- break;
case NVME_CTRL_LIVE:
switch (old_state) {
case NVME_CTRL_NEW:
@@ -329,7 +349,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
switch (old_state) {
case NVME_CTRL_NEW:
case NVME_CTRL_LIVE:
- case NVME_CTRL_ADMIN_ONLY:
changed = true;
/* FALLTHRU */
default:
@@ -349,7 +368,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
case NVME_CTRL_DELETING:
switch (old_state) {
case NVME_CTRL_LIVE:
- case NVME_CTRL_ADMIN_ONLY:
case NVME_CTRL_RESETTING:
case NVME_CTRL_CONNECTING:
changed = true;
@@ -371,8 +389,10 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
break;
}
- if (changed)
+ if (changed) {
ctrl->state = new_state;
+ wake_up_all(&ctrl->state_wq);
+ }
spin_unlock_irqrestore(&ctrl->lock, flags);
if (changed && ctrl->state == NVME_CTRL_LIVE)
@@ -381,6 +401,39 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
}
EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
+/*
+ * Returns true for sink states that can't ever transition back to live.
+ */
+static bool nvme_state_terminal(struct nvme_ctrl *ctrl)
+{
+ switch (ctrl->state) {
+ case NVME_CTRL_NEW:
+ case NVME_CTRL_LIVE:
+ case NVME_CTRL_RESETTING:
+ case NVME_CTRL_CONNECTING:
+ return false;
+ case NVME_CTRL_DELETING:
+ case NVME_CTRL_DEAD:
+ return true;
+ default:
+ WARN_ONCE(1, "Unhandled ctrl state:%d", ctrl->state);
+ return true;
+ }
+}
+
+/*
+ * Waits for the controller state to be resetting, or returns false if it is
+ * not possible to ever transition to that state.
+ */
+bool nvme_wait_reset(struct nvme_ctrl *ctrl)
+{
+ wait_event(ctrl->state_wq,
+ nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) ||
+ nvme_state_terminal(ctrl));
+ return ctrl->state == NVME_CTRL_RESETTING;
+}
+EXPORT_SYMBOL_GPL(nvme_wait_reset);
+
static void nvme_free_ns_head(struct kref *ref)
{
struct nvme_ns_head *head =
@@ -562,8 +615,14 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
struct nvme_dsm_range *range;
struct bio *bio;
- range = kmalloc_array(segments, sizeof(*range),
- GFP_ATOMIC | __GFP_NOWARN);
+ /*
+ * Some devices do not consider the DSM 'Number of Ranges' field when
+ * determining how much data to DMA. Always allocate memory for maximum
+ * number of segments to prevent device reading beyond end of buffer.
+ */
+ static const size_t alloc_size = sizeof(*range) * NVME_DSM_MAX_RANGES;
+
+ range = kzalloc(alloc_size, GFP_ATOMIC | __GFP_NOWARN);
if (!range) {
/*
* If we fail allocation our range, fallback to the controller
@@ -577,7 +636,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
}
__rq_for_each_bio(bio, req) {
- u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
+ u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector);
u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
if (n < segments) {
@@ -603,7 +662,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
req->special_vec.bv_page = virt_to_page(range);
req->special_vec.bv_offset = offset_in_page(range);
- req->special_vec.bv_len = sizeof(*range) * segments;
+ req->special_vec.bv_len = alloc_size;
req->rq_flags |= RQF_SPECIAL_PAYLOAD;
return BLK_STS_OK;
@@ -618,7 +677,7 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
cmnd->write_zeroes.opcode = nvme_cmd_write_zeroes;
cmnd->write_zeroes.nsid = cpu_to_le32(ns->head->ns_id);
cmnd->write_zeroes.slba =
- cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
+ cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
cmnd->write_zeroes.length =
cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
cmnd->write_zeroes.control = 0;
@@ -642,7 +701,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id);
- cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
+ cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams)
@@ -659,8 +718,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
if (WARN_ON_ONCE(!nvme_ns_has_pi(ns)))
return BLK_STS_NOTSUPP;
control |= NVME_RW_PRINFO_PRACT;
- } else if (req_op(req) == REQ_OP_WRITE) {
- t10_pi_prepare(req, ns->pi_type);
}
switch (ns->pi_type) {
@@ -683,13 +740,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
void nvme_cleanup_cmd(struct request *req)
{
- if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
- nvme_req(req)->status == 0) {
- struct nvme_ns *ns = req->rq_disk->private_data;
-
- t10_pi_complete(req, ns->pi_type,
- blk_rq_bytes(req) >> ns->lba_shift);
- }
if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
struct nvme_ns *ns = req->rq_disk->private_data;
struct page *page = req->special_vec.bv_page;
@@ -849,7 +899,7 @@ out:
static int nvme_submit_user_cmd(struct request_queue *q,
struct nvme_command *cmd, void __user *ubuffer,
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
- u32 meta_seed, u32 *result, unsigned timeout)
+ u32 meta_seed, u64 *result, unsigned timeout)
{
bool write = nvme_is_write(cmd);
struct nvme_ns *ns = q->queuedata;
@@ -890,7 +940,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
else
ret = nvme_req(req)->status;
if (result)
- *result = le32_to_cpu(nvme_req(req)->result.u32);
+ *result = le64_to_cpu(nvme_req(req)->result.u64);
if (meta && !ret && !write) {
if (copy_to_user(meta_buffer, meta, meta_len))
ret = -EFAULT;
@@ -926,7 +976,7 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
startka = true;
spin_unlock_irqrestore(&ctrl->lock, flags);
if (startka)
- schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
+ queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ);
}
static int nvme_keep_alive(struct nvme_ctrl *ctrl)
@@ -956,7 +1006,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
dev_dbg(ctrl->device,
"reschedule traffic based keep-alive timer\n");
ctrl->comp_seen = false;
- schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
+ queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ);
return;
}
@@ -973,7 +1023,7 @@ static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
if (unlikely(ctrl->kato == 0))
return;
- schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
+ queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ);
}
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
@@ -1088,10 +1138,9 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n
NVME_IDENTIFY_DATA_SIZE);
}
-static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl,
- unsigned nsid)
+static int nvme_identify_ns(struct nvme_ctrl *ctrl,
+ unsigned nsid, struct nvme_id_ns **id)
{
- struct nvme_id_ns *id;
struct nvme_command c = { };
int error;
@@ -1100,18 +1149,17 @@ static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl,
c.identify.nsid = cpu_to_le32(nsid);
c.identify.cns = NVME_ID_CNS_NS;
- id = kmalloc(sizeof(*id), GFP_KERNEL);
- if (!id)
- return NULL;
+ *id = kmalloc(sizeof(**id), GFP_KERNEL);
+ if (!*id)
+ return -ENOMEM;
- error = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
+ error = nvme_submit_sync_cmd(ctrl->admin_q, &c, *id, sizeof(**id));
if (error) {
dev_warn(ctrl->device, "Identify namespace failed (%d)\n", error);
- kfree(id);
- return NULL;
+ kfree(*id);
}
- return id;
+ return error;
}
static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid,
@@ -1180,7 +1228,8 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
EXPORT_SYMBOL_GPL(nvme_set_queue_count);
#define NVME_AEN_SUPPORTED \
- (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT | NVME_AEN_CFG_ANA_CHANGE)
+ (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT | \
+ NVME_AEN_CFG_ANA_CHANGE | NVME_AEN_CFG_DISC_CHANGE)
static void nvme_enable_aen(struct nvme_ctrl *ctrl)
{
@@ -1195,6 +1244,8 @@ static void nvme_enable_aen(struct nvme_ctrl *ctrl)
if (status)
dev_warn(ctrl->device, "Failed to configure AEN (cfg %x)\n",
supported_aens);
+
+ queue_work(nvme_wq, &ctrl->async_event_work);
}
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
@@ -1304,8 +1355,6 @@ static void nvme_update_formats(struct nvme_ctrl *ctrl)
if (ns->disk && nvme_revalidate_disk(ns->disk))
nvme_set_queue_dying(ns);
up_read(&ctrl->namespaces_rwsem);
-
- nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
}
static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
@@ -1321,6 +1370,7 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
nvme_unfreeze(ctrl);
nvme_mpath_unfreeze(ctrl->subsys);
mutex_unlock(&ctrl->subsys->lock);
+ nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
mutex_unlock(&ctrl->scan_lock);
}
if (effects & NVME_CMD_EFFECTS_CCC)
@@ -1336,6 +1386,54 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct nvme_command c;
unsigned timeout = 0;
u32 effects;
+ u64 result;
+ int status;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
+ return -EFAULT;
+ if (cmd.flags)
+ return -EINVAL;
+
+ memset(&c, 0, sizeof(c));
+ c.common.opcode = cmd.opcode;
+ c.common.flags = cmd.flags;
+ c.common.nsid = cpu_to_le32(cmd.nsid);
+ c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
+ c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
+ c.common.cdw10 = cpu_to_le32(cmd.cdw10);
+ c.common.cdw11 = cpu_to_le32(cmd.cdw11);
+ c.common.cdw12 = cpu_to_le32(cmd.cdw12);
+ c.common.cdw13 = cpu_to_le32(cmd.cdw13);
+ c.common.cdw14 = cpu_to_le32(cmd.cdw14);
+ c.common.cdw15 = cpu_to_le32(cmd.cdw15);
+
+ if (cmd.timeout_ms)
+ timeout = msecs_to_jiffies(cmd.timeout_ms);
+
+ effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
+ status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
+ (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
+ (void __user *)(uintptr_t)cmd.metadata,
+ cmd.metadata_len, 0, &result, timeout);
+ nvme_passthru_end(ctrl, effects);
+
+ if (status >= 0) {
+ if (put_user(result, &ucmd->result))
+ return -EFAULT;
+ }
+
+ return status;
+}
+
+static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+ struct nvme_passthru_cmd64 __user *ucmd)
+{
+ struct nvme_passthru_cmd64 cmd;
+ struct nvme_command c;
+ unsigned timeout = 0;
+ u32 effects;
int status;
if (!capable(CAP_SYS_ADMIN))
@@ -1406,6 +1504,41 @@ static void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx)
srcu_read_unlock(&head->srcu, idx);
}
+static bool is_ctrl_ioctl(unsigned int cmd)
+{
+ if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD)
+ return true;
+ if (is_sed_ioctl(cmd))
+ return true;
+ return false;
+}
+
+static int nvme_handle_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
+ void __user *argp,
+ struct nvme_ns_head *head,
+ int srcu_idx)
+{
+ struct nvme_ctrl *ctrl = ns->ctrl;
+ int ret;
+
+ nvme_get_ctrl(ns->ctrl);
+ nvme_put_ns_from_disk(head, srcu_idx);
+
+ switch (cmd) {
+ case NVME_IOCTL_ADMIN_CMD:
+ ret = nvme_user_cmd(ctrl, NULL, argp);
+ break;
+ case NVME_IOCTL_ADMIN64_CMD:
+ ret = nvme_user_cmd64(ctrl, NULL, argp);
+ break;
+ default:
+ ret = sed_ioctl(ctrl->opal_dev, cmd, argp);
+ break;
+ }
+ nvme_put_ctrl(ctrl);
+ return ret;
+}
+
static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
@@ -1423,20 +1556,8 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
* seperately and drop the ns SRCU reference early. This avoids a
* deadlock when deleting namespaces using the passthrough interface.
*/
- if (cmd == NVME_IOCTL_ADMIN_CMD || is_sed_ioctl(cmd)) {
- struct nvme_ctrl *ctrl = ns->ctrl;
-
- nvme_get_ctrl(ns->ctrl);
- nvme_put_ns_from_disk(head, srcu_idx);
-
- if (cmd == NVME_IOCTL_ADMIN_CMD)
- ret = nvme_user_cmd(ctrl, NULL, argp);
- else
- ret = sed_ioctl(ctrl->opal_dev, cmd, argp);
-
- nvme_put_ctrl(ctrl);
- return ret;
- }
+ if (is_ctrl_ioctl(cmd))
+ return nvme_handle_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
switch (cmd) {
case NVME_IOCTL_ID:
@@ -1449,6 +1570,9 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
case NVME_IOCTL_SUBMIT_IO:
ret = nvme_submit_io(ns, argp);
break;
+ case NVME_IOCTL_IO64_CMD:
+ ret = nvme_user_cmd64(ns->ctrl, ns, argp);
+ break;
default:
if (ns->ndev)
ret = nvme_nvm_ioctl(ns, cmd, arg);
@@ -1533,7 +1657,7 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
static void nvme_set_chunk_size(struct nvme_ns *ns)
{
- u32 chunk_size = (((u32)ns->noiob) << (ns->lba_shift - 9));
+ u32 chunk_size = nvme_lba_to_sect(ns, ns->noiob);
blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
}
@@ -1570,8 +1694,7 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns)
{
- u32 max_sectors;
- unsigned short bs = 1 << ns->lba_shift;
+ u64 max_blocks;
if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) ||
(ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES))
@@ -1587,16 +1710,19 @@ static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns)
* nvme_init_identify() if available.
*/
if (ns->ctrl->max_hw_sectors == UINT_MAX)
- max_sectors = ((u32)(USHRT_MAX + 1) * bs) >> 9;
+ max_blocks = (u64)USHRT_MAX + 1;
else
- max_sectors = ((u32)(ns->ctrl->max_hw_sectors + 1) * bs) >> 9;
+ max_blocks = ns->ctrl->max_hw_sectors + 1;
- blk_queue_max_write_zeroes_sectors(disk->queue, max_sectors);
+ blk_queue_max_write_zeroes_sectors(disk->queue,
+ nvme_lba_to_sect(ns, max_blocks));
}
-static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
+static int nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
struct nvme_id_ns *id, struct nvme_ns_ids *ids)
{
+ int ret = 0;
+
memset(ids, 0, sizeof(*ids));
if (ctrl->vs >= NVME_VS(1, 1, 0))
@@ -1607,10 +1733,14 @@ static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
/* Don't treat error as fatal we potentially
* already have a NGUID or EUI-64
*/
- if (nvme_identify_ns_descs(ctrl, nsid, ids))
+ ret = nvme_identify_ns_descs(ctrl, nsid, ids);
+ if (ret)
dev_warn(ctrl->device,
- "%s: Identify Descriptors failed\n", __func__);
+ "Identify Descriptors failed (%d)\n", ret);
+ if (ret > 0)
+ ret = 0;
}
+ return ret;
}
static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids)
@@ -1630,7 +1760,7 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
static void nvme_update_disk_info(struct gendisk *disk,
struct nvme_ns *ns, struct nvme_id_ns *id)
{
- sector_t capacity = le64_to_cpu(id->nsze) << (ns->lba_shift - 9);
+ sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze));
unsigned short bs = 1 << ns->lba_shift;
u32 atomic_bs, phys_bs, io_opt;
@@ -1738,25 +1868,37 @@ static int nvme_revalidate_disk(struct gendisk *disk)
return -ENODEV;
}
- id = nvme_identify_ns(ctrl, ns->head->ns_id);
- if (!id)
- return -ENODEV;
+ ret = nvme_identify_ns(ctrl, ns->head->ns_id, &id);
+ if (ret)
+ goto out;
if (id->ncap == 0) {
ret = -ENODEV;
- goto out;
+ goto free_id;
}
__nvme_revalidate_disk(disk, id);
- nvme_report_ns_ids(ctrl, ns->head->ns_id, id, &ids);
+ ret = nvme_report_ns_ids(ctrl, ns->head->ns_id, id, &ids);
+ if (ret)
+ goto free_id;
+
if (!nvme_ns_ids_equal(&ns->head->ids, &ids)) {
dev_err(ctrl->device,
"identifiers changed for nsid %d\n", ns->head->ns_id);
ret = -ENODEV;
}
-out:
+free_id:
kfree(id);
+out:
+ /*
+ * Only fail the function if we got a fatal error back from the
+ * device, otherwise ignore the error and just move on.
+ */
+ if (ret == -ENOMEM || (ret > 0 && !(ret & NVME_SC_DNR)))
+ ret = 0;
+ else if (ret > 0)
+ ret = blk_status_to_errno(nvme_error_status(ret));
return ret;
}
@@ -1952,7 +2094,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
* bits', but doing so may cause the device to complete commands to the
* admin queue ... and we don't know what memory that might be pointing at!
*/
-int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
+int nvme_disable_ctrl(struct nvme_ctrl *ctrl)
{
int ret;
@@ -1966,20 +2108,27 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
msleep(NVME_QUIRK_DELAY_AMOUNT);
- return nvme_wait_ready(ctrl, cap, false);
+ return nvme_wait_ready(ctrl, ctrl->cap, false);
}
EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
-int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
+int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
{
/*
* Default to a 4K page size, with the intention to update this
* path in the future to accomodate architectures with differing
* kernel and IO page sizes.
*/
- unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12, page_shift = 12;
+ unsigned dev_page_min, page_shift = 12;
int ret;
+ ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap);
+ if (ret) {
+ dev_err(ctrl->device, "Reading CAP failed (%d)\n", ret);
+ return ret;
+ }
+ dev_page_min = NVME_CAP_MPSMIN(ctrl->cap) + 12;
+
if (page_shift < dev_page_min) {
dev_err(ctrl->device,
"Minimum device page size %u too large for host (%u)\n",
@@ -1998,7 +2147,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
if (ret)
return ret;
- return nvme_wait_ready(ctrl, cap, true);
+ return nvme_wait_ready(ctrl, ctrl->cap, true);
}
EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
@@ -2257,6 +2406,16 @@ static const struct nvme_core_quirk_entry core_quirks[] = {
.vid = 0x1179,
.mn = "THNSF5256GPUK TOSHIBA",
.quirks = NVME_QUIRK_NO_APST,
+ },
+ {
+ /*
+ * This LiteON CL1-3D*-Q11 firmware version has a race
+ * condition associated with actions related to suspend to idle
+ * LiteON has resolved the problem in future firmware
+ */
+ .vid = 0x14a4,
+ .fr = "22301111",
+ .quirks = NVME_QUIRK_SIMPLE_SUSPEND,
}
};
@@ -2322,7 +2481,8 @@ static void nvme_release_subsystem(struct device *dev)
struct nvme_subsystem *subsys =
container_of(dev, struct nvme_subsystem, dev);
- ida_simple_remove(&nvme_subsystems_ida, subsys->instance);
+ if (subsys->instance >= 0)
+ ida_simple_remove(&nvme_instance_ida, subsys->instance);
kfree(subsys);
}
@@ -2351,6 +2511,17 @@ static struct nvme_subsystem *__nvme_find_get_subsystem(const char *subsysnqn)
lockdep_assert_held(&nvme_subsystems_lock);
+ /*
+ * Fail matches for discovery subsystems. This results
+ * in each discovery controller bound to a unique subsystem.
+ * This avoids issues with validating controller values
+ * that can only be true when there is a single unique subsystem.
+ * There may be multiple and completely independent entities
+ * that provide discovery controllers.
+ */
+ if (!strcmp(subsysnqn, NVME_DISC_SUBSYS_NAME))
+ return NULL;
+
list_for_each_entry(subsys, &nvme_subsystems, entry) {
if (strcmp(subsys->subnqn, subsysnqn))
continue;
@@ -2451,12 +2622,8 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
if (!subsys)
return -ENOMEM;
- ret = ida_simple_get(&nvme_subsystems_ida, 0, 0, GFP_KERNEL);
- if (ret < 0) {
- kfree(subsys);
- return ret;
- }
- subsys->instance = ret;
+
+ subsys->instance = -1;
mutex_init(&subsys->lock);
kref_init(&subsys->ref);
INIT_LIST_HEAD(&subsys->ctrls);
@@ -2475,7 +2642,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
subsys->dev.class = nvme_subsys_class;
subsys->dev.release = nvme_release_subsystem;
subsys->dev.groups = nvme_subsys_attrs_groups;
- dev_set_name(&subsys->dev, "nvme-subsys%d", subsys->instance);
+ dev_set_name(&subsys->dev, "nvme-subsys%d", ctrl->instance);
device_initialize(&subsys->dev);
mutex_lock(&nvme_subsystems_lock);
@@ -2500,13 +2667,16 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
list_add_tail(&subsys->entry, &nvme_subsystems);
}
- if (sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj,
- dev_name(ctrl->device))) {
+ ret = sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj,
+ dev_name(ctrl->device));
+ if (ret) {
dev_err(ctrl->device,
"failed to create sysfs link from subsystem.\n");
goto out_put_subsystem;
}
+ if (!found)
+ subsys->instance = ctrl->instance;
ctrl->subsys = subsys;
list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
mutex_unlock(&nvme_subsystems_lock);
@@ -2564,7 +2734,6 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl)
int nvme_init_identify(struct nvme_ctrl *ctrl)
{
struct nvme_id_ctrl *id;
- u64 cap;
int ret, page_shift;
u32 max_hw_sectors;
bool prev_apst_enabled;
@@ -2574,16 +2743,11 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
dev_err(ctrl->device, "Reading VS failed (%d)\n", ret);
return ret;
}
-
- ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap);
- if (ret) {
- dev_err(ctrl->device, "Reading CAP failed (%d)\n", ret);
- return ret;
- }
- page_shift = NVME_CAP_MPSMIN(cap) + 12;
+ page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12;
+ ctrl->sqsize = min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->sqsize);
if (ctrl->vs >= NVME_VS(1, 1, 0))
- ctrl->subsystem = NVME_CAP_NSSRC(cap);
+ ctrl->subsystem = NVME_CAP_NSSRC(ctrl->cap);
ret = nvme_identify_ctrl(ctrl, &id);
if (ret) {
@@ -2597,6 +2761,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
goto out_free;
}
+ if (!(ctrl->ops->flags & NVME_F_FABRICS))
+ ctrl->cntlid = le16_to_cpu(id->cntlid);
+
if (!ctrl->identified) {
int i;
@@ -2631,6 +2798,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->oncs = le16_to_cpu(id->oncs);
ctrl->mtfa = le16_to_cpu(id->mtfa);
ctrl->oaes = le32_to_cpu(id->oaes);
+ ctrl->wctemp = le16_to_cpu(id->wctemp);
+ ctrl->cctemp = le16_to_cpu(id->cctemp);
+
atomic_set(&ctrl->abort_limit, id->acl + 1);
ctrl->vwc = id->vwc;
if (id->mdts)
@@ -2686,6 +2856,10 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
* admin connect
*/
if (ctrl->cntlid != le16_to_cpu(id->cntlid)) {
+ dev_err(ctrl->device,
+ "Mismatching cntlid: Connect %u vs Identify "
+ "%u, rejecting\n",
+ ctrl->cntlid, le16_to_cpu(id->cntlid));
ret = -EINVAL;
goto out_free;
}
@@ -2697,7 +2871,6 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
goto out_free;
}
} else {
- ctrl->cntlid = le16_to_cpu(id->cntlid);
ctrl->hmpre = le32_to_cpu(id->hmpre);
ctrl->hmmin = le32_to_cpu(id->hmmin);
ctrl->hmminds = le32_to_cpu(id->hmminds);
@@ -2731,6 +2904,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
if (ret < 0)
return ret;
+ if (!ctrl->identified)
+ nvme_hwmon_init(ctrl);
+
ctrl->identified = true;
return 0;
@@ -2748,7 +2924,6 @@ static int nvme_dev_open(struct inode *inode, struct file *file)
switch (ctrl->state) {
case NVME_CTRL_LIVE:
- case NVME_CTRL_ADMIN_ONLY:
break;
default:
return -EWOULDBLOCK;
@@ -2800,6 +2975,8 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
switch (cmd) {
case NVME_IOCTL_ADMIN_CMD:
return nvme_user_cmd(ctrl, NULL, argp);
+ case NVME_IOCTL_ADMIN64_CMD:
+ return nvme_user_cmd64(ctrl, NULL, argp);
case NVME_IOCTL_IO_CMD:
return nvme_dev_user_cmd(ctrl, argp);
case NVME_IOCTL_RESET:
@@ -2819,7 +2996,7 @@ static const struct file_operations nvme_dev_fops = {
.owner = THIS_MODULE,
.open = nvme_dev_open,
.unlocked_ioctl = nvme_dev_ioctl,
- .compat_ioctl = nvme_dev_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static ssize_t nvme_sysfs_reset(struct device *dev,
@@ -3007,6 +3184,8 @@ static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
nvme_show_int_function(cntlid);
nvme_show_int_function(numa_node);
+nvme_show_int_function(queue_count);
+nvme_show_int_function(sqsize);
static ssize_t nvme_sysfs_delete(struct device *dev,
struct device_attribute *attr, const char *buf,
@@ -3038,7 +3217,6 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
static const char *const state_name[] = {
[NVME_CTRL_NEW] = "new",
[NVME_CTRL_LIVE] = "live",
- [NVME_CTRL_ADMIN_ONLY] = "only-admin",
[NVME_CTRL_RESETTING] = "resetting",
[NVME_CTRL_CONNECTING] = "connecting",
[NVME_CTRL_DELETING] = "deleting",
@@ -3087,6 +3265,8 @@ static struct attribute *nvme_dev_attrs[] = {
&dev_attr_address.attr,
&dev_attr_state.attr,
&dev_attr_numa_node.attr,
+ &dev_attr_queue_count.attr,
+ &dev_attr_sqsize.attr,
NULL
};
@@ -3172,7 +3352,9 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
head->ns_id = nsid;
kref_init(&head->ref);
- nvme_report_ns_ids(ctrl, nsid, id, &head->ids);
+ ret = nvme_report_ns_ids(ctrl, nsid, id, &head->ids);
+ if (ret)
+ goto out_cleanup_srcu;
ret = __nvme_check_ids(ctrl->subsys, head);
if (ret) {
@@ -3197,6 +3379,8 @@ out_ida_remove:
out_free_head:
kfree(head);
out:
+ if (ret > 0)
+ ret = blk_status_to_errno(nvme_error_status(ret));
return ERR_PTR(ret);
}
@@ -3220,7 +3404,10 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
} else {
struct nvme_ns_ids ids;
- nvme_report_ns_ids(ctrl, nsid, id, &ids);
+ ret = nvme_report_ns_ids(ctrl, nsid, id, &ids);
+ if (ret)
+ goto out_unlock;
+
if (!nvme_ns_ids_equal(&head->ids, &ids)) {
dev_err(ctrl->device,
"IDs don't match for shared namespace %d\n",
@@ -3235,6 +3422,8 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
out_unlock:
mutex_unlock(&ctrl->subsys->lock);
+ if (ret > 0)
+ ret = blk_status_to_errno(nvme_error_status(ret));
return ret;
}
@@ -3326,11 +3515,9 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
nvme_set_queue_limits(ctrl, ns->queue);
- id = nvme_identify_ns(ctrl, nsid);
- if (!id) {
- ret = -EIO;
+ ret = nvme_identify_ns(ctrl, nsid, &id);
+ if (ret)
goto out_free_queue;
- }
if (id->ncap == 0) {
ret = -EINVAL;
@@ -3392,6 +3579,8 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
blk_cleanup_queue(ns->queue);
out_free_ns:
kfree(ns);
+ if (ret > 0)
+ ret = blk_status_to_errno(nvme_error_status(ret));
return ret;
}
@@ -3538,11 +3727,10 @@ static void nvme_scan_work(struct work_struct *work)
struct nvme_id_ctrl *id;
unsigned nn;
- if (ctrl->state != NVME_CTRL_LIVE)
+ /* No tagset on a live ctrl means IO queues could not created */
+ if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
return;
- WARN_ON_ONCE(!ctrl->tagset);
-
if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
dev_info(ctrl->device, "rescanning namespaces.\n");
nvme_clear_changed_ns_log(ctrl);
@@ -3605,6 +3793,33 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_remove_namespaces);
+static int nvme_class_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+ struct nvme_ctrl *ctrl =
+ container_of(dev, struct nvme_ctrl, ctrl_device);
+ struct nvmf_ctrl_options *opts = ctrl->opts;
+ int ret;
+
+ ret = add_uevent_var(env, "NVME_TRTYPE=%s", ctrl->ops->name);
+ if (ret)
+ return ret;
+
+ if (opts) {
+ ret = add_uevent_var(env, "NVME_TRADDR=%s", opts->traddr);
+ if (ret)
+ return ret;
+
+ ret = add_uevent_var(env, "NVME_TRSVCID=%s",
+ opts->trsvcid ?: "none");
+ if (ret)
+ return ret;
+
+ ret = add_uevent_var(env, "NVME_HOST_TRADDR=%s",
+ opts->host_traddr ?: "none");
+ }
+ return ret;
+}
+
static void nvme_aen_uevent(struct nvme_ctrl *ctrl)
{
char *envp[2] = { NULL, NULL };
@@ -3652,7 +3867,7 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
if (!log)
return;
- if (nvme_get_log(ctrl, NVME_NSID_ALL, 0, NVME_LOG_FW_SLOT, log,
+ if (nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_FW_SLOT, 0, log,
sizeof(*log), 0))
dev_warn(ctrl->device, "Get FW SLOT INFO log error\n");
kfree(log);
@@ -3676,13 +3891,13 @@ static void nvme_fw_act_work(struct work_struct *work)
if (time_after(jiffies, fw_act_timeout)) {
dev_warn(ctrl->device,
"Fw activation timeout, reset controller\n");
- nvme_reset_ctrl(ctrl);
- break;
+ nvme_try_sched_reset(ctrl);
+ return;
}
msleep(100);
}
- if (ctrl->state != NVME_CTRL_LIVE)
+ if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE))
return;
nvme_start_queues(ctrl);
@@ -3702,7 +3917,13 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
nvme_queue_scan(ctrl);
break;
case NVME_AER_NOTICE_FW_ACT_STARTING:
- queue_work(nvme_wq, &ctrl->fw_act_work);
+ /*
+ * We are (ab)using the RESETTING state to prevent subsequent
+ * recovery actions from interfering with the controller's
+ * firmware activation.
+ */
+ if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
+ queue_work(nvme_wq, &ctrl->fw_act_work);
break;
#ifdef CONFIG_NVME_MULTIPATH
case NVME_AER_NOTICE_ANA:
@@ -3711,6 +3932,9 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
queue_work(nvme_wq, &ctrl->ana_work);
break;
#endif
+ case NVME_AER_NOTICE_DISC_CHANGED:
+ ctrl->aen_result = result;
+ break;
default:
dev_warn(ctrl->device, "async event result %08x\n", result);
}
@@ -3757,10 +3981,10 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl)
if (ctrl->kato)
nvme_start_keep_alive(ctrl);
+ nvme_enable_aen(ctrl);
+
if (ctrl->queue_count > 1) {
nvme_queue_scan(ctrl);
- nvme_enable_aen(ctrl);
- queue_work(nvme_wq, &ctrl->async_event_work);
nvme_start_queues(ctrl);
}
}
@@ -3780,7 +4004,9 @@ static void nvme_free_ctrl(struct device *dev)
container_of(dev, struct nvme_ctrl, ctrl_device);
struct nvme_subsystem *subsys = ctrl->subsys;
- ida_simple_remove(&nvme_instance_ida, ctrl->instance);
+ if (subsys && ctrl->instance != subsys->instance)
+ ida_simple_remove(&nvme_instance_ida, ctrl->instance);
+
kfree(ctrl->effects);
nvme_mpath_uninit(ctrl);
__free_page(ctrl->discard_page);
@@ -3820,6 +4046,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work);
+ init_waitqueue_head(&ctrl->state_wq);
INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
@@ -3980,6 +4207,9 @@ void nvme_sync_queues(struct nvme_ctrl *ctrl)
list_for_each_entry(ns, &ctrl->namespaces, list)
blk_sync_queue(ns->queue);
up_read(&ctrl->namespaces_rwsem);
+
+ if (ctrl->admin_q)
+ blk_sync_queue(ctrl->admin_q);
}
EXPORT_SYMBOL_GPL(nvme_sync_queues);
@@ -4038,6 +4268,7 @@ static int __init nvme_core_init(void)
result = PTR_ERR(nvme_class);
goto unregister_chrdev;
}
+ nvme_class->dev_uevent = nvme_class_uevent;
nvme_subsys_class = class_create(THIS_MODULE, "nvme-subsystem");
if (IS_ERR(nvme_subsys_class)) {
@@ -4062,7 +4293,6 @@ out:
static void __exit nvme_core_exit(void)
{
- ida_destroy(&nvme_subsystems_ida);
class_destroy(nvme_subsys_class);
class_destroy(nvme_class);
unregister_chrdev_region(nvme_chr_devt, NVME_MINORS);
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 1994d5b42f94..74b8818ac9a1 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -150,7 +150,7 @@ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
cmd.prop_get.fctype = nvme_fabrics_type_property_get;
cmd.prop_get.offset = cpu_to_le32(off);
- ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &res, NULL, 0, 0,
+ ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0, 0,
NVME_QID_ANY, 0, 0, false);
if (ret >= 0)
@@ -197,7 +197,7 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
cmd.prop_get.attrib = 1;
cmd.prop_get.offset = cpu_to_le32(off);
- ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &res, NULL, 0, 0,
+ ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0, 0,
NVME_QID_ANY, 0, 0, false);
if (ret >= 0)
@@ -243,7 +243,7 @@ int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
cmd.prop_set.offset = cpu_to_le32(off);
cmd.prop_set.value = cpu_to_le64(val);
- ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, NULL, 0, 0,
+ ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, NULL, NULL, 0, 0,
NVME_QID_ANY, 0, 0, false);
if (unlikely(ret))
dev_err(ctrl->device,
@@ -381,8 +381,8 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
* Set keep-alive timeout in seconds granularity (ms * 1000)
* and add a grace period for controller kato enforcement
*/
- cmd.connect.kato = ctrl->opts->discovery_nqn ? 0 :
- cpu_to_le32((ctrl->kato + NVME_KATO_GRACE) * 1000);
+ cmd.connect.kato = ctrl->kato ?
+ cpu_to_le32((ctrl->kato + NVME_KATO_GRACE) * 1000) : 0;
if (ctrl->opts->disable_sqflow)
cmd.connect.cattr |= NVME_CONNECT_DISABLE_SQFLOW;
@@ -396,7 +396,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE);
strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
- ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &res,
+ ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res,
data, sizeof(*data), 0, NVME_QID_ANY, 1,
BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT, false);
if (ret) {
@@ -611,6 +611,7 @@ static const match_table_t opt_tokens = {
{ NVMF_OPT_DATA_DIGEST, "data_digest" },
{ NVMF_OPT_NR_WRITE_QUEUES, "nr_write_queues=%d" },
{ NVMF_OPT_NR_POLL_QUEUES, "nr_poll_queues=%d" },
+ { NVMF_OPT_TOS, "tos=%d" },
{ NVMF_OPT_ERR, NULL }
};
@@ -632,6 +633,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
opts->duplicate_connect = false;
opts->hdr_digest = false;
opts->data_digest = false;
+ opts->tos = -1; /* < 0 == use transport default */
options = o = kstrdup(buf, GFP_KERNEL);
if (!options)
@@ -738,13 +740,6 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
pr_warn("keep_alive_tmo 0 won't execute keep alives!!!\n");
}
opts->kato = token;
-
- if (opts->discovery_nqn && opts->kato) {
- pr_err("Discovery controllers cannot accept KATO != 0\n");
- ret = -EINVAL;
- goto out;
- }
-
break;
case NVMF_OPT_CTRL_LOSS_TMO:
if (match_int(args, &token)) {
@@ -856,6 +851,22 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
}
opts->nr_poll_queues = token;
break;
+ case NVMF_OPT_TOS:
+ if (match_int(args, &token)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (token < 0) {
+ pr_err("Invalid type of service %d\n", token);
+ ret = -EINVAL;
+ goto out;
+ }
+ if (token > 255) {
+ pr_warn("Clamping type of service to 255\n");
+ token = 255;
+ }
+ opts->tos = token;
+ break;
default:
pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
p);
@@ -865,7 +876,6 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
}
if (opts->discovery_nqn) {
- opts->kato = 0;
opts->nr_io_queues = 0;
opts->nr_write_queues = 0;
opts->nr_poll_queues = 0;
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 3044d8b99a24..a0ec40ab62ee 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -55,6 +55,7 @@ enum {
NVMF_OPT_DATA_DIGEST = 1 << 16,
NVMF_OPT_NR_WRITE_QUEUES = 1 << 17,
NVMF_OPT_NR_POLL_QUEUES = 1 << 18,
+ NVMF_OPT_TOS = 1 << 19,
};
/**
@@ -87,6 +88,7 @@ enum {
* @data_digest: generate/verify data digest (TCP)
* @nr_write_queues: number of queues for write I/O
* @nr_poll_queues: number of queues for polling I/O
+ * @tos: type of service
*/
struct nvmf_ctrl_options {
unsigned mask;
@@ -108,6 +110,7 @@ struct nvmf_ctrl_options {
bool data_digest;
unsigned int nr_write_queues;
unsigned int nr_poll_queues;
+ int tos;
};
/*
@@ -179,8 +182,7 @@ bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
bool queue_live)
{
- if (likely(ctrl->state == NVME_CTRL_LIVE ||
- ctrl->state == NVME_CTRL_ADMIN_ONLY))
+ if (likely(ctrl->state == NVME_CTRL_LIVE))
return true;
return __nvmf_check_ready(ctrl, rq, queue_live);
}
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 232d8094091b..5a70ac395d53 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -95,7 +95,7 @@ struct nvme_fc_fcp_op {
struct nvme_fcp_op_w_sgl {
struct nvme_fc_fcp_op op;
- struct scatterlist sgl[SG_CHUNK_SIZE];
+ struct scatterlist sgl[NVME_INLINE_SG_CNT];
uint8_t priv[0];
};
@@ -342,7 +342,8 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
!template->ls_req || !template->fcp_io ||
!template->ls_abort || !template->fcp_abort ||
!template->max_hw_queues || !template->max_sgl_segments ||
- !template->max_dif_sgl_segments || !template->dma_boundary) {
+ !template->max_dif_sgl_segments || !template->dma_boundary ||
+ !template->module) {
ret = -EINVAL;
goto out_reghost_failed;
}
@@ -1224,7 +1225,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
lsreq->rqstlen = sizeof(*assoc_rqst);
lsreq->rspaddr = assoc_acc;
lsreq->rsplen = sizeof(*assoc_acc);
- lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
+ lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
if (ret)
@@ -1264,7 +1265,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
if (fcret) {
ret = -EBADF;
dev_err(ctrl->dev,
- "q %d connect failed: %s\n",
+ "q %d Create Association LS failed: %s\n",
queue->qnum, validation_errors[fcret]);
} else {
ctrl->association_id =
@@ -1332,7 +1333,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
lsreq->rqstlen = sizeof(*conn_rqst);
lsreq->rspaddr = conn_acc;
lsreq->rsplen = sizeof(*conn_acc);
- lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
+ lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
if (ret)
@@ -1363,7 +1364,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
if (fcret) {
ret = -EBADF;
dev_err(ctrl->dev,
- "q %d connect failed: %s\n",
+ "q %d Create I/O Connection LS failed: %s\n",
queue->qnum, validation_errors[fcret]);
} else {
queue->connection_id =
@@ -1376,7 +1377,7 @@ out_free_buffer:
out_no_memory:
if (ret)
dev_err(ctrl->dev,
- "queue %d connect command failed (%d).\n",
+ "queue %d connect I/O queue failed (%d).\n",
queue->qnum, ret);
return ret;
}
@@ -1413,8 +1414,8 @@ nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status)
static void
nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
{
- struct fcnvme_ls_disconnect_rqst *discon_rqst;
- struct fcnvme_ls_disconnect_acc *discon_acc;
+ struct fcnvme_ls_disconnect_assoc_rqst *discon_rqst;
+ struct fcnvme_ls_disconnect_assoc_acc *discon_acc;
struct nvmefc_ls_req_op *lsop;
struct nvmefc_ls_req *lsreq;
int ret;
@@ -1430,11 +1431,11 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
lsreq = &lsop->ls_req;
lsreq->private = (void *)&lsop[1];
- discon_rqst = (struct fcnvme_ls_disconnect_rqst *)
+ discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)
(lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
- discon_acc = (struct fcnvme_ls_disconnect_acc *)&discon_rqst[1];
+ discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1];
- discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT;
+ discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT_ASSOC;
discon_rqst->desc_list_len = cpu_to_be32(
sizeof(struct fcnvme_lsdesc_assoc_id) +
sizeof(struct fcnvme_lsdesc_disconn_cmd));
@@ -1451,22 +1452,17 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
discon_rqst->discon_cmd.desc_len =
fcnvme_lsdesc_len(
sizeof(struct fcnvme_lsdesc_disconn_cmd));
- discon_rqst->discon_cmd.scope = FCNVME_DISCONN_ASSOCIATION;
- discon_rqst->discon_cmd.id = cpu_to_be64(ctrl->association_id);
lsreq->rqstaddr = discon_rqst;
lsreq->rqstlen = sizeof(*discon_rqst);
lsreq->rspaddr = discon_acc;
lsreq->rsplen = sizeof(*discon_acc);
- lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
+ lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop,
nvme_fc_disconnect_assoc_done);
if (ret)
kfree(lsop);
-
- /* only meaningful part to terminating the association */
- ctrl->association_id = 0;
}
@@ -1608,9 +1604,13 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
sizeof(op->rsp_iu), DMA_FROM_DEVICE);
if (opstate == FCPOP_STATE_ABORTED)
- status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
- else if (freq->status)
- status = cpu_to_le16(NVME_SC_INTERNAL << 1);
+ status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
+ else if (freq->status) {
+ status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
+ dev_info(ctrl->ctrl.device,
+ "NVME-FC{%d}: io failed due to lldd error %d\n",
+ ctrl->cnum, freq->status);
+ }
/*
* For the linux implementation, if we have an unsuccesful
@@ -1637,8 +1637,13 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
* no payload in the CQE by the transport.
*/
if (freq->transferred_length !=
- be32_to_cpu(op->cmd_iu.data_len)) {
- status = cpu_to_le16(NVME_SC_INTERNAL << 1);
+ be32_to_cpu(op->cmd_iu.data_len)) {
+ status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
+ dev_info(ctrl->ctrl.device,
+ "NVME-FC{%d}: io failed due to bad transfer "
+ "length: %d vs expected %d\n",
+ ctrl->cnum, freq->transferred_length,
+ be32_to_cpu(op->cmd_iu.data_len));
goto done;
}
result.u64 = 0;
@@ -1653,9 +1658,19 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
(freq->rcv_rsplen / 4) ||
be32_to_cpu(op->rsp_iu.xfrd_len) !=
freq->transferred_length ||
- op->rsp_iu.status_code ||
+ op->rsp_iu.ersp_result ||
sqe->common.command_id != cqe->command_id)) {
- status = cpu_to_le16(NVME_SC_INTERNAL << 1);
+ status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
+ dev_info(ctrl->ctrl.device,
+ "NVME-FC{%d}: io failed due to bad NVMe_ERSP: "
+ "iu len %d, xfr len %d vs %d, status code "
+ "%d, cmdid %d vs %d\n",
+ ctrl->cnum, be16_to_cpu(op->rsp_iu.iu_len),
+ be32_to_cpu(op->rsp_iu.xfrd_len),
+ freq->transferred_length,
+ op->rsp_iu.ersp_result,
+ sqe->common.command_id,
+ cqe->command_id);
goto done;
}
result = cqe->result;
@@ -1663,7 +1678,11 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
break;
default:
- status = cpu_to_le16(NVME_SC_INTERNAL << 1);
+ status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
+ dev_info(ctrl->ctrl.device,
+ "NVME-FC{%d}: io failed due to odd NVMe_xRSP iu "
+ "len %d\n",
+ ctrl->cnum, freq->rcv_rsplen);
goto done;
}
@@ -1708,9 +1727,14 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl,
op->rq = rq;
op->rqno = rqno;
- cmdiu->scsi_id = NVME_CMD_SCSI_ID;
+ cmdiu->format_id = NVME_CMD_FORMAT_ID;
cmdiu->fc_id = NVME_CMD_FC_ID;
cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32));
+ if (queue->qnum)
+ cmdiu->rsv_cat = fccmnd_set_cat_css(0,
+ (NVME_CC_CSS_NVM >> NVME_CC_CSS_SHIFT));
+ else
+ cmdiu->rsv_cat = fccmnd_set_cat_admin(0);
op->fcp_req.cmddma = fc_dma_map_single(ctrl->lport->dev,
&op->cmd_iu, sizeof(op->cmd_iu), DMA_TO_DEVICE);
@@ -1992,6 +2016,7 @@ nvme_fc_ctrl_free(struct kref *ref)
{
struct nvme_fc_ctrl *ctrl =
container_of(ref, struct nvme_fc_ctrl, ref);
+ struct nvme_fc_lport *lport = ctrl->lport;
unsigned long flags;
if (ctrl->ctrl.tagset) {
@@ -2006,6 +2031,7 @@ nvme_fc_ctrl_free(struct kref *ref)
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
blk_cleanup_queue(ctrl->ctrl.admin_q);
+ blk_cleanup_queue(ctrl->ctrl.fabrics_q);
blk_mq_free_tag_set(&ctrl->admin_tag_set);
kfree(ctrl->queues);
@@ -2017,6 +2043,7 @@ nvme_fc_ctrl_free(struct kref *ref)
if (ctrl->ctrl.opts)
nvmf_free_options(ctrl->ctrl.opts);
kfree(ctrl);
+ module_put(lport->ops->module);
}
static void
@@ -2107,7 +2134,6 @@ nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
struct nvme_fc_fcp_op *op)
{
struct nvmefc_fcp_req *freq = &op->fcp_req;
- enum dma_data_direction dir;
int ret;
freq->sg_cnt = 0;
@@ -2118,17 +2144,16 @@ nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
freq->sg_table.sgl = freq->first_sgl;
ret = sg_alloc_table_chained(&freq->sg_table,
blk_rq_nr_phys_segments(rq), freq->sg_table.sgl,
- SG_CHUNK_SIZE);
+ NVME_INLINE_SG_CNT);
if (ret)
return -ENOMEM;
op->nents = blk_rq_map_sg(rq->q, rq, freq->sg_table.sgl);
WARN_ON(op->nents > blk_rq_nr_phys_segments(rq));
- dir = (rq_data_dir(rq) == WRITE) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl,
- op->nents, dir);
+ op->nents, rq_dma_dir(rq));
if (unlikely(freq->sg_cnt <= 0)) {
- sg_free_table_chained(&freq->sg_table, SG_CHUNK_SIZE);
+ sg_free_table_chained(&freq->sg_table, NVME_INLINE_SG_CNT);
freq->sg_cnt = 0;
return -EFAULT;
}
@@ -2149,12 +2174,9 @@ nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
return;
fc_dma_unmap_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents,
- ((rq_data_dir(rq) == WRITE) ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE));
-
- nvme_cleanup_cmd(rq);
+ rq_dma_dir(rq));
- sg_free_table_chained(&freq->sg_table, SG_CHUNK_SIZE);
+ sg_free_table_chained(&freq->sg_table, NVME_INLINE_SG_CNT);
freq->sg_cnt = 0;
}
@@ -2284,6 +2306,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
if (!(op->flags & FCOP_FLAGS_AEN))
nvme_fc_unmap_data(ctrl, op->rq, op);
+ nvme_cleanup_cmd(op->rq);
nvme_fc_ctrl_put(ctrl);
if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE &&
@@ -2633,8 +2656,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
if (ret)
goto out_delete_hw_queue;
- blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
-
ret = nvmf_connect_admin_queue(&ctrl->ctrl);
if (ret)
goto out_disconnect_admin_queue;
@@ -2648,23 +2669,15 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
* prior connection values
*/
- ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap);
- if (ret) {
- dev_err(ctrl->ctrl.device,
- "prop_get NVME_REG_CAP failed\n");
- goto out_disconnect_admin_queue;
- }
-
- ctrl->ctrl.sqsize =
- min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
-
- ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
+ ret = nvme_enable_ctrl(&ctrl->ctrl);
if (ret)
goto out_disconnect_admin_queue;
ctrl->ctrl.max_hw_sectors =
(ctrl->lport->ops->max_sgl_segments - 1) << (PAGE_SHIFT - 9);
+ blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
+
ret = nvme_init_identify(&ctrl->ctrl);
if (ret)
goto out_disconnect_admin_queue;
@@ -2684,7 +2697,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
/* warn if maxcmd is lower than queue_size */
dev_warn(ctrl->ctrl.device,
"queue_size %zu > ctrl maxcmd %u, reducing "
- "to queue_size\n",
+ "to maxcmd\n",
opts->queue_size, ctrl->ctrl.maxcmd);
opts->queue_size = ctrl->ctrl.maxcmd;
}
@@ -2692,7 +2705,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
if (opts->queue_size > ctrl->ctrl.sqsize + 1) {
/* warn if sqsize is lower than queue_size */
dev_warn(ctrl->ctrl.device,
- "queue_size %zu > ctrl sqsize %u, clamping down\n",
+ "queue_size %zu > ctrl sqsize %u, reducing "
+ "to sqsize\n",
opts->queue_size, ctrl->ctrl.sqsize + 1);
opts->queue_size = ctrl->ctrl.sqsize + 1;
}
@@ -2728,6 +2742,7 @@ out_term_aen_ops:
out_disconnect_admin_queue:
/* send a Disconnect(association) LS to fc-nvme target */
nvme_fc_xmt_disconnect_assoc(ctrl);
+ ctrl->association_id = 0;
out_delete_hw_queue:
__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
out_free_queue:
@@ -2774,6 +2789,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
nvme_stop_queues(&ctrl->ctrl);
blk_mq_tagset_busy_iter(&ctrl->tag_set,
nvme_fc_terminate_exchange, &ctrl->ctrl);
+ blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
}
/*
@@ -2796,6 +2812,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
nvme_fc_terminate_exchange, &ctrl->ctrl);
+ blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
/* kill the aens as they are a separate path */
nvme_fc_abort_aen_ops(ctrl);
@@ -2817,6 +2834,8 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
if (ctrl->association_id)
nvme_fc_xmt_disconnect_assoc(ctrl);
+ ctrl->association_id = 0;
+
if (ctrl->ctrl.tagset) {
nvme_fc_delete_hw_io_queues(ctrl);
nvme_fc_free_io_queues(ctrl);
@@ -2894,10 +2913,22 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
static void
__nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl)
{
- nvme_stop_keep_alive(&ctrl->ctrl);
+ /*
+ * if state is connecting - the error occurred as part of a
+ * reconnect attempt. The create_association error paths will
+ * clean up any outstanding io.
+ *
+ * if it's a different state - ensure all pending io is
+ * terminated. Given this can delay while waiting for the
+ * aborted io to return, we recheck adapter state below
+ * before changing state.
+ */
+ if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
+ nvme_stop_keep_alive(&ctrl->ctrl);
- /* will block will waiting for io to terminate */
- nvme_fc_delete_association(ctrl);
+ /* will block will waiting for io to terminate */
+ nvme_fc_delete_association(ctrl);
+ }
if (ctrl->ctrl.state != NVME_CTRL_CONNECTING &&
!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
@@ -3043,10 +3074,15 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
goto out_fail;
}
+ if (!try_module_get(lport->ops->module)) {
+ ret = -EUNATCH;
+ goto out_free_ctrl;
+ }
+
idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL);
if (idx < 0) {
ret = -ENOSPC;
- goto out_free_ctrl;
+ goto out_mod_put;
}
ctrl->ctrl.opts = opts;
@@ -3109,10 +3145,16 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
goto out_free_queues;
ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set;
+ ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set);
+ if (IS_ERR(ctrl->ctrl.fabrics_q)) {
+ ret = PTR_ERR(ctrl->ctrl.fabrics_q);
+ goto out_free_admin_tag_set;
+ }
+
ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
if (IS_ERR(ctrl->ctrl.admin_q)) {
ret = PTR_ERR(ctrl->ctrl.admin_q);
- goto out_free_admin_tag_set;
+ goto out_cleanup_fabrics_q;
}
/*
@@ -3184,6 +3226,8 @@ fail_ctrl:
out_cleanup_admin_q:
blk_cleanup_queue(ctrl->ctrl.admin_q);
+out_cleanup_fabrics_q:
+ blk_cleanup_queue(ctrl->ctrl.fabrics_q);
out_free_admin_tag_set:
blk_mq_free_tag_set(&ctrl->admin_tag_set);
out_free_queues:
@@ -3191,6 +3235,8 @@ out_free_queues:
out_free_ida:
put_device(ctrl->dev);
ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum);
+out_mod_put:
+ module_put(lport->ops->module);
out_free_ctrl:
kfree(ctrl);
out_fail:
diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c
new file mode 100644
index 000000000000..2e6477ed420f
--- /dev/null
+++ b/drivers/nvme/host/hwmon.c
@@ -0,0 +1,256 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NVM Express hardware monitoring support
+ * Copyright (c) 2019, Guenter Roeck
+ */
+
+#include <linux/hwmon.h>
+#include <linux/units.h>
+#include <asm/unaligned.h>
+
+#include "nvme.h"
+
+struct nvme_hwmon_data {
+ struct nvme_ctrl *ctrl;
+ struct nvme_smart_log log;
+ struct mutex read_lock;
+};
+
+static int nvme_get_temp_thresh(struct nvme_ctrl *ctrl, int sensor, bool under,
+ long *temp)
+{
+ unsigned int threshold = sensor << NVME_TEMP_THRESH_SELECT_SHIFT;
+ u32 status;
+ int ret;
+
+ if (under)
+ threshold |= NVME_TEMP_THRESH_TYPE_UNDER;
+
+ ret = nvme_get_features(ctrl, NVME_FEAT_TEMP_THRESH, threshold, NULL, 0,
+ &status);
+ if (ret > 0)
+ return -EIO;
+ if (ret < 0)
+ return ret;
+ *temp = kelvin_to_millicelsius(status & NVME_TEMP_THRESH_MASK);
+
+ return 0;
+}
+
+static int nvme_set_temp_thresh(struct nvme_ctrl *ctrl, int sensor, bool under,
+ long temp)
+{
+ unsigned int threshold = sensor << NVME_TEMP_THRESH_SELECT_SHIFT;
+ int ret;
+
+ temp = millicelsius_to_kelvin(temp);
+ threshold |= clamp_val(temp, 0, NVME_TEMP_THRESH_MASK);
+
+ if (under)
+ threshold |= NVME_TEMP_THRESH_TYPE_UNDER;
+
+ ret = nvme_set_features(ctrl, NVME_FEAT_TEMP_THRESH, threshold, NULL, 0,
+ NULL);
+ if (ret > 0)
+ return -EIO;
+
+ return ret;
+}
+
+static int nvme_hwmon_get_smart_log(struct nvme_hwmon_data *data)
+{
+ int ret;
+
+ ret = nvme_get_log(data->ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0,
+ &data->log, sizeof(data->log), 0);
+
+ return ret <= 0 ? ret : -EIO;
+}
+
+static int nvme_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
+{
+ struct nvme_hwmon_data *data = dev_get_drvdata(dev);
+ struct nvme_smart_log *log = &data->log;
+ int temp;
+ int err;
+
+ /*
+ * First handle attributes which don't require us to read
+ * the smart log.
+ */
+ switch (attr) {
+ case hwmon_temp_max:
+ return nvme_get_temp_thresh(data->ctrl, channel, false, val);
+ case hwmon_temp_min:
+ return nvme_get_temp_thresh(data->ctrl, channel, true, val);
+ case hwmon_temp_crit:
+ *val = kelvin_to_millicelsius(data->ctrl->cctemp);
+ return 0;
+ default:
+ break;
+ }
+
+ mutex_lock(&data->read_lock);
+ err = nvme_hwmon_get_smart_log(data);
+ if (err)
+ goto unlock;
+
+ switch (attr) {
+ case hwmon_temp_input:
+ if (!channel)
+ temp = get_unaligned_le16(log->temperature);
+ else
+ temp = le16_to_cpu(log->temp_sensor[channel - 1]);
+ *val = kelvin_to_millicelsius(temp);
+ break;
+ case hwmon_temp_alarm:
+ *val = !!(log->critical_warning & NVME_SMART_CRIT_TEMPERATURE);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+unlock:
+ mutex_unlock(&data->read_lock);
+ return err;
+}
+
+static int nvme_hwmon_write(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long val)
+{
+ struct nvme_hwmon_data *data = dev_get_drvdata(dev);
+
+ switch (attr) {
+ case hwmon_temp_max:
+ return nvme_set_temp_thresh(data->ctrl, channel, false, val);
+ case hwmon_temp_min:
+ return nvme_set_temp_thresh(data->ctrl, channel, true, val);
+ default:
+ break;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static const char * const nvme_hwmon_sensor_names[] = {
+ "Composite",
+ "Sensor 1",
+ "Sensor 2",
+ "Sensor 3",
+ "Sensor 4",
+ "Sensor 5",
+ "Sensor 6",
+ "Sensor 7",
+ "Sensor 8",
+};
+
+static int nvme_hwmon_read_string(struct device *dev,
+ enum hwmon_sensor_types type, u32 attr,
+ int channel, const char **str)
+{
+ *str = nvme_hwmon_sensor_names[channel];
+ return 0;
+}
+
+static umode_t nvme_hwmon_is_visible(const void *_data,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel)
+{
+ const struct nvme_hwmon_data *data = _data;
+
+ switch (attr) {
+ case hwmon_temp_crit:
+ if (!channel && data->ctrl->cctemp)
+ return 0444;
+ break;
+ case hwmon_temp_max:
+ case hwmon_temp_min:
+ if ((!channel && data->ctrl->wctemp) ||
+ (channel && data->log.temp_sensor[channel - 1])) {
+ if (data->ctrl->quirks &
+ NVME_QUIRK_NO_TEMP_THRESH_CHANGE)
+ return 0444;
+ return 0644;
+ }
+ break;
+ case hwmon_temp_alarm:
+ if (!channel)
+ return 0444;
+ break;
+ case hwmon_temp_input:
+ case hwmon_temp_label:
+ if (!channel || data->log.temp_sensor[channel - 1])
+ return 0444;
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static const struct hwmon_channel_info *nvme_hwmon_info[] = {
+ HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ),
+ HWMON_CHANNEL_INFO(temp,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_CRIT | HWMON_T_LABEL | HWMON_T_ALARM,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_LABEL),
+ NULL
+};
+
+static const struct hwmon_ops nvme_hwmon_ops = {
+ .is_visible = nvme_hwmon_is_visible,
+ .read = nvme_hwmon_read,
+ .read_string = nvme_hwmon_read_string,
+ .write = nvme_hwmon_write,
+};
+
+static const struct hwmon_chip_info nvme_hwmon_chip_info = {
+ .ops = &nvme_hwmon_ops,
+ .info = nvme_hwmon_info,
+};
+
+void nvme_hwmon_init(struct nvme_ctrl *ctrl)
+{
+ struct device *dev = ctrl->dev;
+ struct nvme_hwmon_data *data;
+ struct device *hwmon;
+ int err;
+
+ data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return;
+
+ data->ctrl = ctrl;
+ mutex_init(&data->read_lock);
+
+ err = nvme_hwmon_get_smart_log(data);
+ if (err) {
+ dev_warn(dev, "Failed to read smart log (error %d)\n", err);
+ devm_kfree(dev, data);
+ return;
+ }
+
+ hwmon = devm_hwmon_device_register_with_info(dev, "nvme", data,
+ &nvme_hwmon_chip_info,
+ NULL);
+ if (IS_ERR(hwmon)) {
+ dev_warn(dev, "Failed to instantiate hwmon device\n");
+ devm_kfree(dev, data);
+ }
+}
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index ba009d4c9dfa..ec46693f6b64 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -667,11 +667,14 @@ static struct request *nvme_nvm_alloc_request(struct request_queue *q,
return rq;
}
-static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
+static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd,
+ void *buf)
{
+ struct nvm_geo *geo = &dev->geo;
struct request_queue *q = dev->q;
struct nvme_nvm_command *cmd;
struct request *rq;
+ int ret;
cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL);
if (!cmd)
@@ -679,8 +682,15 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
rq = nvme_nvm_alloc_request(q, rqd, cmd);
if (IS_ERR(rq)) {
- kfree(cmd);
- return PTR_ERR(rq);
+ ret = PTR_ERR(rq);
+ goto err_free_cmd;
+ }
+
+ if (buf) {
+ ret = blk_rq_map_kern(q, rq, buf, geo->csecs * rqd->nr_ppas,
+ GFP_KERNEL);
+ if (ret)
+ goto err_free_cmd;
}
rq->end_io_data = rqd;
@@ -688,33 +698,9 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_io);
return 0;
-}
-
-static int nvme_nvm_submit_io_sync(struct nvm_dev *dev, struct nvm_rq *rqd)
-{
- struct request_queue *q = dev->q;
- struct request *rq;
- struct nvme_nvm_command cmd;
- int ret = 0;
-
- memset(&cmd, 0, sizeof(struct nvme_nvm_command));
-
- rq = nvme_nvm_alloc_request(q, rqd, &cmd);
- if (IS_ERR(rq))
- return PTR_ERR(rq);
-
- /* I/Os can fail and the error is signaled through rqd. Callers must
- * handle the error accordingly.
- */
- blk_execute_rq(q, NULL, rq, 0);
- if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
- ret = -EINTR;
-
- rqd->ppa_status = le64_to_cpu(nvme_req(rq)->result.u64);
- rqd->error = nvme_req(rq)->status;
-
- blk_mq_free_request(rq);
+err_free_cmd:
+ kfree(cmd);
return ret;
}
@@ -754,7 +740,6 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = {
.get_chk_meta = nvme_nvm_get_chk_meta,
.submit_io = nvme_nvm_submit_io,
- .submit_io_sync = nvme_nvm_submit_io_sync,
.create_dma_pool = nvme_nvm_create_dma_pool,
.destroy_dma_pool = nvme_nvm_destroy_dma_pool,
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 888d4543894e..797c18337d96 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -95,6 +95,7 @@ void nvme_failover_req(struct request *req)
}
break;
case NVME_SC_HOST_PATH_ERROR:
+ case NVME_SC_HOST_ABORTED_CMD:
/*
* Temporary transport disruption in talking to the controller.
* Try to send on a new path.
@@ -158,9 +159,11 @@ void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
struct nvme_ns *ns;
mutex_lock(&ctrl->scan_lock);
+ down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list)
if (nvme_mpath_clear_current_path(ns))
kblockd_schedule_work(&ns->head->requeue_work);
+ up_read(&ctrl->namespaces_rwsem);
mutex_unlock(&ctrl->scan_lock);
}
@@ -428,6 +431,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
srcu_read_unlock(&head->srcu, srcu_idx);
}
+ synchronize_srcu(&ns->head->srcu);
kblockd_schedule_work(&ns->head->requeue_work);
}
@@ -443,8 +447,14 @@ static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
for (i = 0; i < le16_to_cpu(ctrl->ana_log_buf->ngrps); i++) {
struct nvme_ana_group_desc *desc = base + offset;
- u32 nr_nsids = le32_to_cpu(desc->nnsids);
- size_t nsid_buf_size = nr_nsids * sizeof(__le32);
+ u32 nr_nsids;
+ size_t nsid_buf_size;
+
+ if (WARN_ON_ONCE(offset > ctrl->ana_log_size - sizeof(*desc)))
+ return -EINVAL;
+
+ nr_nsids = le32_to_cpu(desc->nnsids);
+ nsid_buf_size = nr_nsids * sizeof(__le32);
if (WARN_ON_ONCE(desc->grpid == 0))
return -EINVAL;
@@ -464,8 +474,6 @@ static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
return error;
offset += nsid_buf_size;
- if (WARN_ON_ONCE(offset > ctrl->ana_log_size - sizeof(*desc)))
- return -EINVAL;
}
return 0;
@@ -508,25 +516,26 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
down_write(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list) {
- if (ns->head->ns_id != le32_to_cpu(desc->nsids[n]))
+ unsigned nsid = le32_to_cpu(desc->nsids[n]);
+
+ if (ns->head->ns_id < nsid)
continue;
- nvme_update_ns_ana_state(desc, ns);
+ if (ns->head->ns_id == nsid)
+ nvme_update_ns_ana_state(desc, ns);
if (++n == nr_nsids)
break;
}
up_write(&ctrl->namespaces_rwsem);
- WARN_ON_ONCE(n < nr_nsids);
return 0;
}
-static int nvme_read_ana_log(struct nvme_ctrl *ctrl, bool groups_only)
+static int nvme_read_ana_log(struct nvme_ctrl *ctrl)
{
u32 nr_change_groups = 0;
int error;
mutex_lock(&ctrl->ana_lock);
- error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_ANA,
- groups_only ? NVME_ANA_LOG_RGO : 0,
+ error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_ANA, 0,
ctrl->ana_log_buf, ctrl->ana_log_size, 0);
if (error) {
dev_warn(ctrl->device, "Failed to get ANA log: %d\n", error);
@@ -562,7 +571,7 @@ static void nvme_ana_work(struct work_struct *work)
{
struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, ana_work);
- nvme_read_ana_log(ctrl, false);
+ nvme_read_ana_log(ctrl);
}
static void nvme_anatt_timeout(struct timer_list *t)
@@ -712,7 +721,7 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
goto out;
}
- error = nvme_read_ana_log(ctrl, true);
+ error = nvme_read_ana_log(ctrl);
if (error)
goto out_free_ana_log_buf;
return 0;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 778b3a0b6adb..1024fec7914c 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -15,6 +15,9 @@
#include <linux/sed-opal.h>
#include <linux/fault-inject.h>
#include <linux/rcupdate.h>
+#include <linux/wait.h>
+
+#include <trace/events/block.h>
extern unsigned int nvme_io_timeout;
#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ)
@@ -25,6 +28,12 @@ extern unsigned int admin_timeout;
#define NVME_DEFAULT_KATO 5
#define NVME_KATO_GRACE 10
+#ifdef CONFIG_ARCH_NO_SG_CHAIN
+#define NVME_INLINE_SG_CNT 0
+#else
+#define NVME_INLINE_SG_CNT 2
+#endif
+
extern struct workqueue_struct *nvme_wq;
extern struct workqueue_struct *nvme_reset_wq;
extern struct workqueue_struct *nvme_delete_wq;
@@ -92,6 +101,31 @@ enum nvme_quirks {
* Broken Write Zeroes.
*/
NVME_QUIRK_DISABLE_WRITE_ZEROES = (1 << 9),
+
+ /*
+ * Force simple suspend/resume path.
+ */
+ NVME_QUIRK_SIMPLE_SUSPEND = (1 << 10),
+
+ /*
+ * Use only one interrupt vector for all queues
+ */
+ NVME_QUIRK_SINGLE_VECTOR = (1 << 11),
+
+ /*
+ * Use non-standard 128 bytes SQEs.
+ */
+ NVME_QUIRK_128_BYTES_SQES = (1 << 12),
+
+ /*
+ * Prevent tag overlap between queues
+ */
+ NVME_QUIRK_SHARED_TAGS = (1 << 13),
+
+ /*
+ * Don't change the value of the temperature threshold feature
+ */
+ NVME_QUIRK_NO_TEMP_THRESH_CHANGE = (1 << 14),
};
/*
@@ -139,7 +173,6 @@ static inline u16 nvme_req_qid(struct request *req)
enum nvme_ctrl_state {
NVME_CTRL_NEW,
NVME_CTRL_LIVE,
- NVME_CTRL_ADMIN_ONLY, /* Only admin queue live */
NVME_CTRL_RESETTING,
NVME_CTRL_CONNECTING,
NVME_CTRL_DELETING,
@@ -164,6 +197,7 @@ struct nvme_ctrl {
const struct nvme_ctrl_ops *ops;
struct request_queue *admin_q;
struct request_queue *connect_q;
+ struct request_queue *fabrics_q;
struct device *dev;
int instance;
int numa_node;
@@ -176,6 +210,7 @@ struct nvme_ctrl {
struct cdev cdev;
struct work_struct reset_work;
struct work_struct delete_work;
+ wait_queue_head_t state_wq;
struct nvme_subsystem *subsys;
struct list_head subsys_entry;
@@ -198,6 +233,7 @@ struct nvme_ctrl {
u16 oacs;
u16 nssa;
u16 nr_streams;
+ u16 sqsize;
u32 max_namespaces;
atomic_t abort_limit;
u8 vwc;
@@ -206,6 +242,8 @@ struct nvme_ctrl {
u16 kas;
u8 npss;
u8 apsta;
+ u16 wctemp;
+ u16 cctemp;
u32 oaes;
u32 aen_result;
u32 ctratt;
@@ -246,7 +284,6 @@ struct nvme_ctrl {
u16 hmmaxd;
/* Fabrics only */
- u16 sqsize;
u32 ioccsz;
u32 iorcsz;
u16 icdoff;
@@ -395,9 +432,20 @@ static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65);
}
-static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
+/*
+ * Convert a 512B sector number to a device logical block number.
+ */
+static inline u64 nvme_sect_to_lba(struct nvme_ns *ns, sector_t sector)
+{
+ return sector >> (ns->lba_shift - SECTOR_SHIFT);
+}
+
+/*
+ * Convert a device logical block number to a 512B sector number.
+ */
+static inline sector_t nvme_lba_to_sect(struct nvme_ns *ns, u64 lba)
{
- return (sector >> (ns->lba_shift - 9));
+ return lba << (ns->lba_shift - SECTOR_SHIFT);
}
static inline void nvme_end_request(struct request *req, __le16 status,
@@ -422,12 +470,18 @@ static inline void nvme_put_ctrl(struct nvme_ctrl *ctrl)
put_device(ctrl->device);
}
+static inline bool nvme_is_aen_req(u16 qid, __u16 command_id)
+{
+ return !qid && command_id >= NVME_AQ_BLK_MQ_DEPTH;
+}
+
void nvme_complete_rq(struct request *req);
bool nvme_cancel_request(struct request *req, void *data, bool reserved);
bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
enum nvme_ctrl_state new_state);
-int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap);
-int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap);
+bool nvme_wait_reset(struct nvme_ctrl *ctrl);
+int nvme_disable_ctrl(struct nvme_ctrl *ctrl);
+int nvme_enable_ctrl(struct nvme_ctrl *ctrl);
int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
const struct nvme_ctrl_ops *ops, unsigned long quirks);
@@ -476,6 +530,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
+int nvme_try_sched_reset(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
@@ -515,6 +570,16 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
kblockd_schedule_work(&head->requeue_work);
}
+static inline void nvme_trace_bio_complete(struct request *req,
+ blk_status_t status)
+{
+ struct nvme_ns *ns = req->q->queuedata;
+
+ if (req->cmd_flags & REQ_NVME_MPATH)
+ trace_block_bio_complete(ns->head->disk->queue,
+ req->bio, status);
+}
+
extern struct device_attribute dev_attr_ana_grpid;
extern struct device_attribute dev_attr_ana_state;
extern struct device_attribute subsys_attr_iopolicy;
@@ -562,6 +627,10 @@ static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
{
}
+static inline void nvme_trace_bio_complete(struct request *req,
+ blk_status_t status)
+{
+}
static inline int nvme_mpath_init(struct nvme_ctrl *ctrl,
struct nvme_id_ctrl *id)
{
@@ -612,4 +681,10 @@ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
return dev_to_disk(dev)->private_data;
}
+#ifdef CONFIG_NVME_HWMON
+void nvme_hwmon_init(struct nvme_ctrl *ctrl);
+#else
+static inline void nvme_hwmon_init(struct nvme_ctrl *ctrl) { }
+#endif
+
#endif /* _NVME_H */
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 6bd9b1033965..9c80f9f08149 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -28,8 +28,8 @@
#include "trace.h"
#include "nvme.h"
-#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command))
-#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion))
+#define SQ_SIZE(q) ((q)->q_depth << (q)->sqes)
+#define CQ_SIZE(q) ((q)->q_depth * sizeof(struct nvme_completion))
#define SGES_PER_PAGE (PAGE_SIZE / sizeof(struct nvme_sgl_desc))
@@ -68,14 +68,14 @@ static int io_queue_depth = 1024;
module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
-static int write_queues;
-module_param(write_queues, int, 0644);
+static unsigned int write_queues;
+module_param(write_queues, uint, 0644);
MODULE_PARM_DESC(write_queues,
"Number of queues to use for writes. If not set, reads and writes "
"will share a queue set.");
-static int poll_queues;
-module_param(poll_queues, int, 0644);
+static unsigned int poll_queues;
+module_param(poll_queues, uint, 0644);
MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO.");
struct nvme_dev;
@@ -100,6 +100,7 @@ struct nvme_dev {
unsigned io_queues[HCTX_MAX_TYPES];
unsigned int num_vecs;
int q_depth;
+ int io_sqes;
u32 db_stride;
void __iomem *bar;
unsigned long bar_mapped_size;
@@ -162,11 +163,10 @@ static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
struct nvme_queue {
struct nvme_dev *dev;
spinlock_t sq_lock;
- struct nvme_command *sq_cmds;
+ void *sq_cmds;
/* only used for poll queues: */
spinlock_t cq_poll_lock ____cacheline_aligned_in_smp;
volatile struct nvme_completion *cqes;
- struct blk_mq_tags **tags;
dma_addr_t sq_dma_addr;
dma_addr_t cq_dma_addr;
u32 __iomem *q_db;
@@ -175,9 +175,9 @@ struct nvme_queue {
u16 sq_tail;
u16 last_sq_tail;
u16 cq_head;
- u16 last_cq_head;
u16 qid;
u8 cq_phase;
+ u8 sqes;
unsigned long flags;
#define NVMEQ_ENABLED 0
#define NVMEQ_SQ_CMB 1
@@ -375,29 +375,17 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
WARN_ON(hctx_idx != 0);
WARN_ON(dev->admin_tagset.tags[0] != hctx->tags);
- WARN_ON(nvmeq->tags);
hctx->driver_data = nvmeq;
- nvmeq->tags = &dev->admin_tagset.tags[0];
return 0;
}
-static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
-{
- struct nvme_queue *nvmeq = hctx->driver_data;
-
- nvmeq->tags = NULL;
-}
-
static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int hctx_idx)
{
struct nvme_dev *dev = data;
struct nvme_queue *nvmeq = &dev->queues[hctx_idx + 1];
- if (!nvmeq->tags)
- nvmeq->tags = &dev->tagset.tags[hctx_idx];
-
WARN_ON(dev->tagset.tags[hctx_idx] != hctx->tags);
hctx->driver_data = nvmeq;
return 0;
@@ -488,7 +476,8 @@ static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
bool write_sq)
{
spin_lock(&nvmeq->sq_lock);
- memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd));
+ memcpy(nvmeq->sq_cmds + (nvmeq->sq_tail << nvmeq->sqes),
+ cmd, sizeof(*cmd));
if (++nvmeq->sq_tail == nvmeq->q_depth)
nvmeq->sq_tail = 0;
nvme_write_sq_db(nvmeq, write_sq);
@@ -534,21 +523,22 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- enum dma_data_direction dma_dir = rq_data_dir(req) ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE;
const int last_prp = dev->ctrl.page_size / sizeof(__le64) - 1;
dma_addr_t dma_addr = iod->first_dma, next_dma_addr;
int i;
if (iod->dma_len) {
- dma_unmap_page(dev->dev, dma_addr, iod->dma_len, dma_dir);
+ dma_unmap_page(dev->dev, dma_addr, iod->dma_len,
+ rq_dma_dir(req));
return;
}
WARN_ON_ONCE(!iod->nents);
- /* P2PDMA requests do not need to be unmapped */
- if (!is_pci_p2pdma_page(sg_page(iod->sg)))
+ if (is_pci_p2pdma_page(sg_page(iod->sg)))
+ pci_p2pdma_unmap_sg(dev->dev, iod->sg, iod->nents,
+ rq_dma_dir(req));
+ else
dma_unmap_sg(dev->dev, iod->sg, iod->nents, rq_dma_dir(req));
@@ -769,7 +759,8 @@ static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev,
struct bio_vec *bv)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- unsigned int first_prp_len = dev->ctrl.page_size - bv->bv_offset;
+ unsigned int offset = bv->bv_offset & (dev->ctrl.page_size - 1);
+ unsigned int first_prp_len = dev->ctrl.page_size - offset;
iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0);
if (dma_mapping_error(dev->dev, iod->first_dma))
@@ -832,8 +823,8 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
goto out;
if (is_pci_p2pdma_page(sg_page(iod->sg)))
- nr_mapped = pci_p2pdma_map_sg(dev->dev, iod->sg, iod->nents,
- rq_dma_dir(req));
+ nr_mapped = pci_p2pdma_map_sg_attrs(dev->dev, iod->sg,
+ iod->nents, rq_dma_dir(req), DMA_ATTR_NO_WARN);
else
nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents,
rq_dma_dir(req), DMA_ATTR_NO_WARN);
@@ -920,7 +911,6 @@ static void nvme_pci_complete_rq(struct request *req)
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct nvme_dev *dev = iod->nvmeq->dev;
- nvme_cleanup_cmd(req);
if (blk_integrity_rq(req))
dma_unmap_page(dev->dev, iod->meta_dma,
rq_integrity_vec(req)->bv_len, rq_data_dir(req));
@@ -945,6 +935,13 @@ static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
}
+static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq)
+{
+ if (!nvmeq->qid)
+ return nvmeq->dev->admin_tagset.tags[0];
+ return nvmeq->dev->tagset.tags[nvmeq->qid - 1];
+}
+
static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
{
volatile struct nvme_completion *cqe = &nvmeq->cqes[idx];
@@ -963,14 +960,13 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
* aborts. We don't even bother to allocate a struct request
* for them but rather special case them here.
*/
- if (unlikely(nvmeq->qid == 0 &&
- cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) {
+ if (unlikely(nvme_is_aen_req(nvmeq->qid, cqe->command_id))) {
nvme_complete_async_event(&nvmeq->dev->ctrl,
cqe->status, &cqe->result);
return;
}
- req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id);
+ req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), cqe->command_id);
trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail);
nvme_end_request(req, cqe->status, cqe->result);
}
@@ -1023,10 +1019,7 @@ static irqreturn_t nvme_irq(int irq, void *data)
* the irq handler, even if that was on another CPU.
*/
rmb();
- if (nvmeq->cq_head != nvmeq->last_cq_head)
- ret = IRQ_HANDLED;
nvme_process_cq(nvmeq, &start, &end, -1);
- nvmeq->last_cq_head = nvmeq->cq_head;
wmb();
if (start != end) {
@@ -1344,16 +1337,16 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
static void nvme_free_queue(struct nvme_queue *nvmeq)
{
- dma_free_coherent(nvmeq->dev->dev, CQ_SIZE(nvmeq->q_depth),
+ dma_free_coherent(nvmeq->dev->dev, CQ_SIZE(nvmeq),
(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
if (!nvmeq->sq_cmds)
return;
if (test_and_clear_bit(NVMEQ_SQ_CMB, &nvmeq->flags)) {
pci_free_p2pmem(to_pci_dev(nvmeq->dev->dev),
- nvmeq->sq_cmds, SQ_SIZE(nvmeq->q_depth));
+ nvmeq->sq_cmds, SQ_SIZE(nvmeq));
} else {
- dma_free_coherent(nvmeq->dev->dev, SQ_SIZE(nvmeq->q_depth),
+ dma_free_coherent(nvmeq->dev->dev, SQ_SIZE(nvmeq),
nvmeq->sq_cmds, nvmeq->sq_dma_addr);
}
}
@@ -1403,11 +1396,28 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
if (shutdown)
nvme_shutdown_ctrl(&dev->ctrl);
else
- nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
+ nvme_disable_ctrl(&dev->ctrl);
nvme_poll_irqdisable(nvmeq, -1);
}
+/*
+ * Called only on a device that has been disabled and after all other threads
+ * that can check this device's completion queues have synced. This is the
+ * last chance for the driver to see a natural completion before
+ * nvme_cancel_request() terminates all incomplete requests.
+ */
+static void nvme_reap_pending_cqes(struct nvme_dev *dev)
+{
+ u16 start, end;
+ int i;
+
+ for (i = dev->ctrl.queue_count - 1; i > 0; i--) {
+ nvme_process_cq(&dev->queues[i], &start, &end, -1);
+ nvme_complete_cqes(&dev->queues[i], start, end);
+ }
+}
+
static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
int entry_size)
{
@@ -1433,12 +1443,12 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
}
static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
- int qid, int depth)
+ int qid)
{
struct pci_dev *pdev = to_pci_dev(dev->dev);
if (qid && dev->cmb_use_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
- nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(depth));
+ nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(nvmeq));
if (nvmeq->sq_cmds) {
nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev,
nvmeq->sq_cmds);
@@ -1447,11 +1457,11 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
return 0;
}
- pci_free_p2pmem(pdev, nvmeq->sq_cmds, SQ_SIZE(depth));
+ pci_free_p2pmem(pdev, nvmeq->sq_cmds, SQ_SIZE(nvmeq));
}
}
- nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
+ nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(nvmeq),
&nvmeq->sq_dma_addr, GFP_KERNEL);
if (!nvmeq->sq_cmds)
return -ENOMEM;
@@ -1465,12 +1475,14 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
if (dev->ctrl.queue_count > qid)
return 0;
- nvmeq->cqes = dma_alloc_coherent(dev->dev, CQ_SIZE(depth),
+ nvmeq->sqes = qid ? dev->io_sqes : NVME_ADM_SQES;
+ nvmeq->q_depth = depth;
+ nvmeq->cqes = dma_alloc_coherent(dev->dev, CQ_SIZE(nvmeq),
&nvmeq->cq_dma_addr, GFP_KERNEL);
if (!nvmeq->cqes)
goto free_nvmeq;
- if (nvme_alloc_sq_cmds(dev, nvmeq, qid, depth))
+ if (nvme_alloc_sq_cmds(dev, nvmeq, qid))
goto free_cqdma;
nvmeq->dev = dev;
@@ -1479,15 +1491,14 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
nvmeq->cq_head = 0;
nvmeq->cq_phase = 1;
nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
- nvmeq->q_depth = depth;
nvmeq->qid = qid;
dev->ctrl.queue_count++;
return 0;
free_cqdma:
- dma_free_coherent(dev->dev, CQ_SIZE(depth), (void *)nvmeq->cqes,
- nvmeq->cq_dma_addr);
+ dma_free_coherent(dev->dev, CQ_SIZE(nvmeq), (void *)nvmeq->cqes,
+ nvmeq->cq_dma_addr);
free_nvmeq:
return -ENOMEM;
}
@@ -1515,7 +1526,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
nvmeq->cq_head = 0;
nvmeq->cq_phase = 1;
nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
- memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth));
+ memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq));
nvme_dbbuf_init(dev, nvmeq, qid);
dev->online_queues++;
wmb(); /* ensure the first interrupt sees the initialization */
@@ -1545,14 +1556,13 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
result = adapter_alloc_sq(dev, qid, nvmeq);
if (result < 0)
return result;
- else if (result)
+ if (result)
goto release_cq;
nvmeq->cq_vector = vector;
nvme_init_queue(nvmeq, qid);
if (!polled) {
- nvmeq->cq_vector = vector;
result = queue_request_irq(nvmeq);
if (result < 0)
goto release_sq;
@@ -1573,7 +1583,6 @@ static const struct blk_mq_ops nvme_mq_admin_ops = {
.queue_rq = nvme_queue_rq,
.complete = nvme_pci_complete_rq,
.init_hctx = nvme_admin_init_hctx,
- .exit_hctx = nvme_admin_exit_hctx,
.init_request = nvme_init_request,
.timeout = nvme_timeout,
};
@@ -1679,7 +1688,7 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
(readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO))
writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS);
- result = nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
+ result = nvme_disable_ctrl(&dev->ctrl);
if (result < 0)
return result;
@@ -1695,7 +1704,7 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ);
lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ);
- result = nvme_enable_ctrl(&dev->ctrl, dev->ctrl.cap);
+ result = nvme_enable_ctrl(&dev->ctrl);
if (result)
return result;
@@ -2055,7 +2064,6 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
.priv = dev,
};
unsigned int irq_queues, this_p_queues;
- unsigned int nr_cpus = num_possible_cpus();
/*
* Poll queues don't need interrupts, but we need at least one IO
@@ -2066,10 +2074,7 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
this_p_queues = nr_io_queues - 1;
irq_queues = 1;
} else {
- if (nr_cpus < nr_io_queues - this_p_queues)
- irq_queues = nr_cpus + 1;
- else
- irq_queues = nr_io_queues - this_p_queues + 1;
+ irq_queues = nr_io_queues - this_p_queues + 1;
}
dev->io_queues[HCTX_TYPE_POLL] = this_p_queues;
@@ -2077,6 +2082,13 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
dev->io_queues[HCTX_TYPE_DEFAULT] = 1;
dev->io_queues[HCTX_TYPE_READ] = 0;
+ /*
+ * Some Apple controllers require all queues to use the
+ * first vector.
+ */
+ if (dev->ctrl.quirks & NVME_QUIRK_SINGLE_VECTOR)
+ irq_queues = 1;
+
return pci_alloc_irq_vectors_affinity(pdev, 1, irq_queues,
PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd);
}
@@ -2095,6 +2107,14 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
unsigned long size;
nr_io_queues = max_io_queues();
+
+ /*
+ * If tags are shared with admin queue (Apple bug), then
+ * make sure we only use one IO queue.
+ */
+ if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS)
+ nr_io_queues = 1;
+
result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
if (result < 0)
return result;
@@ -2232,11 +2252,6 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
if (timeout == 0)
return false;
- /* handle any remaining CQEs */
- if (opcode == nvme_admin_delete_cq &&
- !test_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags))
- nvme_poll_irqdisable(nvmeq, -1);
-
sent--;
if (nr_queues)
goto retry;
@@ -2244,10 +2259,7 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
return true;
}
-/*
- * return error value only when tagset allocation failed
- */
-static int nvme_dev_add(struct nvme_dev *dev)
+static void nvme_dev_add(struct nvme_dev *dev)
{
int ret;
@@ -2265,11 +2277,19 @@ static int nvme_dev_add(struct nvme_dev *dev)
dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
dev->tagset.driver_data = dev;
+ /*
+ * Some Apple controllers requires tags to be unique
+ * across admin and IO queue, so reserve the first 32
+ * tags of the IO queue.
+ */
+ if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS)
+ dev->tagset.reserved_tags = NVME_AQ_DEPTH;
+
ret = blk_mq_alloc_tag_set(&dev->tagset);
if (ret) {
dev_warn(dev->ctrl.device,
"IO queues tagset allocation failed %d\n", ret);
- return ret;
+ return;
}
dev->ctrl.tagset = &dev->tagset;
} else {
@@ -2280,7 +2300,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
}
nvme_dbbuf_set(dev);
- return 0;
}
static int nvme_pci_enable(struct nvme_dev *dev)
@@ -2314,10 +2333,21 @@ static int nvme_pci_enable(struct nvme_dev *dev)
dev->q_depth = min_t(int, NVME_CAP_MQES(dev->ctrl.cap) + 1,
io_queue_depth);
+ dev->ctrl.sqsize = dev->q_depth - 1; /* 0's based queue depth */
dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap);
dev->dbs = dev->bar + 4096;
/*
+ * Some Apple controllers require a non-standard SQE size.
+ * Interestingly they also seem to ignore the CC:IOSQES register
+ * so we don't bother updating it here.
+ */
+ if (dev->ctrl.quirks & NVME_QUIRK_128_BYTES_SQES)
+ dev->io_sqes = 7;
+ else
+ dev->io_sqes = NVME_NVM_IOSQES;
+
+ /*
* Temporary fix for the Apple controller found in the MacBook8,1 and
* some MacBook7,1 to avoid controller resets and data loss.
*/
@@ -2334,6 +2364,18 @@ static int nvme_pci_enable(struct nvme_dev *dev)
"set queue depth=%u\n", dev->q_depth);
}
+ /*
+ * Controllers with the shared tags quirk need the IO queue to be
+ * big enough so that we get 32 tags for the admin queue
+ */
+ if ((dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS) &&
+ (dev->q_depth < (NVME_AQ_DEPTH + 2))) {
+ dev->q_depth = NVME_AQ_DEPTH + 2;
+ dev_warn(dev->ctrl.device, "IO queue depth clamped to %d\n",
+ dev->q_depth);
+ }
+
+
nvme_map_cmb(dev);
pci_enable_pcie_error_reporting(pdev);
@@ -2398,9 +2440,12 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
nvme_suspend_io_queues(dev);
nvme_suspend_queue(&dev->queues[0]);
nvme_pci_disable(dev);
+ nvme_reap_pending_cqes(dev);
blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl);
+ blk_mq_tagset_wait_completed_request(&dev->tagset);
+ blk_mq_tagset_wait_completed_request(&dev->admin_tagset);
/*
* The driver will not be starting up queues again if shutting down so
@@ -2415,6 +2460,14 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
mutex_unlock(&dev->shutdown_lock);
}
+static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown)
+{
+ if (!nvme_wait_reset(&dev->ctrl))
+ return -EBUSY;
+ nvme_dev_disable(dev, shutdown);
+ return 0;
+}
+
static int nvme_setup_prp_pools(struct nvme_dev *dev)
{
dev->prp_page_pool = dma_pool_create("prp list page", dev->dev,
@@ -2438,14 +2491,20 @@ static void nvme_release_prp_pools(struct nvme_dev *dev)
dma_pool_destroy(dev->prp_small_pool);
}
+static void nvme_free_tagset(struct nvme_dev *dev)
+{
+ if (dev->tagset.tags)
+ blk_mq_free_tag_set(&dev->tagset);
+ dev->ctrl.tagset = NULL;
+}
+
static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
{
struct nvme_dev *dev = to_nvme_dev(ctrl);
nvme_dbbuf_dma_free(dev);
put_device(dev->dev);
- if (dev->tagset.tags)
- blk_mq_free_tag_set(&dev->tagset);
+ nvme_free_tagset(dev);
if (dev->ctrl.admin_q)
blk_put_queue(dev->ctrl.admin_q);
kfree(dev->queues);
@@ -2456,6 +2515,11 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
{
+ /*
+ * Set state to deleting now to avoid blocking nvme_wait_reset(), which
+ * may be holding this pci_dev's device lock.
+ */
+ nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
nvme_get_ctrl(&dev->ctrl);
nvme_dev_disable(dev, false);
nvme_kill_queues(&dev->ctrl);
@@ -2469,7 +2533,6 @@ static void nvme_reset_work(struct work_struct *work)
container_of(work, struct nvme_dev, ctrl.reset_work);
bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
int result;
- enum nvme_ctrl_state new_state = NVME_CTRL_LIVE;
if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) {
result = -ENODEV;
@@ -2563,13 +2626,11 @@ static void nvme_reset_work(struct work_struct *work)
dev_warn(dev->ctrl.device, "IO queues not created\n");
nvme_kill_queues(&dev->ctrl);
nvme_remove_namespaces(&dev->ctrl);
- new_state = NVME_CTRL_ADMIN_ONLY;
+ nvme_free_tagset(dev);
} else {
nvme_start_queues(&dev->ctrl);
nvme_wait_freeze(&dev->ctrl);
- /* hit this only when allocate tagset fails */
- if (nvme_dev_add(dev))
- new_state = NVME_CTRL_ADMIN_ONLY;
+ nvme_dev_add(dev);
nvme_unfreeze(&dev->ctrl);
}
@@ -2577,9 +2638,9 @@ static void nvme_reset_work(struct work_struct *work)
* If only admin queue live, keep it to do further investigation or
* recovery.
*/
- if (!nvme_change_ctrl_state(&dev->ctrl, new_state)) {
+ if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
dev_warn(dev->ctrl.device,
- "failed to mark controller state %d\n", new_state);
+ "failed to mark controller live state\n");
result = -ENODEV;
goto out;
}
@@ -2620,7 +2681,7 @@ static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
{
- *val = readq(to_nvme_dev(ctrl)->bar + off);
+ *val = lo_hi_readq(to_nvme_dev(ctrl)->bar + off);
return 0;
}
@@ -2784,19 +2845,28 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
static void nvme_reset_prepare(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
- nvme_dev_disable(dev, false);
+
+ /*
+ * We don't need to check the return value from waiting for the reset
+ * state as pci_dev device lock is held, making it impossible to race
+ * with ->remove().
+ */
+ nvme_disable_prepare_reset(dev, false);
+ nvme_sync_queues(&dev->ctrl);
}
static void nvme_reset_done(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
- nvme_reset_ctrl_sync(&dev->ctrl);
+
+ if (!nvme_try_sched_reset(&dev->ctrl))
+ flush_work(&dev->ctrl.reset_work);
}
static void nvme_shutdown(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
- nvme_dev_disable(dev, true);
+ nvme_disable_prepare_reset(dev, true);
}
/*
@@ -2849,7 +2919,7 @@ static int nvme_resume(struct device *dev)
if (ndev->last_ps == U32_MAX ||
nvme_set_power_state(ctrl, ndev->last_ps) != 0)
- nvme_reset_ctrl(ctrl);
+ return nvme_try_sched_reset(&ndev->ctrl);
return 0;
}
@@ -2876,43 +2946,43 @@ static int nvme_suspend(struct device *dev)
* state (which may not be possible if the link is up).
*/
if (pm_suspend_via_firmware() || !ctrl->npss ||
- !pcie_aspm_enabled(pdev)) {
- nvme_dev_disable(ndev, true);
- return 0;
- }
+ !pcie_aspm_enabled(pdev) ||
+ (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND))
+ return nvme_disable_prepare_reset(ndev, true);
nvme_start_freeze(ctrl);
nvme_wait_freeze(ctrl);
nvme_sync_queues(ctrl);
- if (ctrl->state != NVME_CTRL_LIVE &&
- ctrl->state != NVME_CTRL_ADMIN_ONLY)
+ if (ctrl->state != NVME_CTRL_LIVE)
goto unfreeze;
ret = nvme_get_power_state(ctrl, &ndev->last_ps);
if (ret < 0)
goto unfreeze;
+ /*
+ * A saved state prevents pci pm from generically controlling the
+ * device's power. If we're using protocol specific settings, we don't
+ * want pci interfering.
+ */
+ pci_save_state(pdev);
+
ret = nvme_set_power_state(ctrl, ctrl->npss);
if (ret < 0)
goto unfreeze;
if (ret) {
+ /* discard the saved state */
+ pci_load_saved_state(pdev, NULL);
+
/*
* Clearing npss forces a controller reset on resume. The
- * correct value will be resdicovered then.
+ * correct value will be rediscovered then.
*/
- nvme_dev_disable(ndev, true);
+ ret = nvme_disable_prepare_reset(ndev, true);
ctrl->npss = 0;
- ret = 0;
- goto unfreeze;
}
- /*
- * A saved state prevents pci pm from generically controlling the
- * device's power. If we're using protocol specific settings, we don't
- * want pci interfering.
- */
- pci_save_state(pdev);
unfreeze:
nvme_unfreeze(ctrl);
return ret;
@@ -2921,9 +2991,7 @@ unfreeze:
static int nvme_simple_suspend(struct device *dev)
{
struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
-
- nvme_dev_disable(ndev, true);
- return 0;
+ return nvme_disable_prepare_reset(ndev, true);
}
static int nvme_simple_resume(struct device *dev)
@@ -2931,8 +2999,7 @@ static int nvme_simple_resume(struct device *dev)
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);
- nvme_reset_ctrl(&ndev->ctrl);
- return 0;
+ return nvme_try_sched_reset(&ndev->ctrl);
}
static const struct dev_pm_ops nvme_dev_pm_ops = {
@@ -3011,7 +3078,8 @@ static const struct pci_device_id nvme_id_table[] = {
NVME_QUIRK_DEALLOCATE_ZEROES, },
{ PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */
.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
- NVME_QUIRK_MEDIUM_PRIO_SQ },
+ NVME_QUIRK_MEDIUM_PRIO_SQ |
+ NVME_QUIRK_NO_TEMP_THRESH_CHANGE },
{ PCI_VDEVICE(INTEL, 0xf1a6), /* Intel 760p/Pro 7600p */
.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */
@@ -3037,9 +3105,16 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_LIGHTNVM, },
{ PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */
.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
+ { PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */
+ .driver_data = NVME_QUIRK_NO_DEEPEST_PS |
+ NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
+ { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2005),
+ .driver_data = NVME_QUIRK_SINGLE_VECTOR |
+ NVME_QUIRK_128_BYTES_SQES |
+ NVME_QUIRK_SHARED_TAGS },
{ 0, }
};
MODULE_DEVICE_TABLE(pci, nvme_id_table);
@@ -3065,6 +3140,9 @@ static int __init nvme_init(void)
BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2);
+
+ write_queues = min(write_queues, num_possible_cpus());
+ poll_queues = min(poll_queues, num_possible_cpus());
return pci_register_driver(&nvme_driver);
}
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 1a6449bc547b..3e85c5cacefd 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -427,7 +427,7 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev)
{
return min_t(u32, NVME_RDMA_MAX_SEGMENTS,
- ibdev->attrs.max_fast_reg_page_list_len);
+ ibdev->attrs.max_fast_reg_page_list_len - 1);
}
static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
@@ -437,7 +437,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
const int cq_factor = send_wr_factor + 1; /* + RECV */
int comp_vector, idx = nvme_rdma_queue_idx(queue);
enum ib_poll_context poll_ctx;
- int ret;
+ int ret, pages_per_mr;
queue->device = nvme_rdma_find_get_device(queue->cm_id);
if (!queue->device) {
@@ -479,10 +479,16 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
goto out_destroy_qp;
}
+ /*
+ * Currently we don't use SG_GAPS MR's so if the first entry is
+ * misaligned we'll end up using two entries for a single data page,
+ * so one additional entry is required.
+ */
+ pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev) + 1;
ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs,
queue->queue_size,
IB_MR_TYPE_MEM_REG,
- nvme_rdma_get_max_fr_pages(ibdev), 0);
+ pages_per_mr, 0);
if (ret) {
dev_err(queue->ctrl->ctrl.device,
"failed to initialize MR pool sized %d for QID %d\n",
@@ -614,7 +620,8 @@ static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx)
if (!ret) {
set_bit(NVME_RDMA_Q_LIVE, &queue->flags);
} else {
- __nvme_rdma_stop_queue(queue);
+ if (test_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
+ __nvme_rdma_stop_queue(queue);
dev_info(ctrl->ctrl.device,
"failed to connect queue: %d ret=%d\n", idx, ret);
}
@@ -724,7 +731,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
set->reserved_tags = 2; /* connect + keep-alive */
set->numa_node = nctrl->numa_node;
set->cmd_size = sizeof(struct nvme_rdma_request) +
- SG_CHUNK_SIZE * sizeof(struct scatterlist);
+ NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
set->driver_data = ctrl;
set->nr_hw_queues = 1;
set->timeout = ADMIN_TIMEOUT;
@@ -738,7 +745,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
set->numa_node = nctrl->numa_node;
set->flags = BLK_MQ_F_SHOULD_MERGE;
set->cmd_size = sizeof(struct nvme_rdma_request) +
- SG_CHUNK_SIZE * sizeof(struct scatterlist);
+ NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
set->driver_data = ctrl;
set->nr_hw_queues = nctrl->queue_count - 1;
set->timeout = NVME_IO_TIMEOUT;
@@ -757,6 +764,7 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
{
if (remove) {
blk_cleanup_queue(ctrl->ctrl.admin_q);
+ blk_cleanup_queue(ctrl->ctrl.fabrics_q);
blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
}
if (ctrl->async_event_sqe.data) {
@@ -798,10 +806,16 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
goto out_free_async_qe;
}
+ ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set);
+ if (IS_ERR(ctrl->ctrl.fabrics_q)) {
+ error = PTR_ERR(ctrl->ctrl.fabrics_q);
+ goto out_free_tagset;
+ }
+
ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
if (IS_ERR(ctrl->ctrl.admin_q)) {
error = PTR_ERR(ctrl->ctrl.admin_q);
- goto out_free_tagset;
+ goto out_cleanup_fabrics_q;
}
}
@@ -809,23 +823,14 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
if (error)
goto out_cleanup_queue;
- error = ctrl->ctrl.ops->reg_read64(&ctrl->ctrl, NVME_REG_CAP,
- &ctrl->ctrl.cap);
- if (error) {
- dev_err(ctrl->ctrl.device,
- "prop_get NVME_REG_CAP failed\n");
- goto out_stop_queue;
- }
-
- ctrl->ctrl.sqsize =
- min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
-
- error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
+ error = nvme_enable_ctrl(&ctrl->ctrl);
if (error)
goto out_stop_queue;
- ctrl->ctrl.max_hw_sectors =
- (ctrl->max_fr_pages - 1) << (ilog2(SZ_4K) - 9);
+ ctrl->ctrl.max_segments = ctrl->max_fr_pages;
+ ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9);
+
+ blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
error = nvme_init_identify(&ctrl->ctrl);
if (error)
@@ -838,6 +843,9 @@ out_stop_queue:
out_cleanup_queue:
if (new)
blk_cleanup_queue(ctrl->ctrl.admin_q);
+out_cleanup_fabrics_q:
+ if (new)
+ blk_cleanup_queue(ctrl->ctrl.fabrics_q);
out_free_tagset:
if (new)
blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
@@ -907,10 +915,13 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
{
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
nvme_rdma_stop_queue(&ctrl->queues[0]);
- if (ctrl->ctrl.admin_tagset)
+ if (ctrl->ctrl.admin_tagset) {
blk_mq_tagset_busy_iter(ctrl->ctrl.admin_tagset,
nvme_cancel_request, &ctrl->ctrl);
- blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
+ blk_mq_tagset_wait_completed_request(ctrl->ctrl.admin_tagset);
+ }
+ if (remove)
+ blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
nvme_rdma_destroy_admin_queue(ctrl, remove);
}
@@ -920,9 +931,11 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
if (ctrl->ctrl.queue_count > 1) {
nvme_stop_queues(&ctrl->ctrl);
nvme_rdma_stop_io_queues(ctrl);
- if (ctrl->ctrl.tagset)
+ if (ctrl->ctrl.tagset) {
blk_mq_tagset_busy_iter(ctrl->ctrl.tagset,
nvme_cancel_request, &ctrl->ctrl);
+ blk_mq_tagset_wait_completed_request(ctrl->ctrl.tagset);
+ }
if (remove)
nvme_start_queues(&ctrl->ctrl);
nvme_rdma_destroy_io_queues(ctrl, remove);
@@ -1059,6 +1072,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
nvme_rdma_teardown_io_queues(ctrl, false);
nvme_start_queues(&ctrl->ctrl);
nvme_rdma_teardown_admin_queue(ctrl, false);
+ blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure is ok if we're in DELETING state */
@@ -1074,7 +1088,7 @@ static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
return;
- queue_work(nvme_wq, &ctrl->err_work);
+ queue_work(nvme_reset_wq, &ctrl->err_work);
}
static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
@@ -1145,12 +1159,8 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
req->mr = NULL;
}
- ib_dma_unmap_sg(ibdev, req->sg_table.sgl,
- req->nents, rq_data_dir(rq) ==
- WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-
- nvme_cleanup_cmd(rq);
- sg_free_table_chained(&req->sg_table, SG_CHUNK_SIZE);
+ ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq));
+ sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT);
}
static int nvme_rdma_set_sg_null(struct nvme_command *c)
@@ -1266,14 +1276,14 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
req->sg_table.sgl = req->first_sgl;
ret = sg_alloc_table_chained(&req->sg_table,
blk_rq_nr_phys_segments(rq), req->sg_table.sgl,
- SG_CHUNK_SIZE);
+ NVME_INLINE_SG_CNT);
if (ret)
return -ENOMEM;
req->nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl);
count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents,
- rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+ rq_dma_dir(rq));
if (unlikely(count <= 0)) {
ret = -EIO;
goto out_free_table;
@@ -1302,11 +1312,9 @@ out:
return 0;
out_unmap_sg:
- ib_dma_unmap_sg(ibdev, req->sg_table.sgl,
- req->nents, rq_data_dir(rq) ==
- WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+ ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq));
out_free_table:
- sg_free_table_chained(&req->sg_table, SG_CHUNK_SIZE);
+ sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT);
return ret;
}
@@ -1491,8 +1499,8 @@ static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
* aborts. We don't even bother to allocate a struct request
* for them but rather special case them here.
*/
- if (unlikely(nvme_rdma_queue_idx(queue) == 0 &&
- cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH))
+ if (unlikely(nvme_is_aen_req(nvme_rdma_queue_idx(queue),
+ cqe->command_id)))
nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
&cqe->result);
else
@@ -1547,16 +1555,18 @@ static int nvme_rdma_conn_rejected(struct nvme_rdma_queue *queue,
static int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue)
{
+ struct nvme_ctrl *ctrl = &queue->ctrl->ctrl;
int ret;
ret = nvme_rdma_create_queue_ib(queue);
if (ret)
return ret;
+ if (ctrl->opts->tos >= 0)
+ rdma_set_service_type(queue->cm_id, ctrl->opts->tos);
ret = rdma_resolve_route(queue->cm_id, NVME_RDMA_CONNECT_TIMEOUT_MS);
if (ret) {
- dev_err(queue->ctrl->ctrl.device,
- "rdma_resolve_route failed (%d).\n",
+ dev_err(ctrl->device, "rdma_resolve_route failed (%d).\n",
queue->cm_error);
goto out_destroy_queue;
}
@@ -1689,6 +1699,14 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n",
rq->tag, nvme_rdma_queue_idx(queue));
+ /*
+ * Restart the timer if a controller reset is already scheduled. Any
+ * timed out commands would be handled before entering the connecting
+ * state.
+ */
+ if (ctrl->ctrl.state == NVME_CTRL_RESETTING)
+ return BLK_EH_RESET_TIMER;
+
if (ctrl->ctrl.state != NVME_CTRL_LIVE) {
/*
* Teardown immediately if controller times out while starting
@@ -1748,7 +1766,6 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
if (unlikely(err < 0)) {
dev_err(queue->ctrl->ctrl.device,
"Failed to map data (%d)\n", err);
- nvme_cleanup_cmd(rq);
goto err;
}
@@ -1759,18 +1776,19 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
req->mr ? &req->reg_wr.wr : NULL);
- if (unlikely(err)) {
- nvme_rdma_unmap_data(queue, rq);
- goto err;
- }
+ if (unlikely(err))
+ goto err_unmap;
return BLK_STS_OK;
+err_unmap:
+ nvme_rdma_unmap_data(queue, rq);
err:
if (err == -ENOMEM || err == -EAGAIN)
ret = BLK_STS_RESOURCE;
else
ret = BLK_STS_IOERR;
+ nvme_cleanup_cmd(rq);
unmap_qe:
ib_dma_unmap_single(dev, req->sqe.dma, sizeof(struct nvme_command),
DMA_TO_DEVICE);
@@ -1869,10 +1887,11 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
cancel_delayed_work_sync(&ctrl->reconnect_work);
nvme_rdma_teardown_io_queues(ctrl, shutdown);
+ blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
if (shutdown)
nvme_shutdown_ctrl(&ctrl->ctrl);
else
- nvme_disable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
+ nvme_disable_ctrl(&ctrl->ctrl);
nvme_rdma_teardown_admin_queue(ctrl, shutdown);
}
@@ -2051,7 +2070,8 @@ static struct nvmf_transport_ops nvme_rdma_transport = {
.required_opts = NVMF_OPT_TRADDR,
.allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
- NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES,
+ NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
+ NVMF_OPT_TOS,
.create_ctrl = nvme_rdma_create_ctrl,
};
@@ -2111,8 +2131,16 @@ err_unreg_client:
static void __exit nvme_rdma_cleanup_module(void)
{
+ struct nvme_rdma_ctrl *ctrl;
+
nvmf_unregister_transport(&nvme_rdma_transport);
ib_unregister_client(&nvme_rdma_ib_client);
+
+ mutex_lock(&nvme_rdma_ctrl_mutex);
+ list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list)
+ nvme_delete_ctrl(&ctrl->ctrl);
+ mutex_unlock(&nvme_rdma_ctrl_mutex);
+ flush_workqueue(nvme_delete_wq);
}
module_init(nvme_rdma_init_module);
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 606b13d35d16..49d4373b84eb 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -13,6 +13,7 @@
#include <net/tcp.h>
#include <linux/blk-mq.h>
#include <crypto/hash.h>
+#include <net/busy_poll.h>
#include "nvme.h"
#include "fabrics.h"
@@ -72,6 +73,7 @@ struct nvme_tcp_queue {
int pdu_offset;
size_t data_remaining;
size_t ddgst_remaining;
+ unsigned int nr_cqe;
/* send state */
struct nvme_tcp_request *request;
@@ -420,7 +422,7 @@ static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
return;
- queue_work(nvme_wq, &to_tcp_ctrl(ctrl)->err_work);
+ queue_work(nvme_reset_wq, &to_tcp_ctrl(ctrl)->err_work);
}
static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
@@ -438,6 +440,7 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
}
nvme_end_request(rq, cqe->status, cqe->result);
+ queue->nr_cqe++;
return 0;
}
@@ -488,8 +491,8 @@ static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue,
* aborts. We don't even bother to allocate a struct request
* for them but rather special case them here.
*/
- if (unlikely(nvme_tcp_queue_id(queue) == 0 &&
- cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH))
+ if (unlikely(nvme_is_aen_req(nvme_tcp_queue_id(queue),
+ cqe->command_id)))
nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
&cqe->result);
else
@@ -608,23 +611,18 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb,
switch (hdr->type) {
case nvme_tcp_c2h_data:
- ret = nvme_tcp_handle_c2h_data(queue, (void *)queue->pdu);
- break;
+ return nvme_tcp_handle_c2h_data(queue, (void *)queue->pdu);
case nvme_tcp_rsp:
nvme_tcp_init_recv_ctx(queue);
- ret = nvme_tcp_handle_comp(queue, (void *)queue->pdu);
- break;
+ return nvme_tcp_handle_comp(queue, (void *)queue->pdu);
case nvme_tcp_r2t:
nvme_tcp_init_recv_ctx(queue);
- ret = nvme_tcp_handle_r2t(queue, (void *)queue->pdu);
- break;
+ return nvme_tcp_handle_r2t(queue, (void *)queue->pdu);
default:
dev_err(queue->ctrl->ctrl.device,
"unsupported pdu type (%d)\n", hdr->type);
return -EINVAL;
}
-
- return ret;
}
static inline void nvme_tcp_end_request(struct request *rq, u16 status)
@@ -701,8 +699,10 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
} else {
- if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS)
+ if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
+ queue->nr_cqe++;
+ }
nvme_tcp_init_recv_ctx(queue);
}
}
@@ -742,6 +742,7 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
pdu->command_id);
nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
+ queue->nr_cqe++;
}
nvme_tcp_init_recv_ctx(queue);
@@ -841,7 +842,7 @@ static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
static void nvme_tcp_fail_request(struct nvme_tcp_request *req)
{
- nvme_tcp_end_request(blk_mq_rq_from_pdu(req), NVME_SC_DATA_XFER_ERROR);
+ nvme_tcp_end_request(blk_mq_rq_from_pdu(req), NVME_SC_HOST_PATH_ERROR);
}
static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
@@ -1023,14 +1024,16 @@ done:
static int nvme_tcp_try_recv(struct nvme_tcp_queue *queue)
{
- struct sock *sk = queue->sock->sk;
+ struct socket *sock = queue->sock;
+ struct sock *sk = sock->sk;
read_descriptor_t rd_desc;
int consumed;
rd_desc.arg.data = queue;
rd_desc.count = 1;
lock_sock(sk);
- consumed = tcp_read_sock(sk, &rd_desc, nvme_tcp_recv_skb);
+ queue->nr_cqe = 0;
+ consumed = sock->ops->read_sock(sk, &rd_desc, nvme_tcp_recv_skb);
release_sock(sk);
return consumed;
}
@@ -1039,7 +1042,7 @@ static void nvme_tcp_io_work(struct work_struct *w)
{
struct nvme_tcp_queue *queue =
container_of(w, struct nvme_tcp_queue, io_work);
- unsigned long start = jiffies + msecs_to_jiffies(1);
+ unsigned long deadline = jiffies + msecs_to_jiffies(1);
do {
bool pending = false;
@@ -1051,7 +1054,12 @@ static void nvme_tcp_io_work(struct work_struct *w)
} else if (unlikely(result < 0)) {
dev_err(queue->ctrl->ctrl.device,
"failed to send request %d\n", result);
- if (result != -EPIPE)
+
+ /*
+ * Fail the request unless peer closed the connection,
+ * in which case error recovery flow will complete all.
+ */
+ if ((result != -EPIPE) && (result != -ECONNRESET))
nvme_tcp_fail_request(queue->request);
nvme_tcp_done_send_req(queue);
return;
@@ -1064,7 +1072,7 @@ static void nvme_tcp_io_work(struct work_struct *w)
if (!pending)
return;
- } while (time_after(jiffies, start)); /* quota is exhausted */
+ } while (!time_after(jiffies, deadline)); /* quota is exhausted */
queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
}
@@ -1255,7 +1263,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
queue->queue_size = queue_size;
if (qid > 0)
- queue->cmnd_capsule_len = ctrl->ctrl.ioccsz * 16;
+ queue->cmnd_capsule_len = nctrl->ioccsz * 16;
else
queue->cmnd_capsule_len = sizeof(struct nvme_command) +
NVME_TCP_ADMIN_CCSZ;
@@ -1263,7 +1271,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
ret = sock_create(ctrl->addr.ss_family, SOCK_STREAM,
IPPROTO_TCP, &queue->sock);
if (ret) {
- dev_err(ctrl->ctrl.device,
+ dev_err(nctrl->device,
"failed to create socket: %d\n", ret);
return ret;
}
@@ -1273,7 +1281,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
ret = kernel_setsockopt(queue->sock, IPPROTO_TCP, TCP_SYNCNT,
(char *)&opt, sizeof(opt));
if (ret) {
- dev_err(ctrl->ctrl.device,
+ dev_err(nctrl->device,
"failed to set TCP_SYNCNT sock opt %d\n", ret);
goto err_sock;
}
@@ -1283,7 +1291,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
ret = kernel_setsockopt(queue->sock, IPPROTO_TCP,
TCP_NODELAY, (char *)&opt, sizeof(opt));
if (ret) {
- dev_err(ctrl->ctrl.device,
+ dev_err(nctrl->device,
"failed to set TCP_NODELAY sock opt %d\n", ret);
goto err_sock;
}
@@ -1296,11 +1304,23 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
ret = kernel_setsockopt(queue->sock, SOL_SOCKET, SO_LINGER,
(char *)&sol, sizeof(sol));
if (ret) {
- dev_err(ctrl->ctrl.device,
+ dev_err(nctrl->device,
"failed to set SO_LINGER sock opt %d\n", ret);
goto err_sock;
}
+ /* Set socket type of service */
+ if (nctrl->opts->tos >= 0) {
+ opt = nctrl->opts->tos;
+ ret = kernel_setsockopt(queue->sock, SOL_IP, IP_TOS,
+ (char *)&opt, sizeof(opt));
+ if (ret) {
+ dev_err(nctrl->device,
+ "failed to set IP_TOS sock opt %d\n", ret);
+ goto err_sock;
+ }
+ }
+
queue->sock->sk->sk_allocation = GFP_ATOMIC;
if (!qid)
n = 0;
@@ -1314,11 +1334,11 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
queue->pdu_offset = 0;
sk_set_memalloc(queue->sock->sk);
- if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) {
+ if (nctrl->opts->mask & NVMF_OPT_HOST_TRADDR) {
ret = kernel_bind(queue->sock, (struct sockaddr *)&ctrl->src_addr,
sizeof(ctrl->src_addr));
if (ret) {
- dev_err(ctrl->ctrl.device,
+ dev_err(nctrl->device,
"failed to bind queue %d socket %d\n",
qid, ret);
goto err_sock;
@@ -1330,7 +1350,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
if (queue->hdr_digest || queue->data_digest) {
ret = nvme_tcp_alloc_crypto(queue);
if (ret) {
- dev_err(ctrl->ctrl.device,
+ dev_err(nctrl->device,
"failed to allocate queue %d crypto\n", qid);
goto err_sock;
}
@@ -1344,13 +1364,13 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
goto err_crypto;
}
- dev_dbg(ctrl->ctrl.device, "connecting queue %d\n",
+ dev_dbg(nctrl->device, "connecting queue %d\n",
nvme_tcp_queue_id(queue));
ret = kernel_connect(queue->sock, (struct sockaddr *)&ctrl->addr,
sizeof(ctrl->addr), 0);
if (ret) {
- dev_err(ctrl->ctrl.device,
+ dev_err(nctrl->device,
"failed to connect socket: %d\n", ret);
goto err_rcv_pdu;
}
@@ -1371,6 +1391,9 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
queue->sock->sk->sk_data_ready = nvme_tcp_data_ready;
queue->sock->sk->sk_state_change = nvme_tcp_state_change;
queue->sock->sk->sk_write_space = nvme_tcp_write_space;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ queue->sock->sk->sk_ll_usec = 1;
+#endif
write_unlock_bh(&queue->sock->sk->sk_callback_lock);
return 0;
@@ -1469,7 +1492,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
set->driver_data = ctrl;
set->nr_hw_queues = nctrl->queue_count - 1;
set->timeout = NVME_IO_TIMEOUT;
- set->nr_maps = 2 /* default + read */;
+ set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2;
}
ret = blk_mq_alloc_tag_set(set);
@@ -1568,6 +1591,7 @@ static unsigned int nvme_tcp_nr_io_queues(struct nvme_ctrl *ctrl)
nr_io_queues = min(ctrl->opts->nr_io_queues, num_online_cpus());
nr_io_queues += min(ctrl->opts->nr_write_queues, num_online_cpus());
+ nr_io_queues += min(ctrl->opts->nr_poll_queues, num_online_cpus());
return nr_io_queues;
}
@@ -1599,6 +1623,12 @@ static void nvme_tcp_set_io_queues(struct nvme_ctrl *nctrl,
min(opts->nr_io_queues, nr_io_queues);
nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
}
+
+ if (opts->nr_poll_queues && nr_io_queues) {
+ /* map dedicated poll queues only if we have queues left */
+ ctrl->io_queues[HCTX_TYPE_POLL] =
+ min(opts->nr_poll_queues, nr_io_queues);
+ }
}
static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
@@ -1680,6 +1710,7 @@ static void nvme_tcp_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove)
nvme_tcp_stop_queue(ctrl, 0);
if (remove) {
blk_cleanup_queue(ctrl->admin_q);
+ blk_cleanup_queue(ctrl->fabrics_q);
blk_mq_free_tag_set(ctrl->admin_tagset);
}
nvme_tcp_free_admin_queue(ctrl);
@@ -1700,10 +1731,16 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
goto out_free_queue;
}
+ ctrl->fabrics_q = blk_mq_init_queue(ctrl->admin_tagset);
+ if (IS_ERR(ctrl->fabrics_q)) {
+ error = PTR_ERR(ctrl->fabrics_q);
+ goto out_free_tagset;
+ }
+
ctrl->admin_q = blk_mq_init_queue(ctrl->admin_tagset);
if (IS_ERR(ctrl->admin_q)) {
error = PTR_ERR(ctrl->admin_q);
- goto out_free_tagset;
+ goto out_cleanup_fabrics_q;
}
}
@@ -1711,19 +1748,12 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
if (error)
goto out_cleanup_queue;
- error = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap);
- if (error) {
- dev_err(ctrl->device,
- "prop_get NVME_REG_CAP failed\n");
- goto out_stop_queue;
- }
-
- ctrl->sqsize = min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->sqsize);
-
- error = nvme_enable_ctrl(ctrl, ctrl->cap);
+ error = nvme_enable_ctrl(ctrl);
if (error)
goto out_stop_queue;
+ blk_mq_unquiesce_queue(ctrl->admin_q);
+
error = nvme_init_identify(ctrl);
if (error)
goto out_stop_queue;
@@ -1735,6 +1765,9 @@ out_stop_queue:
out_cleanup_queue:
if (new)
blk_cleanup_queue(ctrl->admin_q);
+out_cleanup_fabrics_q:
+ if (new)
+ blk_cleanup_queue(ctrl->fabrics_q);
out_free_tagset:
if (new)
blk_mq_free_tag_set(ctrl->admin_tagset);
@@ -1748,10 +1781,13 @@ static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
{
blk_mq_quiesce_queue(ctrl->admin_q);
nvme_tcp_stop_queue(ctrl, 0);
- if (ctrl->admin_tagset)
+ if (ctrl->admin_tagset) {
blk_mq_tagset_busy_iter(ctrl->admin_tagset,
nvme_cancel_request, ctrl);
- blk_mq_unquiesce_queue(ctrl->admin_q);
+ blk_mq_tagset_wait_completed_request(ctrl->admin_tagset);
+ }
+ if (remove)
+ blk_mq_unquiesce_queue(ctrl->admin_q);
nvme_tcp_destroy_admin_queue(ctrl, remove);
}
@@ -1762,9 +1798,11 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
return;
nvme_stop_queues(ctrl);
nvme_tcp_stop_io_queues(ctrl);
- if (ctrl->tagset)
+ if (ctrl->tagset) {
blk_mq_tagset_busy_iter(ctrl->tagset,
nvme_cancel_request, ctrl);
+ blk_mq_tagset_wait_completed_request(ctrl->tagset);
+ }
if (remove)
nvme_start_queues(ctrl);
nvme_tcp_destroy_io_queues(ctrl, remove);
@@ -1793,7 +1831,7 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
{
struct nvmf_ctrl_options *opts = ctrl->opts;
- int ret = -EINVAL;
+ int ret;
ret = nvme_tcp_configure_admin_queue(ctrl, new);
if (ret)
@@ -1876,6 +1914,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
/* unquiesce to fail fast pending requests */
nvme_start_queues(ctrl);
nvme_tcp_teardown_admin_queue(ctrl, false);
+ blk_mq_unquiesce_queue(ctrl->admin_q);
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure is ok if we're in DELETING state */
@@ -1892,10 +1931,11 @@ static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
nvme_tcp_teardown_io_queues(ctrl, shutdown);
+ blk_mq_quiesce_queue(ctrl->admin_q);
if (shutdown)
nvme_shutdown_ctrl(ctrl);
else
- nvme_disable_ctrl(ctrl, ctrl->cap);
+ nvme_disable_ctrl(ctrl);
nvme_tcp_teardown_admin_queue(ctrl, shutdown);
}
@@ -2011,6 +2051,14 @@ nvme_tcp_timeout(struct request *rq, bool reserved)
struct nvme_tcp_ctrl *ctrl = req->queue->ctrl;
struct nvme_tcp_cmd_pdu *pdu = req->pdu;
+ /*
+ * Restart the timer if a controller reset is already scheduled. Any
+ * timed out commands would be handled before entering the connecting
+ * state.
+ */
+ if (ctrl->ctrl.state == NVME_CTRL_RESETTING)
+ return BLK_EH_RESET_TIMER;
+
dev_warn(ctrl->ctrl.device,
"queue %d: timeout request %#x type %d\n",
nvme_tcp_queue_id(req->queue), rq->tag, pdu->hdr.type);
@@ -2093,6 +2141,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
ret = nvme_tcp_map_data(queue, rq);
if (unlikely(ret)) {
+ nvme_cleanup_cmd(rq);
dev_err(queue->ctrl->ctrl.device,
"Failed to map data (%d)\n", ret);
return ret;
@@ -2151,14 +2200,36 @@ static int nvme_tcp_map_queues(struct blk_mq_tag_set *set)
blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
+ if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) {
+ /* map dedicated poll queues only if we have queues left */
+ set->map[HCTX_TYPE_POLL].nr_queues =
+ ctrl->io_queues[HCTX_TYPE_POLL];
+ set->map[HCTX_TYPE_POLL].queue_offset =
+ ctrl->io_queues[HCTX_TYPE_DEFAULT] +
+ ctrl->io_queues[HCTX_TYPE_READ];
+ blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
+ }
+
dev_info(ctrl->ctrl.device,
- "mapped %d/%d default/read queues.\n",
+ "mapped %d/%d/%d default/read/poll queues.\n",
ctrl->io_queues[HCTX_TYPE_DEFAULT],
- ctrl->io_queues[HCTX_TYPE_READ]);
+ ctrl->io_queues[HCTX_TYPE_READ],
+ ctrl->io_queues[HCTX_TYPE_POLL]);
return 0;
}
+static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
+{
+ struct nvme_tcp_queue *queue = hctx->driver_data;
+ struct sock *sk = queue->sock->sk;
+
+ if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue))
+ sk_busy_loop(sk, true);
+ nvme_tcp_try_recv(queue);
+ return queue->nr_cqe;
+}
+
static struct blk_mq_ops nvme_tcp_mq_ops = {
.queue_rq = nvme_tcp_queue_rq,
.complete = nvme_complete_rq,
@@ -2167,6 +2238,7 @@ static struct blk_mq_ops nvme_tcp_mq_ops = {
.init_hctx = nvme_tcp_init_hctx,
.timeout = nvme_tcp_timeout,
.map_queues = nvme_tcp_map_queues,
+ .poll = nvme_tcp_poll,
};
static struct blk_mq_ops nvme_tcp_admin_mq_ops = {
@@ -2220,7 +2292,8 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
INIT_LIST_HEAD(&ctrl->list);
ctrl->ctrl.opts = opts;
- ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues + 1;
+ ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues +
+ opts->nr_poll_queues + 1;
ctrl->ctrl.sqsize = opts->queue_size - 1;
ctrl->ctrl.kato = opts->kato;
@@ -2314,7 +2387,8 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
.allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST |
- NVMF_OPT_NR_WRITE_QUEUES,
+ NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
+ NVMF_OPT_TOS,
.create_ctrl = nvme_tcp_create_ctrl,
};
diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c
index 9778eb0406b3..5c3cb6928f3c 100644
--- a/drivers/nvme/host/trace.c
+++ b/drivers/nvme/host/trace.c
@@ -86,6 +86,22 @@ static const char *nvme_trace_admin_get_features(struct trace_seq *p,
return ret;
}
+static const char *nvme_trace_get_lba_status(struct trace_seq *p,
+ u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u64 slba = get_unaligned_le64(cdw10);
+ u32 mndw = get_unaligned_le32(cdw10 + 8);
+ u16 rl = get_unaligned_le16(cdw10 + 12);
+ u8 atype = cdw10[15];
+
+ trace_seq_printf(p, "slba=0x%llx, mndw=0x%x, rl=0x%x, atype=%u",
+ slba, mndw, rl, atype);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
static const char *nvme_trace_read_write(struct trace_seq *p, u8 *cdw10)
{
const char *ret = trace_seq_buffer_ptr(p);
@@ -141,6 +157,8 @@ const char *nvme_trace_parse_admin_cmd(struct trace_seq *p,
return nvme_trace_admin_identify(p, cdw10);
case nvme_admin_get_features:
return nvme_trace_admin_get_features(p, cdw10);
+ case nvme_admin_get_lba_status:
+ return nvme_trace_get_lba_status(p, cdw10);
default:
return nvme_trace_common(p, cdw10);
}
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 4dc12ea52f23..72a7e41f3018 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -24,6 +24,16 @@ u32 nvmet_get_log_page_len(struct nvme_command *cmd)
return len;
}
+static u32 nvmet_feat_data_len(struct nvmet_req *req, u32 cdw10)
+{
+ switch (cdw10 & 0xff) {
+ case NVME_FEAT_HOST_ID:
+ return sizeof(req->sq->ctrl->hostid);
+ default:
+ return 0;
+ }
+}
+
u64 nvmet_get_log_page_offset(struct nvme_command *cmd)
{
return le64_to_cpu(cmd->get_log_page.lpo);
@@ -31,13 +41,12 @@ u64 nvmet_get_log_page_offset(struct nvme_command *cmd)
static void nvmet_execute_get_log_page_noop(struct nvmet_req *req)
{
- nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->data_len));
+ nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->transfer_len));
}
static void nvmet_execute_get_log_page_error(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
- u16 status = NVME_SC_SUCCESS;
unsigned long flags;
off_t offset = 0;
u64 slot;
@@ -47,9 +56,8 @@ static void nvmet_execute_get_log_page_error(struct nvmet_req *req)
slot = ctrl->err_counter % NVMET_ERROR_LOG_SLOTS;
for (i = 0; i < NVMET_ERROR_LOG_SLOTS; i++) {
- status = nvmet_copy_to_sgl(req, offset, &ctrl->slots[slot],
- sizeof(struct nvme_error_slot));
- if (status)
+ if (nvmet_copy_to_sgl(req, offset, &ctrl->slots[slot],
+ sizeof(struct nvme_error_slot)))
break;
if (slot == 0)
@@ -59,7 +67,7 @@ static void nvmet_execute_get_log_page_error(struct nvmet_req *req)
offset += sizeof(struct nvme_error_slot);
}
spin_unlock_irqrestore(&ctrl->error_lock, flags);
- nvmet_req_complete(req, status);
+ nvmet_req_complete(req, 0);
}
static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
@@ -81,9 +89,11 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
goto out;
host_reads = part_stat_read(ns->bdev->bd_part, ios[READ]);
- data_units_read = part_stat_read(ns->bdev->bd_part, sectors[READ]);
+ data_units_read = DIV_ROUND_UP(part_stat_read(ns->bdev->bd_part,
+ sectors[READ]), 1000);
host_writes = part_stat_read(ns->bdev->bd_part, ios[WRITE]);
- data_units_written = part_stat_read(ns->bdev->bd_part, sectors[WRITE]);
+ data_units_written = DIV_ROUND_UP(part_stat_read(ns->bdev->bd_part,
+ sectors[WRITE]), 1000);
put_unaligned_le64(host_reads, &slog->host_reads[0]);
put_unaligned_le64(data_units_read, &slog->data_units_read[0]);
@@ -111,11 +121,11 @@ static u16 nvmet_get_smart_log_all(struct nvmet_req *req,
if (!ns->bdev)
continue;
host_reads += part_stat_read(ns->bdev->bd_part, ios[READ]);
- data_units_read +=
- part_stat_read(ns->bdev->bd_part, sectors[READ]);
+ data_units_read += DIV_ROUND_UP(
+ part_stat_read(ns->bdev->bd_part, sectors[READ]), 1000);
host_writes += part_stat_read(ns->bdev->bd_part, ios[WRITE]);
- data_units_written +=
- part_stat_read(ns->bdev->bd_part, sectors[WRITE]);
+ data_units_written += DIV_ROUND_UP(
+ part_stat_read(ns->bdev->bd_part, sectors[WRITE]), 1000);
}
rcu_read_unlock();
@@ -134,7 +144,7 @@ static void nvmet_execute_get_log_page_smart(struct nvmet_req *req)
u16 status = NVME_SC_INTERNAL;
unsigned long flags;
- if (req->data_len != sizeof(*log))
+ if (req->transfer_len != sizeof(*log))
goto out;
log = kzalloc(sizeof(*log), GFP_KERNEL);
@@ -196,7 +206,7 @@ static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req)
u16 status = NVME_SC_INTERNAL;
size_t len;
- if (req->data_len != NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32))
+ if (req->transfer_len != NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32))
goto out;
mutex_lock(&ctrl->lock);
@@ -206,7 +216,7 @@ static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req)
len = ctrl->nr_changed_ns * sizeof(__le32);
status = nvmet_copy_to_sgl(req, 0, ctrl->changed_ns_list, len);
if (!status)
- status = nvmet_zero_sgl(req, len, req->data_len - len);
+ status = nvmet_zero_sgl(req, len, req->transfer_len - len);
ctrl->nr_changed_ns = 0;
nvmet_clear_aen_bit(req, NVME_AEN_BIT_NS_ATTR);
mutex_unlock(&ctrl->lock);
@@ -282,6 +292,36 @@ out:
nvmet_req_complete(req, status);
}
+static void nvmet_execute_get_log_page(struct nvmet_req *req)
+{
+ if (!nvmet_check_data_len(req, nvmet_get_log_page_len(req->cmd)))
+ return;
+
+ switch (req->cmd->get_log_page.lid) {
+ case NVME_LOG_ERROR:
+ return nvmet_execute_get_log_page_error(req);
+ case NVME_LOG_SMART:
+ return nvmet_execute_get_log_page_smart(req);
+ case NVME_LOG_FW_SLOT:
+ /*
+ * We only support a single firmware slot which always is
+ * active, so we can zero out the whole firmware slot log and
+ * still claim to fully implement this mandatory log page.
+ */
+ return nvmet_execute_get_log_page_noop(req);
+ case NVME_LOG_CHANGED_NS:
+ return nvmet_execute_get_log_changed_ns(req);
+ case NVME_LOG_CMD_EFFECTS:
+ return nvmet_execute_get_log_cmd_effects_ns(req);
+ case NVME_LOG_ANA:
+ return nvmet_execute_get_log_page_ana(req);
+ }
+ pr_err("unhandled lid %d on qid %d\n",
+ req->cmd->get_log_page.lid, req->sq->qid);
+ req->error_loc = offsetof(struct nvme_get_log_page_command, lid);
+ nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
+}
+
static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
@@ -565,6 +605,28 @@ out:
nvmet_req_complete(req, status);
}
+static void nvmet_execute_identify(struct nvmet_req *req)
+{
+ if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE))
+ return;
+
+ switch (req->cmd->identify.cns) {
+ case NVME_ID_CNS_NS:
+ return nvmet_execute_identify_ns(req);
+ case NVME_ID_CNS_CTRL:
+ return nvmet_execute_identify_ctrl(req);
+ case NVME_ID_CNS_NS_ACTIVE_LIST:
+ return nvmet_execute_identify_nslist(req);
+ case NVME_ID_CNS_NS_DESC_LIST:
+ return nvmet_execute_identify_desclist(req);
+ }
+
+ pr_err("unhandled identify cns %d on qid %d\n",
+ req->cmd->identify.cns, req->sq->qid);
+ req->error_loc = offsetof(struct nvme_identify, cns);
+ nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
+}
+
/*
* A "minimum viable" abort implementation: the command is mandatory in the
* spec, but we are not required to do any useful work. We couldn't really
@@ -574,6 +636,8 @@ out:
*/
static void nvmet_execute_abort(struct nvmet_req *req)
{
+ if (!nvmet_check_data_len(req, 0))
+ return;
nvmet_set_result(req, 1);
nvmet_req_complete(req, 0);
}
@@ -658,6 +722,9 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 status = 0;
+ if (!nvmet_check_data_len(req, 0))
+ return;
+
switch (cdw10 & 0xff) {
case NVME_FEAT_NUM_QUEUES:
nvmet_set_result(req,
@@ -721,6 +788,9 @@ static void nvmet_execute_get_features(struct nvmet_req *req)
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 status = 0;
+ if (!nvmet_check_data_len(req, nvmet_feat_data_len(req, cdw10)))
+ return;
+
switch (cdw10 & 0xff) {
/*
* These features are mandatory in the spec, but we don't
@@ -785,6 +855,9 @@ void nvmet_execute_async_event(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ if (!nvmet_check_data_len(req, 0))
+ return;
+
mutex_lock(&ctrl->lock);
if (ctrl->nr_async_event_cmds >= NVMET_ASYNC_EVENTS) {
mutex_unlock(&ctrl->lock);
@@ -801,6 +874,9 @@ void nvmet_execute_keep_alive(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ if (!nvmet_check_data_len(req, 0))
+ return;
+
pr_debug("ctrl %d update keep-alive timer for %d secs\n",
ctrl->cntlid, ctrl->kato);
@@ -813,77 +889,36 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
struct nvme_command *cmd = req->cmd;
u16 ret;
+ if (nvme_is_fabrics(cmd))
+ return nvmet_parse_fabrics_cmd(req);
+ if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
+ return nvmet_parse_discovery_cmd(req);
+
ret = nvmet_check_ctrl_status(req, cmd);
if (unlikely(ret))
return ret;
switch (cmd->common.opcode) {
case nvme_admin_get_log_page:
- req->data_len = nvmet_get_log_page_len(cmd);
-
- switch (cmd->get_log_page.lid) {
- case NVME_LOG_ERROR:
- req->execute = nvmet_execute_get_log_page_error;
- return 0;
- case NVME_LOG_SMART:
- req->execute = nvmet_execute_get_log_page_smart;
- return 0;
- case NVME_LOG_FW_SLOT:
- /*
- * We only support a single firmware slot which always
- * is active, so we can zero out the whole firmware slot
- * log and still claim to fully implement this mandatory
- * log page.
- */
- req->execute = nvmet_execute_get_log_page_noop;
- return 0;
- case NVME_LOG_CHANGED_NS:
- req->execute = nvmet_execute_get_log_changed_ns;
- return 0;
- case NVME_LOG_CMD_EFFECTS:
- req->execute = nvmet_execute_get_log_cmd_effects_ns;
- return 0;
- case NVME_LOG_ANA:
- req->execute = nvmet_execute_get_log_page_ana;
- return 0;
- }
- break;
+ req->execute = nvmet_execute_get_log_page;
+ return 0;
case nvme_admin_identify:
- req->data_len = NVME_IDENTIFY_DATA_SIZE;
- switch (cmd->identify.cns) {
- case NVME_ID_CNS_NS:
- req->execute = nvmet_execute_identify_ns;
- return 0;
- case NVME_ID_CNS_CTRL:
- req->execute = nvmet_execute_identify_ctrl;
- return 0;
- case NVME_ID_CNS_NS_ACTIVE_LIST:
- req->execute = nvmet_execute_identify_nslist;
- return 0;
- case NVME_ID_CNS_NS_DESC_LIST:
- req->execute = nvmet_execute_identify_desclist;
- return 0;
- }
- break;
+ req->execute = nvmet_execute_identify;
+ return 0;
case nvme_admin_abort_cmd:
req->execute = nvmet_execute_abort;
- req->data_len = 0;
return 0;
case nvme_admin_set_features:
req->execute = nvmet_execute_set_features;
- req->data_len = 0;
return 0;
case nvme_admin_get_features:
req->execute = nvmet_execute_get_features;
- req->data_len = 0;
return 0;
case nvme_admin_async_event:
req->execute = nvmet_execute_async_event;
- req->data_len = 0;
return 0;
case nvme_admin_keep_alive:
req->execute = nvmet_execute_keep_alive;
- req->data_len = 0;
return 0;
}
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 3a67e244e568..576de773b4db 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -129,27 +129,8 @@ static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
}
-static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
+static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status)
{
- struct nvmet_req *req;
-
- while (1) {
- mutex_lock(&ctrl->lock);
- if (!ctrl->nr_async_event_cmds) {
- mutex_unlock(&ctrl->lock);
- return;
- }
-
- req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
- mutex_unlock(&ctrl->lock);
- nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
- }
-}
-
-static void nvmet_async_event_work(struct work_struct *work)
-{
- struct nvmet_ctrl *ctrl =
- container_of(work, struct nvmet_ctrl, async_event_work);
struct nvmet_async_event *aen;
struct nvmet_req *req;
@@ -159,18 +140,41 @@ static void nvmet_async_event_work(struct work_struct *work)
struct nvmet_async_event, entry);
if (!aen || !ctrl->nr_async_event_cmds) {
mutex_unlock(&ctrl->lock);
- return;
+ break;
}
req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
- nvmet_set_result(req, nvmet_async_event_result(aen));
+ if (status == 0)
+ nvmet_set_result(req, nvmet_async_event_result(aen));
list_del(&aen->entry);
kfree(aen);
mutex_unlock(&ctrl->lock);
- nvmet_req_complete(req, 0);
+ nvmet_req_complete(req, status);
+ }
+}
+
+static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
+{
+ struct nvmet_req *req;
+
+ mutex_lock(&ctrl->lock);
+ while (ctrl->nr_async_event_cmds) {
+ req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
+ mutex_unlock(&ctrl->lock);
+ nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
+ mutex_lock(&ctrl->lock);
}
+ mutex_unlock(&ctrl->lock);
+}
+
+static void nvmet_async_event_work(struct work_struct *work)
+{
+ struct nvmet_ctrl *ctrl =
+ container_of(work, struct nvmet_ctrl, async_event_work);
+
+ nvmet_async_events_process(ctrl, 0);
}
void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
@@ -555,7 +559,8 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
} else {
struct nvmet_ns *old;
- list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) {
+ list_for_each_entry_rcu(old, &subsys->namespaces, dev_link,
+ lockdep_is_held(&subsys->lock)) {
BUG_ON(ns->nsid == old->nsid);
if (ns->nsid < old->nsid)
break;
@@ -752,19 +757,24 @@ static void nvmet_confirm_sq(struct percpu_ref *ref)
void nvmet_sq_destroy(struct nvmet_sq *sq)
{
+ u16 status = NVME_SC_INTERNAL | NVME_SC_DNR;
+ struct nvmet_ctrl *ctrl = sq->ctrl;
+
/*
* If this is the admin queue, complete all AERs so that our
* queue doesn't have outstanding requests on it.
*/
- if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
- nvmet_async_events_free(sq->ctrl);
+ if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq) {
+ nvmet_async_events_process(ctrl, status);
+ nvmet_async_events_free(ctrl);
+ }
percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
wait_for_completion(&sq->confirm_done);
wait_for_completion(&sq->free_done);
percpu_ref_exit(&sq->ref);
- if (sq->ctrl) {
- nvmet_ctrl_put(sq->ctrl);
+ if (ctrl) {
+ nvmet_ctrl_put(ctrl);
sq->ctrl = NULL; /* allows reusing the queue later */
}
}
@@ -892,14 +902,10 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
}
if (unlikely(!req->sq->ctrl))
- /* will return an error for any Non-connect command: */
+ /* will return an error for any non-connect command: */
status = nvmet_parse_connect_cmd(req);
else if (likely(req->sq->qid != 0))
status = nvmet_parse_io_cmd(req);
- else if (nvme_is_fabrics(req->cmd))
- status = nvmet_parse_fabrics_cmd(req);
- else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
- status = nvmet_parse_discovery_cmd(req);
else
status = nvmet_parse_admin_cmd(req);
@@ -930,15 +936,28 @@ void nvmet_req_uninit(struct nvmet_req *req)
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);
-void nvmet_req_execute(struct nvmet_req *req)
+bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len)
{
- if (unlikely(req->data_len != req->transfer_len)) {
+ if (unlikely(data_len != req->transfer_len)) {
req->error_loc = offsetof(struct nvme_common_command, dptr);
nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
- } else
- req->execute(req);
+ return false;
+ }
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(nvmet_check_data_len);
+
+bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
+{
+ if (unlikely(data_len > req->transfer_len)) {
+ req->error_loc = offsetof(struct nvme_common_command, dptr);
+ nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
+ return false;
+ }
+
+ return true;
}
-EXPORT_SYMBOL_GPL(nvmet_req_execute);
int nvmet_req_alloc_sgl(struct nvmet_req *req)
{
@@ -966,7 +985,7 @@ int nvmet_req_alloc_sgl(struct nvmet_req *req)
}
req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt);
- if (!req->sg)
+ if (unlikely(!req->sg))
return -ENOMEM;
return 0;
@@ -1174,7 +1193,8 @@ static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl,
ctrl->p2p_client = get_device(req->p2p_client);
- list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link)
+ list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link,
+ lockdep_is_held(&ctrl->subsys->lock))
nvmet_p2pmem_ns_add_p2p(ctrl, ns);
}
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 8efca26b4776..0c2274b21e15 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -157,7 +157,7 @@ static size_t discovery_log_entries(struct nvmet_req *req)
return entries;
}
-static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
+static void nvmet_execute_disc_get_log_page(struct nvmet_req *req)
{
const int entry_size = sizeof(struct nvmf_disc_rsp_page_entry);
struct nvmet_ctrl *ctrl = req->sq->ctrl;
@@ -171,6 +171,16 @@ static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
u16 status = 0;
void *buffer;
+ if (!nvmet_check_data_len(req, data_len))
+ return;
+
+ if (req->cmd->get_log_page.lid != NVME_LOG_DISC) {
+ req->error_loc =
+ offsetof(struct nvme_get_log_page_command, lid);
+ status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ goto out;
+ }
+
/* Spec requires dword aligned offsets */
if (offset & 0x3) {
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
@@ -227,20 +237,35 @@ out:
nvmet_req_complete(req, status);
}
-static void nvmet_execute_identify_disc_ctrl(struct nvmet_req *req)
+static void nvmet_execute_disc_identify(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvme_id_ctrl *id;
+ const char model[] = "Linux";
u16 status = 0;
+ if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE))
+ return;
+
+ if (req->cmd->identify.cns != NVME_ID_CNS_CTRL) {
+ req->error_loc = offsetof(struct nvme_identify, cns);
+ status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ goto out;
+ }
+
id = kzalloc(sizeof(*id), GFP_KERNEL);
if (!id) {
status = NVME_SC_INTERNAL;
goto out;
}
+ memset(id->sn, ' ', sizeof(id->sn));
+ bin2hex(id->sn, &ctrl->subsys->serial,
+ min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2));
memset(id->fr, ' ', sizeof(id->fr));
- strncpy((char *)id->fr, UTS_RELEASE, sizeof(id->fr));
+ memcpy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1, ' ');
+ memcpy_and_pad(id->fr, sizeof(id->fr),
+ UTS_RELEASE, strlen(UTS_RELEASE), ' ');
/* no limit on data transfer sizes for now */
id->mdts = 0;
@@ -273,6 +298,9 @@ static void nvmet_execute_disc_set_features(struct nvmet_req *req)
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 stat;
+ if (!nvmet_check_data_len(req, 0))
+ return;
+
switch (cdw10 & 0xff) {
case NVME_FEAT_KATO:
stat = nvmet_set_feat_kato(req);
@@ -296,6 +324,9 @@ static void nvmet_execute_disc_get_features(struct nvmet_req *req)
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 stat = 0;
+ if (!nvmet_check_data_len(req, 0))
+ return;
+
switch (cdw10 & 0xff) {
case NVME_FEAT_KATO:
nvmet_get_feat_kato(req);
@@ -328,47 +359,22 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
switch (cmd->common.opcode) {
case nvme_admin_set_features:
req->execute = nvmet_execute_disc_set_features;
- req->data_len = 0;
return 0;
case nvme_admin_get_features:
req->execute = nvmet_execute_disc_get_features;
- req->data_len = 0;
return 0;
case nvme_admin_async_event:
req->execute = nvmet_execute_async_event;
- req->data_len = 0;
return 0;
case nvme_admin_keep_alive:
req->execute = nvmet_execute_keep_alive;
- req->data_len = 0;
return 0;
case nvme_admin_get_log_page:
- req->data_len = nvmet_get_log_page_len(cmd);
-
- switch (cmd->get_log_page.lid) {
- case NVME_LOG_DISC:
- req->execute = nvmet_execute_get_disc_log_page;
- return 0;
- default:
- pr_err("unsupported get_log_page lid %d\n",
- cmd->get_log_page.lid);
- req->error_loc =
- offsetof(struct nvme_get_log_page_command, lid);
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
- }
+ req->execute = nvmet_execute_disc_get_log_page;
+ return 0;
case nvme_admin_identify:
- req->data_len = NVME_IDENTIFY_DATA_SIZE;
- switch (cmd->identify.cns) {
- case NVME_ID_CNS_CTRL:
- req->execute =
- nvmet_execute_identify_disc_ctrl;
- return 0;
- default:
- pr_err("unsupported identify cns %d\n",
- cmd->identify.cns);
- req->error_loc = offsetof(struct nvme_identify, cns);
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
- }
+ req->execute = nvmet_execute_disc_identify;
+ return 0;
default:
pr_err("unhandled cmd %d\n", cmd->common.opcode);
req->error_loc = offsetof(struct nvme_common_command, opcode);
@@ -381,9 +387,7 @@ int __init nvmet_init_discovery(void)
{
nvmet_disc_subsys =
nvmet_subsys_alloc(NVME_DISC_SUBSYS_NAME, NVME_NQN_DISC);
- if (IS_ERR(nvmet_disc_subsys))
- return PTR_ERR(nvmet_disc_subsys);
- return 0;
+ return PTR_ERR_OR_ZERO(nvmet_disc_subsys);
}
void nvmet_exit_discovery(void)
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index d16b55ffe79f..feef15c38ec9 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -12,6 +12,9 @@ static void nvmet_execute_prop_set(struct nvmet_req *req)
u64 val = le64_to_cpu(req->cmd->prop_set.value);
u16 status = 0;
+ if (!nvmet_check_data_len(req, 0))
+ return;
+
if (req->cmd->prop_set.attrib & 1) {
req->error_loc =
offsetof(struct nvmf_property_set_command, attrib);
@@ -38,6 +41,9 @@ static void nvmet_execute_prop_get(struct nvmet_req *req)
u16 status = 0;
u64 val = 0;
+ if (!nvmet_check_data_len(req, 0))
+ return;
+
if (req->cmd->prop_get.attrib & 1) {
switch (le32_to_cpu(req->cmd->prop_get.offset)) {
case NVME_REG_CAP:
@@ -82,11 +88,9 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req)
switch (cmd->fabrics.fctype) {
case nvme_fabrics_type_property_set:
- req->data_len = 0;
req->execute = nvmet_execute_prop_set;
break;
case nvme_fabrics_type_property_get:
- req->data_len = 0;
req->execute = nvmet_execute_prop_get;
break;
default:
@@ -105,6 +109,7 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
u16 qid = le16_to_cpu(c->qid);
u16 sqsize = le16_to_cpu(c->sqsize);
struct nvmet_ctrl *old;
+ u16 ret;
old = cmpxchg(&req->sq->ctrl, NULL, ctrl);
if (old) {
@@ -115,7 +120,8 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
if (!sqsize) {
pr_warn("queue size zero!\n");
req->error_loc = offsetof(struct nvmf_connect_command, sqsize);
- return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
+ ret = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
+ goto err;
}
/* note: convert queue size from 0's-based value to 1's-based value */
@@ -128,16 +134,19 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
}
if (ctrl->ops->install_queue) {
- u16 ret = ctrl->ops->install_queue(req->sq);
-
+ ret = ctrl->ops->install_queue(req->sq);
if (ret) {
pr_err("failed to install queue %d cntlid %d ret %x\n",
- qid, ret, ctrl->cntlid);
- return ret;
+ qid, ctrl->cntlid, ret);
+ goto err;
}
}
return 0;
+
+err:
+ req->sq->ctrl = NULL;
+ return ret;
}
static void nvmet_execute_admin_connect(struct nvmet_req *req)
@@ -147,6 +156,9 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
struct nvmet_ctrl *ctrl = NULL;
u16 status = 0;
+ if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data)))
+ return;
+
d = kmalloc(sizeof(*d), GFP_KERNEL);
if (!d) {
status = NVME_SC_INTERNAL;
@@ -211,6 +223,9 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
u16 qid = le16_to_cpu(c->qid);
u16 status = 0;
+ if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data)))
+ return;
+
d = kmalloc(sizeof(*d), GFP_KERNEL);
if (!d) {
status = NVME_SC_INTERNAL;
@@ -281,7 +296,6 @@ u16 nvmet_parse_connect_cmd(struct nvmet_req *req)
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
- req->data_len = sizeof(struct nvmf_connect_data);
if (cmd->connect.qid == 0)
req->execute = nvmet_execute_admin_connect;
else
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index ce8d819f86cc..a0db6371b43e 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -1495,20 +1495,20 @@ static void
nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
struct nvmet_fc_ls_iod *iod)
{
- struct fcnvme_ls_disconnect_rqst *rqst =
- (struct fcnvme_ls_disconnect_rqst *)iod->rqstbuf;
- struct fcnvme_ls_disconnect_acc *acc =
- (struct fcnvme_ls_disconnect_acc *)iod->rspbuf;
+ struct fcnvme_ls_disconnect_assoc_rqst *rqst =
+ (struct fcnvme_ls_disconnect_assoc_rqst *)iod->rqstbuf;
+ struct fcnvme_ls_disconnect_assoc_acc *acc =
+ (struct fcnvme_ls_disconnect_assoc_acc *)iod->rspbuf;
struct nvmet_fc_tgt_assoc *assoc;
int ret = 0;
memset(acc, 0, sizeof(*acc));
- if (iod->rqstdatalen < sizeof(struct fcnvme_ls_disconnect_rqst))
+ if (iod->rqstdatalen < sizeof(struct fcnvme_ls_disconnect_assoc_rqst))
ret = VERR_DISCONN_LEN;
else if (rqst->desc_list_len !=
fcnvme_lsdesc_len(
- sizeof(struct fcnvme_ls_disconnect_rqst)))
+ sizeof(struct fcnvme_ls_disconnect_assoc_rqst)))
ret = VERR_DISCONN_RQST_LEN;
else if (rqst->associd.desc_tag != cpu_to_be32(FCNVME_LSDESC_ASSOC_ID))
ret = VERR_ASSOC_ID;
@@ -1523,8 +1523,11 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
fcnvme_lsdesc_len(
sizeof(struct fcnvme_lsdesc_disconn_cmd)))
ret = VERR_DISCONN_CMD_LEN;
- else if ((rqst->discon_cmd.scope != FCNVME_DISCONN_ASSOCIATION) &&
- (rqst->discon_cmd.scope != FCNVME_DISCONN_CONNECTION))
+ /*
+ * As the standard changed on the LS, check if old format and scope
+ * something other than Association (e.g. 0).
+ */
+ else if (rqst->discon_cmd.rsvd8[0])
ret = VERR_DISCONN_SCOPE;
else {
/* match an active association */
@@ -1556,8 +1559,8 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
fcnvme_lsdesc_len(
- sizeof(struct fcnvme_ls_disconnect_acc)),
- FCNVME_LS_DISCONNECT);
+ sizeof(struct fcnvme_ls_disconnect_assoc_acc)),
+ FCNVME_LS_DISCONNECT_ASSOC);
/* release get taken in nvmet_fc_find_target_assoc */
nvmet_fc_tgt_a_put(iod->assoc);
@@ -1632,7 +1635,7 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport,
/* Creates an IO Queue/Connection */
nvmet_fc_ls_create_connection(tgtport, iod);
break;
- case FCNVME_LS_DISCONNECT:
+ case FCNVME_LS_DISCONNECT_ASSOC:
/* Terminate a Queue/Connection or the Association */
nvmet_fc_ls_disconnect(tgtport, iod);
break;
@@ -2015,7 +2018,7 @@ nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod)
}
/* data transfer complete, resume with nvmet layer */
- nvmet_req_execute(&fod->req);
+ fod->req.execute(&fod->req);
break;
case NVMET_FCOP_READDATA:
@@ -2231,7 +2234,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
* can invoke the nvmet_layer now. If read data, cmd completion will
* push the data
*/
- nvmet_req_execute(&fod->req);
+ fod->req.execute(&fod->req);
return;
transport_error:
@@ -2299,7 +2302,7 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port,
/* validate iu, so the connection id can be used to find the queue */
if ((cmdiubuf_len != sizeof(*cmdiu)) ||
- (cmdiu->scsi_id != NVME_CMD_SCSI_ID) ||
+ (cmdiu->format_id != NVME_CMD_FORMAT_ID) ||
(cmdiu->fc_id != NVME_CMD_FC_ID) ||
(be16_to_cpu(cmdiu->iu_len) != (sizeof(*cmdiu)/4)))
return -EIO;
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index b50b53db3746..1c50af6219f3 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -850,6 +850,7 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport)
#define FCLOOP_DMABOUND_4G 0xFFFFFFFF
static struct nvme_fc_port_template fctemplate = {
+ .module = THIS_MODULE,
.localport_delete = fcloop_localport_delete,
.remoteport_delete = fcloop_remoteport_delete,
.create_queue = fcloop_create_queue,
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index de0bff70ebb6..ea0e596be15d 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -11,10 +11,10 @@
void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
{
const struct queue_limits *ql = &bdev_get_queue(bdev)->limits;
- /* Number of physical blocks per logical block. */
- const u32 ppl = ql->physical_block_size / ql->logical_block_size;
- /* Physical blocks per logical block, 0's based. */
- const __le16 ppl0b = to0based(ppl);
+ /* Number of logical blocks per physical block. */
+ const u32 lpp = ql->physical_block_size / ql->logical_block_size;
+ /* Logical blocks per physical block, 0's based. */
+ const __le16 lpp0b = to0based(lpp);
/*
* For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN,
@@ -25,9 +25,9 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
* field from the identify controller data structure should be used.
*/
id->nsfeat |= 1 << 1;
- id->nawun = ppl0b;
- id->nawupf = ppl0b;
- id->nacwu = ppl0b;
+ id->nawun = lpp0b;
+ id->nawupf = lpp0b;
+ id->nacwu = lpp0b;
/*
* Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and
@@ -36,7 +36,7 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
*/
id->nsfeat |= 1 << 4;
/* NPWG = Namespace Preferred Write Granularity. 0's based */
- id->npwg = ppl0b;
+ id->npwg = lpp0b;
/* NPWA = Namespace Preferred Write Alignment. 0's based */
id->npwa = id->npwg;
/* NPDG = Namespace Preferred Deallocate Granularity. 0's based */
@@ -147,8 +147,12 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
int sg_cnt = req->sg_cnt;
struct bio *bio;
struct scatterlist *sg;
+ struct blk_plug plug;
sector_t sector;
- int op, op_flags = 0, i;
+ int op, i;
+
+ if (!nvmet_check_data_len(req, nvmet_rw_len(req)))
+ return;
if (!req->sg_cnt) {
nvmet_req_complete(req, 0);
@@ -156,21 +160,20 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
}
if (req->cmd->rw.opcode == nvme_cmd_write) {
- op = REQ_OP_WRITE;
- op_flags = REQ_SYNC | REQ_IDLE;
+ op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
- op_flags |= REQ_FUA;
+ op |= REQ_FUA;
} else {
op = REQ_OP_READ;
}
if (is_pci_p2pdma_page(sg_page(req->sg)))
- op_flags |= REQ_NOMERGE;
+ op |= REQ_NOMERGE;
sector = le64_to_cpu(req->cmd->rw.slba);
sector <<= (req->ns->blksize_shift - 9);
- if (req->data_len <= NVMET_MAX_INLINE_DATA_LEN) {
+ if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) {
bio = &req->b.inline_bio;
bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
} else {
@@ -180,8 +183,9 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
bio->bi_iter.bi_sector = sector;
bio->bi_private = req;
bio->bi_end_io = nvmet_bio_done;
- bio_set_op_attrs(bio, op, op_flags);
+ bio->bi_opf = op;
+ blk_start_plug(&plug);
for_each_sg(req->sg, sg, req->sg_cnt, i) {
while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
!= sg->length) {
@@ -190,7 +194,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
bio_set_dev(bio, req->ns->bdev);
bio->bi_iter.bi_sector = sector;
- bio_set_op_attrs(bio, op, op_flags);
+ bio->bi_opf = op;
bio_chain(bio, prev);
submit_bio(prev);
@@ -201,12 +205,16 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
}
submit_bio(bio);
+ blk_finish_plug(&plug);
}
static void nvmet_bdev_execute_flush(struct nvmet_req *req)
{
struct bio *bio = &req->b.inline_bio;
+ if (!nvmet_check_data_len(req, 0))
+ return;
+
bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
bio_set_dev(bio, req->ns->bdev);
bio->bi_private = req;
@@ -261,12 +269,10 @@ static void nvmet_bdev_execute_discard(struct nvmet_req *req)
if (bio) {
bio->bi_private = req;
bio->bi_end_io = nvmet_bio_done;
- if (status) {
- bio->bi_status = BLK_STS_IOERR;
- bio_endio(bio);
- } else {
+ if (status)
+ bio_io_error(bio);
+ else
submit_bio(bio);
- }
} else {
nvmet_req_complete(req, status);
}
@@ -274,6 +280,9 @@ static void nvmet_bdev_execute_discard(struct nvmet_req *req)
static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
{
+ if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req)))
+ return;
+
switch (le32_to_cpu(req->cmd->dsm.attributes)) {
case NVME_DSMGMT_AD:
nvmet_bdev_execute_discard(req);
@@ -295,6 +304,9 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
sector_t nr_sector;
int ret;
+ if (!nvmet_check_data_len(req, 0))
+ return;
+
sector = le64_to_cpu(write_zeroes->slba) <<
(req->ns->blksize_shift - 9);
nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
@@ -319,20 +331,15 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
case nvme_cmd_read:
case nvme_cmd_write:
req->execute = nvmet_bdev_execute_rw;
- req->data_len = nvmet_rw_len(req);
return 0;
case nvme_cmd_flush:
req->execute = nvmet_bdev_execute_flush;
- req->data_len = 0;
return 0;
case nvme_cmd_dsm:
req->execute = nvmet_bdev_execute_dsm;
- req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
- sizeof(struct nvme_dsm_range);
return 0;
case nvme_cmd_write_zeroes:
req->execute = nvmet_bdev_execute_write_zeroes;
- req->data_len = 0;
return 0;
default:
pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 05453f5d1448..cd5670b83118 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -126,7 +126,7 @@ static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2)
mempool_free(req->f.bvec, req->ns->bvec_pool);
}
- if (unlikely(ret != req->data_len))
+ if (unlikely(ret != req->transfer_len))
status = errno_to_nvme_status(req, ret);
nvmet_req_complete(req, status);
}
@@ -146,7 +146,7 @@ static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags)
is_sync = true;
pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift;
- if (unlikely(pos + req->data_len > req->ns->size)) {
+ if (unlikely(pos + req->transfer_len > req->ns->size)) {
nvmet_req_complete(req, errno_to_nvme_status(req, -ENOSPC));
return true;
}
@@ -173,7 +173,7 @@ static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags)
nr_bvec--;
}
- if (WARN_ON_ONCE(total_len != req->data_len)) {
+ if (WARN_ON_ONCE(total_len != req->transfer_len)) {
ret = -EIO;
goto complete;
}
@@ -232,6 +232,9 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
{
ssize_t nr_bvec = req->sg_cnt;
+ if (!nvmet_check_data_len(req, nvmet_rw_len(req)))
+ return;
+
if (!req->sg_cnt || !nr_bvec) {
nvmet_req_complete(req, 0);
return;
@@ -273,6 +276,8 @@ static void nvmet_file_flush_work(struct work_struct *w)
static void nvmet_file_execute_flush(struct nvmet_req *req)
{
+ if (!nvmet_check_data_len(req, 0))
+ return;
INIT_WORK(&req->f.work, nvmet_file_flush_work);
schedule_work(&req->f.work);
}
@@ -331,6 +336,8 @@ static void nvmet_file_dsm_work(struct work_struct *w)
static void nvmet_file_execute_dsm(struct nvmet_req *req)
{
+ if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req)))
+ return;
INIT_WORK(&req->f.work, nvmet_file_dsm_work);
schedule_work(&req->f.work);
}
@@ -359,6 +366,8 @@ static void nvmet_file_write_zeroes_work(struct work_struct *w)
static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
{
+ if (!nvmet_check_data_len(req, 0))
+ return;
INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work);
schedule_work(&req->f.work);
}
@@ -371,20 +380,15 @@ u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
case nvme_cmd_read:
case nvme_cmd_write:
req->execute = nvmet_file_execute_rw;
- req->data_len = nvmet_rw_len(req);
return 0;
case nvme_cmd_flush:
req->execute = nvmet_file_execute_flush;
- req->data_len = 0;
return 0;
case nvme_cmd_dsm:
req->execute = nvmet_file_execute_dsm;
- req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
- sizeof(struct nvme_dsm_range);
return 0;
case nvme_cmd_write_zeroes:
req->execute = nvmet_file_execute_write_zeroes;
- req->data_len = 0;
return 0;
default:
pr_err("unhandled cmd for file ns %d on qid %d\n",
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 0940c5024a34..4df4ebde208a 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -76,8 +76,7 @@ static void nvme_loop_complete_rq(struct request *req)
{
struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
- nvme_cleanup_cmd(req);
- sg_free_table_chained(&iod->sg_table, SG_CHUNK_SIZE);
+ sg_free_table_chained(&iod->sg_table, NVME_INLINE_SG_CNT);
nvme_complete_rq(req);
}
@@ -102,8 +101,8 @@ static void nvme_loop_queue_response(struct nvmet_req *req)
* aborts. We don't even bother to allocate a struct request
* for them but rather special case them here.
*/
- if (unlikely(nvme_loop_queue_idx(queue) == 0 &&
- cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) {
+ if (unlikely(nvme_is_aen_req(nvme_loop_queue_idx(queue),
+ cqe->command_id))) {
nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
&cqe->result);
} else {
@@ -126,7 +125,7 @@ static void nvme_loop_execute_work(struct work_struct *work)
struct nvme_loop_iod *iod =
container_of(work, struct nvme_loop_iod, work);
- nvmet_req_execute(&iod->req);
+ iod->req.execute(&iod->req);
}
static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
@@ -157,8 +156,10 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
iod->sg_table.sgl = iod->first_sgl;
if (sg_alloc_table_chained(&iod->sg_table,
blk_rq_nr_phys_segments(req),
- iod->sg_table.sgl, SG_CHUNK_SIZE))
+ iod->sg_table.sgl, NVME_INLINE_SG_CNT)) {
+ nvme_cleanup_cmd(req);
return BLK_STS_RESOURCE;
+ }
iod->req.sg = iod->sg_table.sgl;
iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
@@ -253,6 +254,7 @@ static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl)
clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags);
nvmet_sq_destroy(&ctrl->queues[0].nvme_sq);
blk_cleanup_queue(ctrl->ctrl.admin_q);
+ blk_cleanup_queue(ctrl->ctrl.fabrics_q);
blk_mq_free_tag_set(&ctrl->admin_tag_set);
}
@@ -340,7 +342,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */
ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
- SG_CHUNK_SIZE * sizeof(struct scatterlist);
+ NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
ctrl->admin_tag_set.driver_data = ctrl;
ctrl->admin_tag_set.nr_hw_queues = 1;
ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
@@ -357,10 +359,16 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
goto out_free_sq;
ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set;
+ ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set);
+ if (IS_ERR(ctrl->ctrl.fabrics_q)) {
+ error = PTR_ERR(ctrl->ctrl.fabrics_q);
+ goto out_free_tagset;
+ }
+
ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
if (IS_ERR(ctrl->ctrl.admin_q)) {
error = PTR_ERR(ctrl->ctrl.admin_q);
- goto out_free_tagset;
+ goto out_cleanup_fabrics_q;
}
error = nvmf_connect_admin_queue(&ctrl->ctrl);
@@ -369,23 +377,15 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags);
- error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap);
- if (error) {
- dev_err(ctrl->ctrl.device,
- "prop_get NVME_REG_CAP failed\n");
- goto out_cleanup_queue;
- }
-
- ctrl->ctrl.sqsize =
- min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
-
- error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
+ error = nvme_enable_ctrl(&ctrl->ctrl);
if (error)
goto out_cleanup_queue;
ctrl->ctrl.max_hw_sectors =
(NVME_LOOP_MAX_SEGMENTS - 1) << (PAGE_SHIFT - 9);
+ blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
+
error = nvme_init_identify(&ctrl->ctrl);
if (error)
goto out_cleanup_queue;
@@ -394,6 +394,8 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
out_cleanup_queue:
blk_cleanup_queue(ctrl->ctrl.admin_q);
+out_cleanup_fabrics_q:
+ blk_cleanup_queue(ctrl->ctrl.fabrics_q);
out_free_tagset:
blk_mq_free_tag_set(&ctrl->admin_tag_set);
out_free_sq:
@@ -407,16 +409,17 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
nvme_stop_queues(&ctrl->ctrl);
blk_mq_tagset_busy_iter(&ctrl->tag_set,
nvme_cancel_request, &ctrl->ctrl);
+ blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
nvme_loop_destroy_io_queues(ctrl);
}
+ blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
if (ctrl->ctrl.state == NVME_CTRL_LIVE)
nvme_shutdown_ctrl(&ctrl->ctrl);
- blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
nvme_cancel_request, &ctrl->ctrl);
- blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
+ blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
nvme_loop_destroy_admin_queue(ctrl);
}
@@ -513,7 +516,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
ctrl->tag_set.numa_node = NUMA_NO_NODE;
ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
- SG_CHUNK_SIZE * sizeof(struct scatterlist);
+ NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
ctrl->tag_set.driver_data = ctrl;
ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index c51f8dd01dc4..eda28b22a2c8 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -304,8 +304,6 @@ struct nvmet_req {
} f;
};
int sg_cnt;
- /* data length as parsed from the command: */
- size_t data_len;
/* data length as parsed from the SGL descriptor: */
size_t transfer_len;
@@ -375,7 +373,8 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops);
void nvmet_req_uninit(struct nvmet_req *req);
-void nvmet_req_execute(struct nvmet_req *req);
+bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len);
+bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len);
void nvmet_req_complete(struct nvmet_req *req, u16 status);
int nvmet_req_alloc_sgl(struct nvmet_req *req);
void nvmet_req_free_sgl(struct nvmet_req *req);
@@ -495,6 +494,12 @@ static inline u32 nvmet_rw_len(struct nvmet_req *req)
req->ns->blksize_shift;
}
+static inline u32 nvmet_dsm_len(struct nvmet_req *req)
+{
+ return (le32_to_cpu(req->cmd->dsm.nr) + 1) *
+ sizeof(struct nvme_dsm_range);
+}
+
u16 errno_to_nvme_status(struct nvmet_req *req, int errno);
/* Convert a 32-bit number to a 16-bit 0's based number */
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 36d906a7f70d..37d262a65877 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -603,7 +603,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
return;
}
- nvmet_req_execute(&rsp->req);
+ rsp->req.execute(&rsp->req);
}
static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len,
@@ -672,13 +672,13 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
return 0;
ret = nvmet_req_alloc_sgl(&rsp->req);
- if (ret < 0)
+ if (unlikely(ret < 0))
goto error_out;
ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
rsp->req.sg, rsp->req.sg_cnt, 0, addr, key,
nvmet_data_dir(&rsp->req));
- if (ret < 0)
+ if (unlikely(ret < 0))
goto error_out;
rsp->n_rdma += ret;
@@ -746,7 +746,7 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp)
queue->cm_id->port_num, &rsp->read_cqe, NULL))
nvmet_req_complete(&rsp->req, NVME_SC_DATA_XFER_ERROR);
} else {
- nvmet_req_execute(&rsp->req);
+ rsp->req.execute(&rsp->req);
}
return true;
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 69b83fa0c76c..af674fc0bb1e 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -320,7 +320,7 @@ static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd)
struct nvme_sgl_desc *sgl = &cmd->req.cmd->common.dptr.sgl;
u32 len = le32_to_cpu(sgl->length);
- if (!cmd->req.data_len)
+ if (!len)
return 0;
if (sgl->type == ((NVME_SGL_FMT_DATA_DESC << 4) |
@@ -813,13 +813,11 @@ free_crypto:
static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue,
struct nvmet_tcp_cmd *cmd, struct nvmet_req *req)
{
+ size_t data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length);
int ret;
- /* recover the expected data transfer length */
- req->data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length);
-
if (!nvme_is_write(cmd->req.cmd) ||
- req->data_len > cmd->req.port->inline_data_size) {
+ data_len > cmd->req.port->inline_data_size) {
nvmet_prepare_receive_pdu(queue);
return;
}
@@ -932,7 +930,7 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
goto out;
}
- nvmet_req_execute(&queue->cmd->req);
+ queue->cmd->req.execute(&queue->cmd->req);
out:
nvmet_prepare_receive_pdu(queue);
return ret;
@@ -1052,7 +1050,7 @@ static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue)
nvmet_tcp_prep_recv_ddgst(cmd);
return 0;
}
- nvmet_req_execute(&cmd->req);
+ cmd->req.execute(&cmd->req);
}
nvmet_prepare_receive_pdu(queue);
@@ -1092,7 +1090,7 @@ static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue)
if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) &&
cmd->rbytes_done == cmd->req.transfer_len)
- nvmet_req_execute(&cmd->req);
+ cmd->req.execute(&cmd->req);
ret = 0;
out:
nvmet_prepare_receive_pdu(queue);
@@ -1306,6 +1304,7 @@ static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd)
{
nvmet_req_uninit(&cmd->req);
nvmet_tcp_unmap_pdu_iovec(cmd);
+ kfree(cmd->iov);
sgl_free(cmd->req.sg);
}
@@ -1410,6 +1409,7 @@ done:
static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
{
struct socket *sock = queue->sock;
+ struct inet_sock *inet = inet_sk(sock->sk);
struct linger sol = { .l_onoff = 1, .l_linger = 0 };
int ret;
@@ -1433,6 +1433,16 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
if (ret)
return ret;
+ /* Set socket type of service */
+ if (inet->rcv_tos > 0) {
+ int tos = inet->rcv_tos;
+
+ ret = kernel_setsockopt(sock, SOL_IP, IP_TOS,
+ (char *)&tos, sizeof(tos));
+ if (ret)
+ return ret;
+ }
+
write_lock_bh(&sock->sk->sk_callback_lock);
sock->sk->sk_user_data = queue;
queue->data_ready = sock->sk->sk_data_ready;
diff --git a/drivers/nvme/target/trace.c b/drivers/nvme/target/trace.c
index 6af11d493271..1373a3c67962 100644
--- a/drivers/nvme/target/trace.c
+++ b/drivers/nvme/target/trace.c
@@ -33,6 +33,22 @@ static const char *nvmet_trace_admin_get_features(struct trace_seq *p,
return ret;
}
+static const char *nvmet_trace_get_lba_status(struct trace_seq *p,
+ u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u64 slba = get_unaligned_le64(cdw10);
+ u32 mndw = get_unaligned_le32(cdw10 + 8);
+ u16 rl = get_unaligned_le16(cdw10 + 12);
+ u8 atype = cdw10[15];
+
+ trace_seq_printf(p, "slba=0x%llx, mndw=0x%x, rl=0x%x, atype=%u",
+ slba, mndw, rl, atype);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
static const char *nvmet_trace_read_write(struct trace_seq *p, u8 *cdw10)
{
const char *ret = trace_seq_buffer_ptr(p);
@@ -80,6 +96,8 @@ const char *nvmet_trace_parse_admin_cmd(struct trace_seq *p,
return nvmet_trace_admin_identify(p, cdw10);
case nvme_admin_get_features:
return nvmet_trace_admin_get_features(p, cdw10);
+ case nvme_admin_get_lba_status:
+ return nvmet_trace_get_lba_status(p, cdw10);
default:
return nvmet_trace_common(p, cdw10);
}
OpenPOWER on IntegriCloud