diff options
Diffstat (limited to 'drivers/nvme')
-rw-r--r-- | drivers/nvme/host/core.c | 42 | ||||
-rw-r--r-- | drivers/nvme/host/fc.c | 83 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 4 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 139 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 108 | ||||
-rw-r--r-- | drivers/nvme/target/admin-cmd.c | 16 | ||||
-rw-r--r-- | drivers/nvme/target/configfs.c | 30 | ||||
-rw-r--r-- | drivers/nvme/target/core.c | 5 | ||||
-rw-r--r-- | drivers/nvme/target/fc.c | 20 | ||||
-rw-r--r-- | drivers/nvme/target/io-cmd.c | 2 | ||||
-rw-r--r-- | drivers/nvme/target/loop.c | 47 | ||||
-rw-r--r-- | drivers/nvme/target/nvmet.h | 2 |
12 files changed, 283 insertions, 215 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d70df1d0072d..3b77cfe5aa1e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -131,7 +131,7 @@ void nvme_complete_rq(struct request *req) { if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) { nvme_req(req)->retries++; - blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q)); + blk_mq_requeue_request(req, true); return; } @@ -336,7 +336,7 @@ static int nvme_get_stream_params(struct nvme_ctrl *ctrl, c.directive.opcode = nvme_admin_directive_recv; c.directive.nsid = cpu_to_le32(nsid); - c.directive.numd = sizeof(*s); + c.directive.numd = cpu_to_le32(sizeof(*s)); c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM; c.directive.dtype = NVME_DIR_STREAMS; @@ -2591,12 +2591,29 @@ static void nvme_release_instance(struct nvme_ctrl *ctrl) spin_unlock(&dev_list_lock); } -void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) +void nvme_stop_ctrl(struct nvme_ctrl *ctrl) { + nvme_stop_keep_alive(ctrl); flush_work(&ctrl->async_event_work); flush_work(&ctrl->scan_work); - nvme_remove_namespaces(ctrl); +} +EXPORT_SYMBOL_GPL(nvme_stop_ctrl); +void nvme_start_ctrl(struct nvme_ctrl *ctrl) +{ + if (ctrl->kato) + nvme_start_keep_alive(ctrl); + + if (ctrl->queue_count > 1) { + nvme_queue_scan(ctrl); + nvme_queue_async_events(ctrl); + nvme_start_queues(ctrl); + } +} +EXPORT_SYMBOL_GPL(nvme_start_ctrl); + +void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) +{ device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance)); spin_lock(&dev_list_lock); @@ -2694,9 +2711,6 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) /* Forcibly unquiesce queues to avoid blocking dispatch */ blk_mq_unquiesce_queue(ctrl->admin_q); - /* Forcibly start all queues to avoid having stuck requests */ - blk_mq_start_hw_queues(ctrl->admin_q); - list_for_each_entry(ns, &ctrl->namespaces, list) { /* * Revalidating a dead namespace sets capacity to 0. This will @@ -2709,16 +2723,6 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) /* Forcibly unquiesce queues to avoid blocking dispatch */ blk_mq_unquiesce_queue(ns->queue); - - /* - * Forcibly start all queues to avoid having stuck requests. - * Note that we must ensure the queues are not stopped - * when the final removal happens. - */ - blk_mq_start_hw_queues(ns->queue); - - /* draining requests in requeue list */ - blk_mq_kick_requeue_list(ns->queue); } mutex_unlock(&ctrl->namespaces_mutex); } @@ -2787,10 +2791,8 @@ void nvme_start_queues(struct nvme_ctrl *ctrl) struct nvme_ns *ns; mutex_lock(&ctrl->namespaces_mutex); - list_for_each_entry(ns, &ctrl->namespaces, list) { + list_for_each_entry(ns, &ctrl->namespaces, list) blk_mq_unquiesce_queue(ns->queue); - blk_mq_kick_requeue_list(ns->queue); - } mutex_unlock(&ctrl->namespaces_mutex); } EXPORT_SYMBOL_GPL(nvme_start_queues); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index ed87214fdc0e..d666ada39a9b 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -148,13 +148,10 @@ struct nvme_fc_ctrl { struct device *dev; struct nvme_fc_lport *lport; struct nvme_fc_rport *rport; - u32 queue_count; u32 cnum; u64 association_id; - u64 cap; - struct list_head ctrl_list; /* rport->ctrl_list */ struct blk_mq_tag_set admin_tag_set; @@ -1614,7 +1611,7 @@ nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl) { int i; - for (i = 1; i < ctrl->queue_count; i++) + for (i = 1; i < ctrl->ctrl.queue_count; i++) nvme_fc_free_queue(&ctrl->queues[i]); } @@ -1635,10 +1632,10 @@ __nvme_fc_create_hw_queue(struct nvme_fc_ctrl *ctrl, static void nvme_fc_delete_hw_io_queues(struct nvme_fc_ctrl *ctrl) { - struct nvme_fc_queue *queue = &ctrl->queues[ctrl->queue_count - 1]; + struct nvme_fc_queue *queue = &ctrl->queues[ctrl->ctrl.queue_count - 1]; int i; - for (i = ctrl->queue_count - 1; i >= 1; i--, queue--) + for (i = ctrl->ctrl.queue_count - 1; i >= 1; i--, queue--) __nvme_fc_delete_hw_queue(ctrl, queue, i); } @@ -1648,7 +1645,7 @@ nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) struct nvme_fc_queue *queue = &ctrl->queues[1]; int i, ret; - for (i = 1; i < ctrl->queue_count; i++, queue++) { + for (i = 1; i < ctrl->ctrl.queue_count; i++, queue++) { ret = __nvme_fc_create_hw_queue(ctrl, queue, i, qsize); if (ret) goto delete_queues; @@ -1667,7 +1664,7 @@ nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) { int i, ret = 0; - for (i = 1; i < ctrl->queue_count; i++) { + for (i = 1; i < ctrl->ctrl.queue_count; i++) { ret = nvme_fc_connect_queue(ctrl, &ctrl->queues[i], qsize, (qsize / 5)); if (ret) @@ -1685,7 +1682,7 @@ nvme_fc_init_io_queues(struct nvme_fc_ctrl *ctrl) { int i; - for (i = 1; i < ctrl->queue_count; i++) + for (i = 1; i < ctrl->ctrl.queue_count; i++) nvme_fc_init_queue(ctrl, i, ctrl->ctrl.sqsize); } @@ -1706,6 +1703,7 @@ nvme_fc_ctrl_free(struct kref *ref) list_del(&ctrl->ctrl_list); spin_unlock_irqrestore(&ctrl->rport->lock, flags); + blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); blk_cleanup_queue(ctrl->ctrl.admin_q); blk_mq_free_tag_set(&ctrl->admin_tag_set); @@ -1969,10 +1967,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, if (ret != -EBUSY) return BLK_STS_IOERR; - if (op->rq) { - blk_mq_stop_hw_queues(op->rq->q); - blk_mq_delay_queue(queue->hctx, NVMEFC_QUEUE_DELAY); - } + if (op->rq) + blk_mq_delay_run_hw_queue(queue->hctx, NVMEFC_QUEUE_DELAY); + return BLK_STS_RESOURCE; } @@ -2178,17 +2175,20 @@ static int nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) { struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + unsigned int nr_io_queues; int ret; - ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); + nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()), + ctrl->lport->ops->max_hw_queues); + ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); if (ret) { dev_info(ctrl->ctrl.device, "set_queue_count failed: %d\n", ret); return ret; } - ctrl->queue_count = opts->nr_io_queues + 1; - if (!opts->nr_io_queues) + ctrl->ctrl.queue_count = nr_io_queues + 1; + if (!nr_io_queues) return 0; nvme_fc_init_io_queues(ctrl); @@ -2204,7 +2204,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) sizeof(struct scatterlist)) + ctrl->lport->ops->fcprqst_priv_sz; ctrl->tag_set.driver_data = ctrl; - ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1; + ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; ctrl->tag_set.timeout = NVME_IO_TIMEOUT; ret = blk_mq_alloc_tag_set(&ctrl->tag_set); @@ -2232,7 +2232,6 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) out_delete_hw_queues: nvme_fc_delete_hw_io_queues(ctrl); out_cleanup_blk_queue: - nvme_stop_keep_alive(&ctrl->ctrl); blk_cleanup_queue(ctrl->ctrl.connect_q); out_free_tag_set: blk_mq_free_tag_set(&ctrl->tag_set); @@ -2248,17 +2247,21 @@ static int nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) { struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + unsigned int nr_io_queues; int ret; - ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); + nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()), + ctrl->lport->ops->max_hw_queues); + ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); if (ret) { dev_info(ctrl->ctrl.device, "set_queue_count failed: %d\n", ret); return ret; } + ctrl->ctrl.queue_count = nr_io_queues + 1; /* check for io queues existing */ - if (ctrl->queue_count == 1) + if (ctrl->ctrl.queue_count == 1) return 0; nvme_fc_init_io_queues(ctrl); @@ -2275,6 +2278,8 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) if (ret) goto out_delete_hw_queues; + blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues); + return 0; out_delete_hw_queues: @@ -2316,7 +2321,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) goto out_delete_hw_queue; if (ctrl->ctrl.state != NVME_CTRL_NEW) - blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true); + blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); ret = nvmf_connect_admin_queue(&ctrl->ctrl); if (ret) @@ -2329,7 +2334,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) * prior connection values */ - ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap); + ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap); if (ret) { dev_err(ctrl->ctrl.device, "prop_get NVME_REG_CAP failed\n"); @@ -2337,9 +2342,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) } ctrl->ctrl.sqsize = - min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize); + min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap) + 1, ctrl->ctrl.sqsize); - ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); + ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); if (ret) goto out_disconnect_admin_queue; @@ -2360,8 +2365,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) goto out_disconnect_admin_queue; } - nvme_start_keep_alive(&ctrl->ctrl); - /* FC-NVME supports normal SGL Data Block Descriptors */ if (opts->queue_size > ctrl->ctrl.maxcmd) { @@ -2381,7 +2384,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) * Create the io queues */ - if (ctrl->queue_count > 1) { + if (ctrl->ctrl.queue_count > 1) { if (ctrl->ctrl.state == NVME_CTRL_NEW) ret = nvme_fc_create_io_queues(ctrl); else @@ -2395,17 +2398,12 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) ctrl->ctrl.nr_reconnects = 0; - if (ctrl->queue_count > 1) { - nvme_start_queues(&ctrl->ctrl); - nvme_queue_scan(&ctrl->ctrl); - nvme_queue_async_events(&ctrl->ctrl); - } + nvme_start_ctrl(&ctrl->ctrl); return 0; /* Success */ out_term_aen_ops: nvme_fc_term_aen_ops(ctrl); - nvme_stop_keep_alive(&ctrl->ctrl); out_disconnect_admin_queue: /* send a Disconnect(association) LS to fc-nvme target */ nvme_fc_xmt_disconnect_assoc(ctrl); @@ -2428,8 +2426,6 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) { unsigned long flags; - nvme_stop_keep_alive(&ctrl->ctrl); - spin_lock_irqsave(&ctrl->lock, flags); ctrl->flags |= FCCTRL_TERMIO; ctrl->iocnt = 0; @@ -2447,7 +2443,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) * io requests back to the block layer as part of normal completions * (but with error status). */ - if (ctrl->queue_count > 1) { + if (ctrl->ctrl.queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); @@ -2470,7 +2466,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) * use blk_mq_tagset_busy_itr() and the transport routine to * terminate the exchanges. */ - blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); + blk_mq_quiesce_queue(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); @@ -2511,7 +2507,8 @@ nvme_fc_delete_ctrl_work(struct work_struct *work) cancel_work_sync(&ctrl->ctrl.reset_work); cancel_delayed_work_sync(&ctrl->connect_work); - + nvme_stop_ctrl(&ctrl->ctrl); + nvme_remove_namespaces(&ctrl->ctrl); /* * kill the association on the link side. this will block * waiting for io to terminate @@ -2606,6 +2603,7 @@ nvme_fc_reset_ctrl_work(struct work_struct *work) container_of(work, struct nvme_fc_ctrl, ctrl.reset_work); int ret; + nvme_stop_ctrl(&ctrl->ctrl); /* will block will waiting for io to terminate */ nvme_fc_delete_association(ctrl); @@ -2702,18 +2700,17 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, spin_lock_init(&ctrl->lock); /* io queue count */ - ctrl->queue_count = min_t(unsigned int, + ctrl->ctrl.queue_count = min_t(unsigned int, opts->nr_io_queues, lport->ops->max_hw_queues); - opts->nr_io_queues = ctrl->queue_count; /* so opts has valid value */ - ctrl->queue_count++; /* +1 for admin queue */ + ctrl->ctrl.queue_count++; /* +1 for admin queue */ ctrl->ctrl.sqsize = opts->queue_size - 1; ctrl->ctrl.kato = opts->kato; ret = -ENOMEM; - ctrl->queues = kcalloc(ctrl->queue_count, sizeof(struct nvme_fc_queue), - GFP_KERNEL); + ctrl->queues = kcalloc(ctrl->ctrl.queue_count, + sizeof(struct nvme_fc_queue), GFP_KERNEL); if (!ctrl->queues) goto out_free_ida; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index d70ff0fdd36b..8f2a168ddc01 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -142,7 +142,9 @@ struct nvme_ctrl { u16 cntlid; u32 ctrl_config; + u32 queue_count; + u64 cap; u32 page_size; u32 max_hw_sectors; u16 oncs; @@ -278,6 +280,8 @@ int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl); int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, const struct nvme_ctrl_ops *ops, unsigned long quirks); void nvme_uninit_ctrl(struct nvme_ctrl *ctrl); +void nvme_start_ctrl(struct nvme_ctrl *ctrl); +void nvme_stop_ctrl(struct nvme_ctrl *ctrl); void nvme_put_ctrl(struct nvme_ctrl *ctrl); int nvme_init_identify(struct nvme_ctrl *ctrl); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b7a84c523475..8569ee771269 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -35,7 +35,6 @@ #include "nvme.h" -#define NVME_Q_DEPTH 1024 #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) #define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) @@ -57,6 +56,16 @@ module_param(max_host_mem_size_mb, uint, 0444); MODULE_PARM_DESC(max_host_mem_size_mb, "Maximum Host Memory Buffer (HMB) size per controller (in MiB)"); +static int io_queue_depth_set(const char *val, const struct kernel_param *kp); +static const struct kernel_param_ops io_queue_depth_ops = { + .set = io_queue_depth_set, + .get = param_get_int, +}; + +static int io_queue_depth = 1024; +module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644); +MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2"); + struct nvme_dev; struct nvme_queue; @@ -74,7 +83,6 @@ struct nvme_dev { struct device *dev; struct dma_pool *prp_page_pool; struct dma_pool *prp_small_pool; - unsigned queue_count; unsigned online_queues; unsigned max_qid; int q_depth; @@ -105,6 +113,17 @@ struct nvme_dev { void **host_mem_desc_bufs; }; +static int io_queue_depth_set(const char *val, const struct kernel_param *kp) +{ + int n = 0, ret; + + ret = kstrtoint(val, 10, &n); + if (ret != 0 || n < 2) + return -EINVAL; + + return param_set_int(val, kp); +} + static inline unsigned int sq_idx(unsigned int qid, u32 stride) { return qid * 2 * stride; @@ -520,7 +539,7 @@ static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi) } #endif -static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req) +static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct dma_pool *pool; @@ -537,7 +556,7 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req) length -= (page_size - offset); if (length <= 0) - return true; + return BLK_STS_OK; dma_len -= (page_size - offset); if (dma_len) { @@ -550,7 +569,7 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req) if (length <= page_size) { iod->first_dma = dma_addr; - return true; + return BLK_STS_OK; } nprps = DIV_ROUND_UP(length, page_size); @@ -566,7 +585,7 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req) if (!prp_list) { iod->first_dma = dma_addr; iod->npages = -1; - return false; + return BLK_STS_RESOURCE; } list[0] = prp_list; iod->first_dma = prp_dma; @@ -576,7 +595,7 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req) __le64 *old_prp_list = prp_list; prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma); if (!prp_list) - return false; + return BLK_STS_RESOURCE; list[iod->npages++] = prp_list; prp_list[0] = old_prp_list[i - 1]; old_prp_list[i - 1] = cpu_to_le64(prp_dma); @@ -590,13 +609,29 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req) break; if (dma_len > 0) continue; - BUG_ON(dma_len < 0); + if (unlikely(dma_len < 0)) + goto bad_sgl; sg = sg_next(sg); dma_addr = sg_dma_address(sg); dma_len = sg_dma_len(sg); } - return true; + return BLK_STS_OK; + + bad_sgl: + if (WARN_ONCE(1, "Invalid SGL for payload:%d nents:%d\n", + blk_rq_payload_bytes(req), iod->nents)) { + for_each_sg(iod->sg, sg, iod->nents, i) { + dma_addr_t phys = sg_phys(sg); + pr_warn("sg[%d] phys_addr:%pad offset:%d length:%d " + "dma_address:%pad dma_length:%d\n", i, &phys, + sg->offset, sg->length, + &sg_dma_address(sg), + sg_dma_len(sg)); + } + } + return BLK_STS_IOERR; + } static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, @@ -618,7 +653,8 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, DMA_ATTR_NO_WARN)) goto out; - if (!nvme_setup_prps(dev, req)) + ret = nvme_setup_prps(dev, req); + if (ret != BLK_STS_OK) goto out_unmap; ret = BLK_STS_IOERR; @@ -1099,9 +1135,9 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest) { int i; - for (i = dev->queue_count - 1; i >= lowest; i--) { + for (i = dev->ctrl.queue_count - 1; i >= lowest; i--) { struct nvme_queue *nvmeq = dev->queues[i]; - dev->queue_count--; + dev->ctrl.queue_count--; dev->queues[i] = NULL; nvme_free_queue(nvmeq); } @@ -1126,7 +1162,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) spin_unlock_irq(&nvmeq->q_lock); if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q) - blk_mq_stop_hw_queues(nvmeq->dev->ctrl.admin_q); + blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q); pci_free_irq(to_pci_dev(nvmeq->dev->dev), vector, nvmeq); @@ -1145,8 +1181,7 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) if (shutdown) nvme_shutdown_ctrl(&dev->ctrl); else - nvme_disable_ctrl(&dev->ctrl, lo_hi_readq( - dev->bar + NVME_REG_CAP)); + nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap); spin_lock_irq(&nvmeq->q_lock); nvme_process_cq(nvmeq); @@ -1221,7 +1256,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, nvmeq->qid = qid; nvmeq->cq_vector = -1; dev->queues[qid] = nvmeq; - dev->queue_count++; + dev->ctrl.queue_count++; return nvmeq; @@ -1317,7 +1352,7 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev) * user requests may be waiting on a stopped queue. Start the * queue to flush these to completion. */ - blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true); + blk_mq_unquiesce_queue(dev->ctrl.admin_q); blk_cleanup_queue(dev->ctrl.admin_q); blk_mq_free_tag_set(&dev->admin_tagset); } @@ -1354,7 +1389,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) return -ENODEV; } } else - blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true); + blk_mq_unquiesce_queue(dev->ctrl.admin_q); return 0; } @@ -1385,11 +1420,10 @@ static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size) return 0; } -static int nvme_configure_admin_queue(struct nvme_dev *dev) +static int nvme_pci_configure_admin_queue(struct nvme_dev *dev) { int result; u32 aqa; - u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP); struct nvme_queue *nvmeq; result = nvme_remap_bar(dev, db_bar_size(dev, 0)); @@ -1397,13 +1431,13 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) return result; dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ? - NVME_CAP_NSSRC(cap) : 0; + NVME_CAP_NSSRC(dev->ctrl.cap) : 0; if (dev->subsystem && (readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO)) writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS); - result = nvme_disable_ctrl(&dev->ctrl, cap); + result = nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap); if (result < 0) return result; @@ -1422,7 +1456,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ); lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ); - result = nvme_enable_ctrl(&dev->ctrl, cap); + result = nvme_enable_ctrl(&dev->ctrl, dev->ctrl.cap); if (result) return result; @@ -1441,7 +1475,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev) unsigned i, max; int ret = 0; - for (i = dev->queue_count; i <= dev->max_qid; i++) { + for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) { /* vector == qid - 1, match nvme_create_queue */ if (!nvme_alloc_queue(dev, i, dev->q_depth, pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) { @@ -1450,7 +1484,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev) } } - max = min(dev->max_qid, dev->queue_count - 1); + max = min(dev->max_qid, dev->ctrl.queue_count - 1); for (i = dev->online_queues; i <= max; i++) { ret = nvme_create_queue(dev->queues[i], i); if (ret) @@ -1585,9 +1619,10 @@ static void nvme_free_host_mem(struct nvme_dev *dev) static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred) { struct nvme_host_mem_buf_desc *descs; - u32 chunk_size, max_entries, i = 0; + u32 chunk_size, max_entries; + int i = 0; void **bufs; - u64 size, tmp; + u64 size = 0, tmp; /* start big and work our way down */ chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER); @@ -1866,7 +1901,6 @@ static int nvme_dev_add(struct nvme_dev *dev) static int nvme_pci_enable(struct nvme_dev *dev) { - u64 cap; int result = -ENOMEM; struct pci_dev *pdev = to_pci_dev(dev->dev); @@ -1893,10 +1927,11 @@ static int nvme_pci_enable(struct nvme_dev *dev) if (result < 0) return result; - cap = lo_hi_readq(dev->bar + NVME_REG_CAP); + dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP); - dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH); - dev->db_stride = 1 << NVME_CAP_STRIDE(cap); + dev->q_depth = min_t(int, NVME_CAP_MQES(dev->ctrl.cap) + 1, + io_queue_depth); + dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap); dev->dbs = dev->bar + 4096; /* @@ -1908,6 +1943,12 @@ static int nvme_pci_enable(struct nvme_dev *dev) dev_warn(dev->ctrl.device, "detected Apple NVMe controller, " "set queue depth=%u to work around controller resets\n", dev->q_depth); + } else if (pdev->vendor == PCI_VENDOR_ID_SAMSUNG && + (pdev->device == 0xa821 || pdev->device == 0xa822) && + NVME_CAP_MQES(dev->ctrl.cap) == 0) { + dev->q_depth = 64; + dev_err(dev->ctrl.device, "detected PM1725 NVMe controller, " + "set queue depth=%u\n", dev->q_depth); } /* @@ -1996,7 +2037,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_stop_queues(&dev->ctrl); queues = dev->online_queues - 1; - for (i = dev->queue_count - 1; i > 0; i--) + for (i = dev->ctrl.queue_count - 1; i > 0; i--) nvme_suspend_queue(dev->queues[i]); if (dead) { @@ -2004,7 +2045,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) * probe, before the admin queue is configured. Thus, * queue_count can be 0 here. */ - if (dev->queue_count) + if (dev->ctrl.queue_count) nvme_suspend_queue(dev->queues[0]); } else { nvme_disable_io_queues(dev, queues); @@ -2094,7 +2135,7 @@ static void nvme_reset_work(struct work_struct *work) if (result) goto out; - result = nvme_configure_admin_queue(dev); + result = nvme_pci_configure_admin_queue(dev); if (result) goto out; @@ -2133,15 +2174,6 @@ static void nvme_reset_work(struct work_struct *work) goto out; /* - * A controller that can not execute IO typically requires user - * intervention to correct. For such degraded controllers, the driver - * should not submit commands the user did not request, so skip - * registering for asynchronous event notification on this condition. - */ - if (dev->online_queues > 1) - nvme_queue_async_events(&dev->ctrl); - - /* * Keep the controller around but remove all namespaces if we don't have * any working I/O queue. */ @@ -2161,8 +2193,7 @@ static void nvme_reset_work(struct work_struct *work) goto out; } - if (dev->online_queues > 1) - nvme_queue_scan(&dev->ctrl); + nvme_start_ctrl(&dev->ctrl); return; out: @@ -2268,7 +2299,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) result = nvme_dev_map(dev); if (result) - goto free; + goto put_pci; INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work); INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work); @@ -2277,7 +2308,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) result = nvme_setup_prp_pools(dev); if (result) - goto put_pci; + goto unmap; quirks |= check_dell_samsung_bug(pdev); @@ -2294,9 +2325,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) release_pools: nvme_release_prp_pools(dev); + unmap: + nvme_dev_unmap(dev); put_pci: put_device(dev->dev); - nvme_dev_unmap(dev); free: kfree(dev->queues); kfree(dev); @@ -2341,11 +2373,13 @@ static void nvme_remove(struct pci_dev *pdev) } flush_work(&dev->ctrl.reset_work); - nvme_uninit_ctrl(&dev->ctrl); + nvme_stop_ctrl(&dev->ctrl); + nvme_remove_namespaces(&dev->ctrl); nvme_dev_disable(dev, true); nvme_free_host_mem(dev); nvme_dev_remove_admin(dev); nvme_free_queues(dev, 0); + nvme_uninit_ctrl(&dev->ctrl); nvme_release_prp_pools(dev); nvme_dev_unmap(dev); nvme_put_ctrl(&dev->ctrl); @@ -2450,6 +2484,9 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_VDEVICE(INTEL, 0x0a54), .driver_data = NVME_QUIRK_STRIPE_SIZE | NVME_QUIRK_DEALLOCATE_ZEROES, }, + { PCI_VDEVICE(INTEL, 0x0a55), + .driver_data = NVME_QUIRK_STRIPE_SIZE | + NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ @@ -2458,6 +2495,10 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x1c5f, 0x0540), /* Memblaze Pblaze4 adapter */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, + { PCI_DEVICE(0x144d, 0xa821), /* Samsung PM1725 */ + .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, + { PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */ + .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 6d4119dfbdaa..da04df1af231 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -86,7 +86,7 @@ enum nvme_rdma_queue_flags { struct nvme_rdma_queue { struct nvme_rdma_qe *rsp_ring; - u8 sig_count; + atomic_t sig_count; int queue_size; size_t cmnd_capsule_len; struct nvme_rdma_ctrl *ctrl; @@ -103,7 +103,6 @@ struct nvme_rdma_queue { struct nvme_rdma_ctrl { /* read only in the hot path */ struct nvme_rdma_queue *queues; - u32 queue_count; /* other member variables */ struct blk_mq_tag_set tag_set; @@ -119,7 +118,6 @@ struct nvme_rdma_ctrl { struct blk_mq_tag_set admin_tag_set; struct nvme_rdma_device *device; - u64 cap; u32 max_fr_pages; struct sockaddr_storage addr; @@ -274,9 +272,6 @@ static int nvme_rdma_reinit_request(void *data, struct request *rq) struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); int ret = 0; - if (!req->mr->need_inval) - goto out; - ib_dereg_mr(req->mr); req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG, @@ -349,7 +344,7 @@ static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, struct nvme_rdma_ctrl *ctrl = data; struct nvme_rdma_queue *queue = &ctrl->queues[hctx_idx + 1]; - BUG_ON(hctx_idx >= ctrl->queue_count); + BUG_ON(hctx_idx >= ctrl->ctrl.queue_count); hctx->driver_data = queue; return 0; @@ -525,6 +520,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, queue->cmnd_capsule_len = sizeof(struct nvme_command); queue->queue_size = queue_size; + atomic_set(&queue->sig_count, 0); queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue, RDMA_PS_TCP, IB_QPT_RC); @@ -587,7 +583,7 @@ static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl) { int i; - for (i = 1; i < ctrl->queue_count; i++) + for (i = 1; i < ctrl->ctrl.queue_count; i++) nvme_rdma_stop_and_free_queue(&ctrl->queues[i]); } @@ -595,7 +591,7 @@ static int nvme_rdma_connect_io_queues(struct nvme_rdma_ctrl *ctrl) { int i, ret = 0; - for (i = 1; i < ctrl->queue_count; i++) { + for (i = 1; i < ctrl->ctrl.queue_count; i++) { ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) { dev_info(ctrl->ctrl.device, @@ -623,14 +619,14 @@ static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl) if (ret) return ret; - ctrl->queue_count = nr_io_queues + 1; - if (ctrl->queue_count < 2) + ctrl->ctrl.queue_count = nr_io_queues + 1; + if (ctrl->ctrl.queue_count < 2) return 0; dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", nr_io_queues); - for (i = 1; i < ctrl->queue_count; i++) { + for (i = 1; i < ctrl->ctrl.queue_count; i++) { ret = nvme_rdma_init_queue(ctrl, i, ctrl->ctrl.opts->queue_size); if (ret) { @@ -705,7 +701,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) ++ctrl->ctrl.nr_reconnects; - if (ctrl->queue_count > 1) { + if (ctrl->ctrl.queue_count > 1) { nvme_rdma_free_io_queues(ctrl); ret = blk_mq_reinit_tagset(&ctrl->tag_set); @@ -729,13 +725,11 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags); - ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); + ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); if (ret) goto requeue; - nvme_start_keep_alive(&ctrl->ctrl); - - if (ctrl->queue_count > 1) { + if (ctrl->ctrl.queue_count > 1) { ret = nvme_rdma_init_io_queues(ctrl); if (ret) goto requeue; @@ -743,16 +737,16 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) ret = nvme_rdma_connect_io_queues(ctrl); if (ret) goto requeue; + + blk_mq_update_nr_hw_queues(&ctrl->tag_set, + ctrl->ctrl.queue_count - 1); } changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); ctrl->ctrl.nr_reconnects = 0; - if (ctrl->queue_count > 1) { - nvme_queue_scan(&ctrl->ctrl); - nvme_queue_async_events(&ctrl->ctrl); - } + nvme_start_ctrl(&ctrl->ctrl); dev_info(ctrl->ctrl.device, "Successfully reconnected\n"); @@ -770,17 +764,17 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) struct nvme_rdma_ctrl, err_work); int i; - nvme_stop_keep_alive(&ctrl->ctrl); + nvme_stop_ctrl(&ctrl->ctrl); - for (i = 0; i < ctrl->queue_count; i++) + for (i = 0; i < ctrl->ctrl.queue_count; i++) clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags); - if (ctrl->queue_count > 1) + if (ctrl->ctrl.queue_count > 1) nvme_stop_queues(&ctrl->ctrl); - blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); + blk_mq_quiesce_queue(ctrl->ctrl.admin_q); /* We must take care of fastfail/requeue all our inflight requests */ - if (ctrl->queue_count > 1) + if (ctrl->ctrl.queue_count > 1) blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request, &ctrl->ctrl); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, @@ -790,7 +784,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) * queues are not a live anymore, so restart the queues to fail fast * new IO */ - blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true); + blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); nvme_start_queues(&ctrl->ctrl); nvme_rdma_reconnect_or_remove(ctrl); @@ -1008,17 +1002,16 @@ static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) nvme_rdma_wr_error(cq, wc, "SEND"); } -static inline int nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue) +/* + * We want to signal completion at least every queue depth/2. This returns the + * largest power of two that is not above half of (queue size + 1) to optimize + * (avoid divisions). + */ +static inline bool nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue) { - int sig_limit; + int limit = 1 << ilog2((queue->queue_size + 1) / 2); - /* - * We signal completion every queue depth/2 and also handle the - * degenerated case of a device with queue_depth=1, where we - * would need to signal every message. - */ - sig_limit = max(queue->queue_size / 2, 1); - return (++queue->sig_count % sig_limit) == 0; + return (atomic_inc_return(&queue->sig_count) & (limit - 1)) == 0; } static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, @@ -1574,7 +1567,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags); - error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap); + error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, + &ctrl->ctrl.cap); if (error) { dev_err(ctrl->ctrl.device, "prop_get NVME_REG_CAP failed\n"); @@ -1582,9 +1576,9 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) } ctrl->ctrl.sqsize = - min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize); + min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize); - error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); + error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); if (error) goto out_cleanup_queue; @@ -1601,8 +1595,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) if (error) goto out_cleanup_queue; - nvme_start_keep_alive(&ctrl->ctrl); - return 0; out_cleanup_queue: @@ -1620,11 +1612,10 @@ out_free_queue: static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl) { - nvme_stop_keep_alive(&ctrl->ctrl); cancel_work_sync(&ctrl->err_work); cancel_delayed_work_sync(&ctrl->reconnect_work); - if (ctrl->queue_count > 1) { + if (ctrl->ctrl.queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request, &ctrl->ctrl); @@ -1634,18 +1625,21 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl) if (test_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags)) nvme_shutdown_ctrl(&ctrl->ctrl); - blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); + blk_mq_quiesce_queue(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request, &ctrl->ctrl); + blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); nvme_rdma_destroy_admin_queue(ctrl); } static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) { - nvme_uninit_ctrl(&ctrl->ctrl); + nvme_stop_ctrl(&ctrl->ctrl); + nvme_remove_namespaces(&ctrl->ctrl); if (shutdown) nvme_rdma_shutdown_ctrl(ctrl); + nvme_uninit_ctrl(&ctrl->ctrl); if (ctrl->ctrl.tagset) { blk_cleanup_queue(ctrl->ctrl.connect_q); blk_mq_free_tag_set(&ctrl->tag_set); @@ -1707,6 +1701,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) int ret; bool changed; + nvme_stop_ctrl(&ctrl->ctrl); nvme_rdma_shutdown_ctrl(ctrl); ret = nvme_rdma_configure_admin_queue(ctrl); @@ -1716,7 +1711,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) goto del_dead_ctrl; } - if (ctrl->queue_count > 1) { + if (ctrl->ctrl.queue_count > 1) { ret = blk_mq_reinit_tagset(&ctrl->tag_set); if (ret) goto del_dead_ctrl; @@ -1728,16 +1723,15 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) ret = nvme_rdma_connect_io_queues(ctrl); if (ret) goto del_dead_ctrl; + + blk_mq_update_nr_hw_queues(&ctrl->tag_set, + ctrl->ctrl.queue_count - 1); } changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); - if (ctrl->queue_count > 1) { - nvme_start_queues(&ctrl->ctrl); - nvme_queue_scan(&ctrl->ctrl); - nvme_queue_async_events(&ctrl->ctrl); - } + nvme_start_ctrl(&ctrl->ctrl); return; @@ -1785,7 +1779,7 @@ static int nvme_rdma_create_io_queues(struct nvme_rdma_ctrl *ctrl) ctrl->tag_set.cmd_size = sizeof(struct nvme_rdma_request) + SG_CHUNK_SIZE * sizeof(struct scatterlist); ctrl->tag_set.driver_data = ctrl; - ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1; + ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; ctrl->tag_set.timeout = NVME_IO_TIMEOUT; ret = blk_mq_alloc_tag_set(&ctrl->tag_set); @@ -1863,12 +1857,12 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, INIT_WORK(&ctrl->delete_work, nvme_rdma_del_ctrl_work); INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work); - ctrl->queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */ + ctrl->ctrl.queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */ ctrl->ctrl.sqsize = opts->queue_size - 1; ctrl->ctrl.kato = opts->kato; ret = -ENOMEM; - ctrl->queues = kcalloc(ctrl->queue_count, sizeof(*ctrl->queues), + ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues), GFP_KERNEL); if (!ctrl->queues) goto out_uninit_ctrl; @@ -1925,15 +1919,11 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list); mutex_unlock(&nvme_rdma_ctrl_mutex); - if (opts->nr_io_queues) { - nvme_queue_scan(&ctrl->ctrl); - nvme_queue_async_events(&ctrl->ctrl); - } + nvme_start_ctrl(&ctrl->ctrl); return &ctrl->ctrl; out_remove_admin_queue: - nvme_stop_keep_alive(&ctrl->ctrl); nvme_rdma_destroy_admin_queue(ctrl); out_kfree_queues: kfree(ctrl->queues); diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 35f930db3c02..2d7a98ab53fb 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -168,11 +168,21 @@ out: nvmet_req_complete(req, status); } +static void copy_and_pad(char *dst, int dst_len, const char *src, int src_len) +{ + int len = min(src_len, dst_len); + + memcpy(dst, src, len); + if (dst_len > len) + memset(dst + len, ' ', dst_len - len); +} + static void nvmet_execute_identify_ctrl(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvme_id_ctrl *id; u16 status = 0; + const char model[] = "Linux"; id = kzalloc(sizeof(*id), GFP_KERNEL); if (!id) { @@ -184,8 +194,10 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) id->vid = 0; id->ssvid = 0; - memset(id->sn, ' ', sizeof(id->sn)); - snprintf(id->sn, sizeof(id->sn), "%llx", ctrl->serial); + bin2hex(id->sn, &ctrl->subsys->serial, + min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2)); + copy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1); + copy_and_pad(id->fr, sizeof(id->fr), UTS_RELEASE, strlen(UTS_RELEASE)); memset(id->mn, ' ', sizeof(id->mn)); strncpy((char *)id->mn, "Linux", sizeof(id->mn)); diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index a358ecd93e11..0a0067e771f5 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -650,7 +650,7 @@ out_unlock: CONFIGFS_ATTR(nvmet_subsys_, attr_allow_any_host); -static ssize_t nvmet_subsys_version_show(struct config_item *item, +static ssize_t nvmet_subsys_attr_version_show(struct config_item *item, char *page) { struct nvmet_subsys *subsys = to_subsys(item); @@ -666,7 +666,7 @@ static ssize_t nvmet_subsys_version_show(struct config_item *item, (int)NVME_MINOR(subsys->ver)); } -static ssize_t nvmet_subsys_version_store(struct config_item *item, +static ssize_t nvmet_subsys_attr_version_store(struct config_item *item, const char *page, size_t count) { struct nvmet_subsys *subsys = to_subsys(item); @@ -684,11 +684,33 @@ static ssize_t nvmet_subsys_version_store(struct config_item *item, return count; } -CONFIGFS_ATTR(nvmet_subsys_, version); +CONFIGFS_ATTR(nvmet_subsys_, attr_version); + +static ssize_t nvmet_subsys_attr_serial_show(struct config_item *item, + char *page) +{ + struct nvmet_subsys *subsys = to_subsys(item); + + return snprintf(page, PAGE_SIZE, "%llx\n", subsys->serial); +} + +static ssize_t nvmet_subsys_attr_serial_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_subsys *subsys = to_subsys(item); + + down_write(&nvmet_config_sem); + sscanf(page, "%llx\n", &subsys->serial); + up_write(&nvmet_config_sem); + + return count; +} +CONFIGFS_ATTR(nvmet_subsys_, attr_serial); static struct configfs_attribute *nvmet_subsys_attrs[] = { &nvmet_subsys_attr_attr_allow_any_host, - &nvmet_subsys_attr_version, + &nvmet_subsys_attr_attr_version, + &nvmet_subsys_attr_attr_serial, NULL, }; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index b5b4ac103748..f4b02bb4a1a8 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -767,9 +767,6 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE); memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE); - /* generate a random serial number as our controllers are ephemeral: */ - get_random_bytes(&ctrl->serial, sizeof(ctrl->serial)); - kref_init(&ctrl->ref); ctrl->subsys = subsys; @@ -928,6 +925,8 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, return NULL; subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */ + /* generate a random serial number as our controllers are ephemeral: */ + get_random_bytes(&subsys->serial, sizeof(subsys->serial)); switch (type) { case NVME_NQN_NVME: diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 7692a96c9065..d5801c150b1c 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1164,18 +1164,24 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, memset(acc, 0, sizeof(*acc)); - if (iod->rqstdatalen < sizeof(struct fcnvme_ls_cr_assoc_rqst)) + /* + * FC-NVME spec changes. There are initiators sending different + * lengths as padding sizes for Create Association Cmd descriptor + * was incorrect. + * Accept anything of "minimum" length. Assume format per 1.15 + * spec (with HOSTID reduced to 16 bytes), ignore how long the + * trailing pad length is. + */ + if (iod->rqstdatalen < FCNVME_LSDESC_CRA_RQST_MINLEN) ret = VERR_CR_ASSOC_LEN; - else if (rqst->desc_list_len != - fcnvme_lsdesc_len( - sizeof(struct fcnvme_ls_cr_assoc_rqst))) + else if (be32_to_cpu(rqst->desc_list_len) < + FCNVME_LSDESC_CRA_RQST_MIN_LISTLEN) ret = VERR_CR_ASSOC_RQST_LEN; else if (rqst->assoc_cmd.desc_tag != cpu_to_be32(FCNVME_LSDESC_CREATE_ASSOC_CMD)) ret = VERR_CR_ASSOC_CMD; - else if (rqst->assoc_cmd.desc_len != - fcnvme_lsdesc_len( - sizeof(struct fcnvme_lsdesc_cr_assoc_cmd))) + else if (be32_to_cpu(rqst->assoc_cmd.desc_len) < + FCNVME_LSDESC_CRA_CMD_DESC_MIN_DESCLEN) ret = VERR_CR_ASSOC_CMD_LEN; else if (!rqst->assoc_cmd.ersp_ratio || (be16_to_cpu(rqst->assoc_cmd.ersp_ratio) >= diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c index 40128793e613..3b4d47a6abdb 100644 --- a/drivers/nvme/target/io-cmd.c +++ b/drivers/nvme/target/io-cmd.c @@ -85,7 +85,7 @@ static void nvmet_execute_rw(struct nvmet_req *req) bio_set_op_attrs(bio, op, op_flags); bio_chain(bio, prev); - cookie = submit_bio(prev); + submit_bio(prev); } sector += sg->length >> 9; diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 5f55c683b338..717ed7ddb2f6 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -44,12 +44,10 @@ struct nvme_loop_iod { struct nvme_loop_ctrl { struct nvme_loop_queue *queues; - u32 queue_count; struct blk_mq_tag_set admin_tag_set; struct list_head list; - u64 cap; struct blk_mq_tag_set tag_set; struct nvme_loop_iod async_event_iod; struct nvme_ctrl ctrl; @@ -241,7 +239,7 @@ static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, struct nvme_loop_ctrl *ctrl = data; struct nvme_loop_queue *queue = &ctrl->queues[hctx_idx + 1]; - BUG_ON(hctx_idx >= ctrl->queue_count); + BUG_ON(hctx_idx >= ctrl->ctrl.queue_count); hctx->driver_data = queue; return 0; @@ -307,7 +305,7 @@ static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl) { int i; - for (i = 1; i < ctrl->queue_count; i++) + for (i = 1; i < ctrl->ctrl.queue_count; i++) nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); } @@ -330,7 +328,7 @@ static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl) if (ret) goto out_destroy_queues; - ctrl->queue_count++; + ctrl->ctrl.queue_count++; } return 0; @@ -344,7 +342,7 @@ static int nvme_loop_connect_io_queues(struct nvme_loop_ctrl *ctrl) { int i, ret; - for (i = 1; i < ctrl->queue_count; i++) { + for (i = 1; i < ctrl->ctrl.queue_count; i++) { ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) return ret; @@ -372,7 +370,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) error = nvmet_sq_init(&ctrl->queues[0].nvme_sq); if (error) return error; - ctrl->queue_count = 1; + ctrl->ctrl.queue_count = 1; error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); if (error) @@ -388,7 +386,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) if (error) goto out_cleanup_queue; - error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap); + error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap); if (error) { dev_err(ctrl->ctrl.device, "prop_get NVME_REG_CAP failed\n"); @@ -396,9 +394,9 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) } ctrl->ctrl.sqsize = - min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize); + min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize); - error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); + error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); if (error) goto out_cleanup_queue; @@ -409,8 +407,6 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) if (error) goto out_cleanup_queue; - nvme_start_keep_alive(&ctrl->ctrl); - return 0; out_cleanup_queue: @@ -424,9 +420,7 @@ out_free_sq: static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl) { - nvme_stop_keep_alive(&ctrl->ctrl); - - if (ctrl->queue_count > 1) { + if (ctrl->ctrl.queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request, &ctrl->ctrl); @@ -436,9 +430,10 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl) if (ctrl->ctrl.state == NVME_CTRL_LIVE) nvme_shutdown_ctrl(&ctrl->ctrl); - blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); + blk_mq_quiesce_queue(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request, &ctrl->ctrl); + blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); nvme_loop_destroy_admin_queue(ctrl); } @@ -447,8 +442,10 @@ static void nvme_loop_del_ctrl_work(struct work_struct *work) struct nvme_loop_ctrl *ctrl = container_of(work, struct nvme_loop_ctrl, delete_work); - nvme_uninit_ctrl(&ctrl->ctrl); + nvme_stop_ctrl(&ctrl->ctrl); + nvme_remove_namespaces(&ctrl->ctrl); nvme_loop_shutdown_ctrl(ctrl); + nvme_uninit_ctrl(&ctrl->ctrl); nvme_put_ctrl(&ctrl->ctrl); } @@ -496,6 +493,7 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) bool changed; int ret; + nvme_stop_ctrl(&ctrl->ctrl); nvme_loop_shutdown_ctrl(ctrl); ret = nvme_loop_configure_admin_queue(ctrl); @@ -510,13 +508,13 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) if (ret) goto out_destroy_io; + blk_mq_update_nr_hw_queues(&ctrl->tag_set, + ctrl->ctrl.queue_count - 1); + changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); - nvme_queue_scan(&ctrl->ctrl); - nvme_queue_async_events(&ctrl->ctrl); - - nvme_start_queues(&ctrl->ctrl); + nvme_start_ctrl(&ctrl->ctrl); return; @@ -559,7 +557,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) + SG_CHUNK_SIZE * sizeof(struct scatterlist); ctrl->tag_set.driver_data = ctrl; - ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1; + ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; ctrl->tag_set.timeout = NVME_IO_TIMEOUT; ctrl->ctrl.tagset = &ctrl->tag_set; @@ -651,10 +649,7 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, list_add_tail(&ctrl->list, &nvme_loop_ctrl_list); mutex_unlock(&nvme_loop_ctrl_mutex); - if (opts->nr_io_queues) { - nvme_queue_scan(&ctrl->ctrl); - nvme_queue_async_events(&ctrl->ctrl); - } + nvme_start_ctrl(&ctrl->ctrl); return &ctrl->ctrl; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 747bbdb4f9c6..e3b244c7e443 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -112,7 +112,6 @@ struct nvmet_ctrl { struct mutex lock; u64 cap; - u64 serial; u32 cc; u32 csts; @@ -152,6 +151,7 @@ struct nvmet_subsys { u16 max_qid; u64 ver; + u64 serial; char *subsysnqn; struct config_group group; |