Diffstat (limited to 'drivers/nvme')
-rw-r--r--  drivers/nvme/host/core.c          |  46
-rw-r--r--  drivers/nvme/host/fabrics.c       |   3
-rw-r--r--  drivers/nvme/host/fc.c            |   4
-rw-r--r--  drivers/nvme/host/lightnvm.c      |  15
-rw-r--r--  drivers/nvme/host/pci.c           |  45
-rw-r--r--  drivers/nvme/host/rdma.c          |  53
-rw-r--r--  drivers/nvme/target/admin-cmd.c   |   6
-rw-r--r--  drivers/nvme/target/fc.c          | 217
-rw-r--r--  drivers/nvme/target/io-cmd.c      |   6
-rw-r--r--  drivers/nvme/target/rdma.c        |   5
10 files changed, 276 insertions, 124 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index c49f1f8b2e57..c596dd3c58b1 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -336,7 +336,7 @@ static int nvme_get_stream_params(struct nvme_ctrl *ctrl,
 
 	c.directive.opcode = nvme_admin_directive_recv;
 	c.directive.nsid = cpu_to_le32(nsid);
-	c.directive.numd = cpu_to_le32(sizeof(*s));
+	c.directive.numd = cpu_to_le32((sizeof(*s) >> 2) - 1);
 	c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM;
 	c.directive.dtype = NVME_DIR_STREAMS;
 
@@ -613,11 +613,7 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
 
 		if (!disk)
 			goto submit;
-		bio->bi_bdev = bdget_disk(disk, 0);
-		if (!bio->bi_bdev) {
-			ret = -ENODEV;
-			goto out_unmap;
-		}
+		bio->bi_disk = disk;
 
 		if (meta_buffer && meta_len) {
 			struct bio_integrity_payload *bip;
@@ -668,11 +664,8 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
  out_free_meta:
 	kfree(meta);
  out_unmap:
-	if (bio) {
-		if (disk && bio->bi_bdev)
-			bdput(bio->bi_bdev);
+	if (bio)
 		blk_rq_unmap_user(bio);
-	}
  out:
 	blk_mq_free_request(req);
 	return ret;
@@ -1509,7 +1502,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
 	blk_queue_write_cache(q, vwc, vwc);
 }
 
-static void nvme_configure_apst(struct nvme_ctrl *ctrl)
+static int nvme_configure_apst(struct nvme_ctrl *ctrl)
 {
 	/*
 	 * APST (Autonomous Power State Transition) lets us program a
@@ -1538,16 +1531,16 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 	 * then don't do anything.
 	 */
 	if (!ctrl->apsta)
-		return;
+		return 0;
 
 	if (ctrl->npss > 31) {
 		dev_warn(ctrl->device, "NPSS is invalid; not using APST\n");
-		return;
+		return 0;
 	}
 
 	table = kzalloc(sizeof(*table), GFP_KERNEL);
 	if (!table)
-		return;
+		return 0;
 
 	if (!ctrl->apst_enabled || ctrl->ps_max_latency_us == 0) {
 		/* Turn off APST. */
@@ -1629,6 +1622,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 		dev_err(ctrl->device, "failed to set APST feature (%d)\n", ret);
 
 	kfree(table);
+	return ret;
 }
 
 static void nvme_set_latency_tolerance(struct device *dev, s32 val)
@@ -1835,13 +1829,16 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 		 * In fabrics we need to verify the cntlid matches the
 		 * admin connect
 		 */
-		if (ctrl->cntlid != le16_to_cpu(id->cntlid))
+		if (ctrl->cntlid != le16_to_cpu(id->cntlid)) {
 			ret = -EINVAL;
+			goto out_free;
+		}
 
 		if (!ctrl->opts->discovery_nqn && !ctrl->kas) {
 			dev_err(ctrl->device,
 				"keep-alive support is mandatory for fabrics\n");
 			ret = -EINVAL;
+			goto out_free;
 		}
 	} else {
 		ctrl->cntlid = le16_to_cpu(id->cntlid);
@@ -1856,11 +1853,20 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 	else if (!ctrl->apst_enabled && prev_apst_enabled)
 		dev_pm_qos_hide_latency_tolerance(ctrl->device);
 
-	nvme_configure_apst(ctrl);
-	nvme_configure_directives(ctrl);
+	ret = nvme_configure_apst(ctrl);
+	if (ret < 0)
+		return ret;
+
+	ret = nvme_configure_directives(ctrl);
+	if (ret < 0)
+		return ret;
 
 	ctrl->identified = true;
 
+	return 0;
+
+out_free:
+	kfree(id);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(nvme_init_identify);
@@ -2004,9 +2010,11 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
 	if (memchr_inv(ns->eui, 0, sizeof(ns->eui)))
 		return sprintf(buf, "eui.%8phN\n", ns->eui);
 
-	while (ctrl->serial[serial_len - 1] == ' ')
+	while (serial_len > 0 && (ctrl->serial[serial_len - 1] == ' ' ||
+				  ctrl->serial[serial_len - 1] == '\0'))
 		serial_len--;
-	while (ctrl->model[model_len - 1] == ' ')
+	while (model_len > 0 && (ctrl->model[model_len - 1] == ' ' ||
+				 ctrl->model[model_len - 1] == '\0'))
 		model_len--;
 
 	return sprintf(buf, "nvme.%04x-%*phN-%*phN-%08x\n", ctrl->vid,
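A note on the numd fix in the first core.c hunk: per the NVMe spec, Number of Dwords fields are zero's-based, so the encoded value is (byte length / 4) - 1 rather than the raw sizeof(). A minimal sketch of the conversion (bytes_to_numd() is a hypothetical helper, not part of the patch):

#include <linux/types.h>

/* numd is a zero's-based dword count: a 32-byte buffer encodes as 7. */
static inline u32 bytes_to_numd(size_t len)
{
	return (len >> 2) - 1;	/* len must be a non-zero multiple of 4 */
}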
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 2e582a240943..5f5cd306f76d 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -794,7 +794,8 @@ static int nvmf_check_allowed_opts(struct nvmf_ctrl_options *opts,
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(opt_tokens); i++) {
-		if (opt_tokens[i].token & ~allowed_opts) {
+		if ((opt_tokens[i].token & opts->mask) &&
+		    (opt_tokens[i].token & ~allowed_opts)) {
 			pr_warn("invalid parameter '%s'\n",
 				opt_tokens[i].pattern);
 		}
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 5c2a08ef08ba..1438be649866 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2168,7 +2168,6 @@ static const struct blk_mq_ops nvme_fc_mq_ops = {
 	.complete	= nvme_fc_complete_rq,
 	.init_request	= nvme_fc_init_request,
 	.exit_request	= nvme_fc_exit_request,
-	.reinit_request	= nvme_fc_reinit_request,
 	.init_hctx	= nvme_fc_init_hctx,
 	.poll		= nvme_fc_poll,
 	.timeout	= nvme_fc_timeout,
@@ -2269,7 +2268,7 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
 
 	nvme_fc_init_io_queues(ctrl);
 
-	ret = blk_mq_reinit_tagset(&ctrl->tag_set);
+	ret = blk_mq_reinit_tagset(&ctrl->tag_set, nvme_fc_reinit_request);
 	if (ret)
 		goto out_free_io_queues;
 
@@ -2655,7 +2654,6 @@ static const struct blk_mq_ops nvme_fc_admin_mq_ops = {
 	.complete	= nvme_fc_complete_rq,
 	.init_request	= nvme_fc_init_request,
 	.exit_request	= nvme_fc_exit_request,
-	.reinit_request	= nvme_fc_reinit_request,
 	.init_hctx	= nvme_fc_init_admin_hctx,
 	.timeout	= nvme_fc_timeout,
 };
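The fc.c hunks above track a block-layer API change: the per-driver reinit_request hook leaves struct blk_mq_ops and is instead passed to blk_mq_reinit_tagset() by its only callers. A hedged sketch of the resulting call pattern (the example_* names are hypothetical; the signatures follow the calls in this diff):

#include <linux/blk-mq.h>

/* Re-initialize driver state attached to each request after a transport
 * teardown; previously this was wired up as blk_mq_ops.reinit_request. */
static int example_reinit_request(void *data, struct request *rq)
{
	/* re-associate per-request transport resources here */
	return 0;
}

static int example_reinit_io_queues(struct blk_mq_tag_set *set)
{
	/* walks all allocated requests and applies the callback */
	return blk_mq_reinit_tagset(set, example_reinit_request);
}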
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index be8541335e31..c1a28569e843 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -643,17 +643,9 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q,
 			vcmd->ph_rw.metadata = cpu_to_le64(metadata_dma);
 		}
 
-		if (!disk)
-			goto submit;
-
-		bio->bi_bdev = bdget_disk(disk, 0);
-		if (!bio->bi_bdev) {
-			ret = -ENODEV;
-			goto err_meta;
-		}
+		bio->bi_disk = disk;
 	}
 
-submit:
 	blk_execute_rq(q, NULL, rq, 0);
 
 	if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
@@ -673,11 +665,8 @@ err_meta:
 	if (meta_buf && meta_len)
 		dma_pool_free(dev->dma_pool, metadata, metadata_dma);
 err_map:
-	if (bio) {
-		if (disk && bio->bi_bdev)
-			bdput(bio->bi_bdev);
+	if (bio)
 		blk_rq_unmap_user(bio);
-	}
 err_ppa:
 	if (ppa_buf && ppa_len)
 		dma_pool_free(dev->dma_pool, ppa_list, ppa_dma);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index cd888a47d0fc..ea892e732268 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -109,6 +109,7 @@ struct nvme_dev {
 	/* host memory buffer support: */
 	u64 host_mem_size;
 	u32 nr_host_mem_descs;
+	dma_addr_t host_mem_descs_dma;
 	struct nvme_host_mem_buf_desc *host_mem_descs;
 	void **host_mem_desc_bufs;
 };
@@ -801,6 +802,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
 		return;
 	}
 
+	nvmeq->cqe_seen = 1;
 	req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id);
 	nvme_end_request(req, cqe->status, cqe->result);
 }
@@ -830,10 +832,8 @@ static void nvme_process_cq(struct nvme_queue *nvmeq)
 		consumed++;
 	}
 
-	if (consumed) {
+	if (consumed)
 		nvme_ring_cq_doorbell(nvmeq);
-		nvmeq->cqe_seen = 1;
-	}
 }
 
 static irqreturn_t nvme_irq(int irq, void *data)
@@ -1558,26 +1558,18 @@ static inline void nvme_release_cmb(struct nvme_dev *dev)
 	if (dev->cmb) {
 		iounmap(dev->cmb);
 		dev->cmb = NULL;
-		if (dev->cmbsz) {
-			sysfs_remove_file_from_group(&dev->ctrl.device->kobj,
-						     &dev_attr_cmb.attr, NULL);
-			dev->cmbsz = 0;
-		}
+		sysfs_remove_file_from_group(&dev->ctrl.device->kobj,
+					     &dev_attr_cmb.attr, NULL);
+		dev->cmbsz = 0;
 	}
 }
 
 static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
 {
-	size_t len = dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs);
+	u64 dma_addr = dev->host_mem_descs_dma;
 	struct nvme_command c;
-	u64 dma_addr;
 	int ret;
 
-	dma_addr = dma_map_single(dev->dev, dev->host_mem_descs, len,
-			DMA_TO_DEVICE);
-	if (dma_mapping_error(dev->dev, dma_addr))
-		return -ENOMEM;
-
 	memset(&c, 0, sizeof(c));
 	c.features.opcode	= nvme_admin_set_features;
 	c.features.fid		= cpu_to_le32(NVME_FEAT_HOST_MEM_BUF);
@@ -1594,7 +1586,6 @@ static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
 			 "failed to set host mem (err %d, flags %#x).\n",
 			 ret, bits);
 	}
-	dma_unmap_single(dev->dev, dma_addr, len, DMA_TO_DEVICE);
 	return ret;
 }
 
@@ -1612,7 +1603,9 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
 	kfree(dev->host_mem_desc_bufs);
 	dev->host_mem_desc_bufs = NULL;
 
-	kfree(dev->host_mem_descs);
+	dma_free_coherent(dev->dev,
+			dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs),
+			dev->host_mem_descs, dev->host_mem_descs_dma);
 	dev->host_mem_descs = NULL;
 }
 
@@ -1620,6 +1613,7 @@ static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
 {
 	struct nvme_host_mem_buf_desc *descs;
 	u32 chunk_size, max_entries, len;
+	dma_addr_t descs_dma;
 	int i = 0;
 	void **bufs;
 	u64 size = 0, tmp;
@@ -1630,7 +1624,8 @@ retry:
 	tmp = (preferred + chunk_size - 1);
 	do_div(tmp, chunk_size);
 	max_entries = tmp;
-	descs = kcalloc(max_entries, sizeof(*descs), GFP_KERNEL);
+	descs = dma_zalloc_coherent(dev->dev, max_entries * sizeof(*descs),
+			&descs_dma, GFP_KERNEL);
 	if (!descs)
 		goto out;
 
@@ -1664,6 +1659,7 @@ retry:
 	dev->nr_host_mem_descs = i;
 	dev->host_mem_size = size;
 	dev->host_mem_descs = descs;
+	dev->host_mem_descs_dma = descs_dma;
 	dev->host_mem_desc_bufs = bufs;
 	return 0;
 
@@ -1677,7 +1673,8 @@ out_free_bufs:
 	kfree(bufs);
 
 out_free_descs:
-	kfree(descs);
+	dma_free_coherent(dev->dev, max_entries * sizeof(*descs), descs,
+			descs_dma);
 out:
 	/* try a smaller chunk size if we failed early */
 	if (chunk_size >= PAGE_SIZE * 2 && (i == 0 || size < min)) {
@@ -1953,16 +1950,14 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 
 	/*
 	 * CMBs can currently only exist on >=1.2 PCIe devices. We only
-	 * populate sysfs if a CMB is implemented. Note that we add the
-	 * CMB attribute to the nvme_ctrl kobj which removes the need to remove
-	 * it on exit. Since nvme_dev_attrs_group has no name we can pass
-	 * NULL as final argument to sysfs_add_file_to_group.
+	 * populate sysfs if a CMB is implemented. Since nvme_dev_attrs_group
+	 * has no name we can pass NULL as final argument to
+	 * sysfs_add_file_to_group.
 	 */
 	if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2, 0)) {
 		dev->cmb = nvme_map_cmb(dev);
-
-		if (dev->cmbsz) {
+		if (dev->cmb) {
 			if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
 						    &dev_attr_cmb.attr, NULL))
 				dev_warn(dev->ctrl.device,
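On the host-memory-buffer rework in pci.c: the descriptor list the controller reads at Set Features time is now a long-lived coherent allocation (host_mem_descs/host_mem_descs_dma), so enabling the HMB no longer maps and unmaps the list on every call. A condensed sketch of the resulting enable path, using the same field names as the diff (an illustration, not a verbatim copy of nvme_set_host_mem()):

#include <linux/kernel.h>
#include <linux/nvme.h>

/* Hedged sketch: build the Set Features (Host Memory Buffer) command
 * straight from the persistent coherent mapping kept in nvme_dev. */
static int example_set_host_mem(struct nvme_dev *dev, u32 bits)
{
	u64 dma_addr = dev->host_mem_descs_dma;
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.features.opcode	= nvme_admin_set_features;
	c.features.fid		= cpu_to_le32(NVME_FEAT_HOST_MEM_BUF);
	c.features.dword11	= cpu_to_le32(bits);
	c.features.dword12	= cpu_to_le32(dev->host_mem_size >>
					      ilog2(dev->ctrl.page_size));
	c.features.dword13	= cpu_to_le32(lower_32_bits(dma_addr));
	c.features.dword14	= cpu_to_le32(upper_32_bits(dma_addr));
	c.features.dword15	= cpu_to_le32(dev->nr_host_mem_descs);

	return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
}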
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index da04df1af231..bf42d31484d4 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -19,6 +19,7 @@
 #include <linux/string.h>
 #include <linux/atomic.h>
 #include <linux/blk-mq.h>
+#include <linux/blk-mq-rdma.h>
 #include <linux/types.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
@@ -463,14 +464,10 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 	ibdev = queue->device->dev;
 
 	/*
-	 * The admin queue is barely used once the controller is live, so don't
-	 * bother to spread it out.
+	 * Spread I/O queues completion vectors according to their queue index.
+	 * Admin queues can always go on completion vector 0.
 	 */
-	if (idx == 0)
-		comp_vector = 0;
-	else
-		comp_vector = idx % ibdev->num_comp_vectors;
-
+	comp_vector = idx == 0 ? idx : idx - 1;
 
 	/* +1 for ib_stop_cq */
 	queue->ib_cq = ib_alloc_cq(ibdev, queue,
@@ -611,10 +608,20 @@ out_free_queues:
 static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl)
 {
 	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+	struct ib_device *ibdev = ctrl->device->dev;
 	unsigned int nr_io_queues;
 	int i, ret;
 
 	nr_io_queues = min(opts->nr_io_queues, num_online_cpus());
+
+	/*
+	 * We map queues according to the device irq vectors for
+	 * optimal locality, so we don't need more queues than
+	 * completion vectors.
+	 */
+	nr_io_queues = min_t(unsigned int, nr_io_queues,
+				ibdev->num_comp_vectors);
+
 	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
 	if (ret)
 		return ret;
@@ -704,14 +711,16 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 
 	if (ctrl->ctrl.queue_count > 1) {
 		nvme_rdma_free_io_queues(ctrl);
 
-		ret = blk_mq_reinit_tagset(&ctrl->tag_set);
+		ret = blk_mq_reinit_tagset(&ctrl->tag_set,
+					   nvme_rdma_reinit_request);
 		if (ret)
 			goto requeue;
 	}
 
 	nvme_rdma_stop_and_free_queue(&ctrl->queues[0]);
 
-	ret = blk_mq_reinit_tagset(&ctrl->admin_tag_set);
+	ret = blk_mq_reinit_tagset(&ctrl->admin_tag_set,
+				   nvme_rdma_reinit_request);
 	if (ret)
 		goto requeue;
 
@@ -920,7 +929,11 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
 	struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
 	int nr;
 
-	nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, PAGE_SIZE);
+	/*
+	 * Align the MR to a 4K page size to match the ctrl page size and
+	 * the block virtual boundary.
+	 */
+	nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K);
 	if (nr < count) {
 		if (nr < 0)
 			return nr;
@@ -1498,15 +1511,22 @@ static void nvme_rdma_complete_rq(struct request *rq)
 	nvme_complete_rq(rq);
 }
 
+static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
+{
+	struct nvme_rdma_ctrl *ctrl = set->driver_data;
+
+	return blk_mq_rdma_map_queues(set, ctrl->device->dev, 0);
+}
+
 static const struct blk_mq_ops nvme_rdma_mq_ops = {
 	.queue_rq	= nvme_rdma_queue_rq,
 	.complete	= nvme_rdma_complete_rq,
 	.init_request	= nvme_rdma_init_request,
 	.exit_request	= nvme_rdma_exit_request,
-	.reinit_request	= nvme_rdma_reinit_request,
 	.init_hctx	= nvme_rdma_init_hctx,
 	.poll		= nvme_rdma_poll,
 	.timeout	= nvme_rdma_timeout,
+	.map_queues	= nvme_rdma_map_queues,
 };
 
 static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
@@ -1514,7 +1534,6 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
 	.complete	= nvme_rdma_complete_rq,
 	.init_request	= nvme_rdma_init_request,
 	.exit_request	= nvme_rdma_exit_request,
-	.reinit_request	= nvme_rdma_reinit_request,
 	.init_hctx	= nvme_rdma_init_admin_hctx,
 	.timeout	= nvme_rdma_timeout,
 };
@@ -1583,7 +1602,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
 		goto out_cleanup_queue;
 
 	ctrl->ctrl.max_hw_sectors =
-		(ctrl->max_fr_pages - 1) << (PAGE_SHIFT - 9);
+		(ctrl->max_fr_pages - 1) << (ilog2(SZ_4K) - 9);
 
 	error = nvme_init_identify(&ctrl->ctrl);
 	if (error)
@@ -1712,7 +1731,8 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 	}
 
 	if (ctrl->ctrl.queue_count > 1) {
-		ret = blk_mq_reinit_tagset(&ctrl->tag_set);
+		ret = blk_mq_reinit_tagset(&ctrl->tag_set,
+					   nvme_rdma_reinit_request);
 		if (ret)
 			goto del_dead_ctrl;
 
@@ -1946,10 +1966,6 @@ static struct nvmf_transport_ops nvme_rdma_transport = {
 	.create_ctrl	= nvme_rdma_create_ctrl,
 };
 
-static void nvme_rdma_add_one(struct ib_device *ib_device)
-{
-}
-
 static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
 {
 	struct nvme_rdma_ctrl *ctrl;
@@ -1971,7 +1987,6 @@ static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
 
 static struct ib_client nvme_rdma_ib_client = {
 	.name	= "nvme_rdma",
-	.add = nvme_rdma_add_one,
 	.remove = nvme_rdma_remove_one
 };
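The rdma.c changes above combine three related pieces: one I/O queue per completion vector, hctx-to-CPU assignment derived from each vector's IRQ affinity via blk_mq_rdma_map_queues(), and comp_vector = idx - 1 because queue index 0 is the admin queue. A sketch of the mapping callback for a hypothetical driver (struct example_ctrl is an assumption; the blk_mq_rdma_map_queues() signature is as used in the diff):

#include <linux/blk-mq-rdma.h>

/*
 * Hedged sketch of the new queue mapping: hctx i is mapped onto the CPUs
 * that IRQ affinity assigned to completion vector i (first_vec == 0),
 * which lines up with comp_vector = idx - 1 above because hctx 0
 * corresponds to I/O queue index 1 (index 0 being the admin queue).
 */
static int example_map_queues(struct blk_mq_tag_set *set)
{
	struct example_ctrl *ctrl = set->driver_data;	/* hypothetical */

	return blk_mq_rdma_map_queues(set, ctrl->ibdev, 0 /* first_vec */);
}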
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 2d7a98ab53fb..a53bb6635b83 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -199,12 +199,6 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 	copy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1);
 	copy_and_pad(id->fr, sizeof(id->fr), UTS_RELEASE, strlen(UTS_RELEASE));
 
-	memset(id->mn, ' ', sizeof(id->mn));
-	strncpy((char *)id->mn, "Linux", sizeof(id->mn));
-
-	memset(id->fr, ' ', sizeof(id->fr));
-	strncpy((char *)id->fr, UTS_RELEASE, sizeof(id->fr));
-
 	id->rab = 6;
 
 	/*
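The admin-cmd.c hunk replaces hand-rolled memset()+strncpy() pairs with a copy_and_pad() helper; the helper's body is outside this hunk, so the following is an assumption-labeled reconstruction of what such a helper must do (NVMe identify strings are fixed-width, space-padded ASCII with no NUL terminator):

#include <linux/kernel.h>
#include <linux/string.h>

/* Hypothetical reconstruction, not the verbatim helper from this series. */
static void example_copy_and_pad(char *dst, size_t dst_len,
				 const char *src, size_t src_len)
{
	size_t n = min(dst_len, src_len);

	memcpy(dst, src, n);
	memset(dst + n, ' ', dst_len - n);	/* pad, don't terminate */
}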
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 31ca55dfcb1d..309c84aa7595 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -114,6 +114,11 @@ struct nvmet_fc_tgtport {
 	struct kref			ref;
 };
 
+struct nvmet_fc_defer_fcp_req {
+	struct list_head		req_list;
+	struct nvmefc_tgt_fcp_req	*fcp_req;
+};
+
 struct nvmet_fc_tgt_queue {
 	bool				ninetypercent;
 	u16				qid;
@@ -132,6 +137,8 @@ struct nvmet_fc_tgt_queue {
 	struct nvmet_fc_tgt_assoc	*assoc;
 	struct nvmet_fc_fcp_iod		*fod;		/* array of fcp_iods */
 	struct list_head		fod_list;
+	struct list_head		pending_cmd_list;
+	struct list_head		avail_defer_list;
 	struct workqueue_struct		*work_q;
 	struct kref			ref;
 } __aligned(sizeof(unsigned long long));
@@ -223,6 +230,8 @@ static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue);
 static int nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue);
 static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport);
 static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport);
+static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
+					struct nvmet_fc_fcp_iod *fod);
 
 
 /* *********************** FC-NVME DMA Handling **************************** */
@@ -385,7 +394,7 @@ nvmet_fc_free_ls_iodlist(struct nvmet_fc_tgtport *tgtport)
 static struct nvmet_fc_ls_iod *
 nvmet_fc_alloc_ls_iod(struct nvmet_fc_tgtport *tgtport)
 {
-	static struct nvmet_fc_ls_iod *iod;
+	struct nvmet_fc_ls_iod *iod;
 	unsigned long flags;
 
 	spin_lock_irqsave(&tgtport->lock, flags);
@@ -462,10 +471,10 @@ nvmet_fc_destroy_fcp_iodlist(struct nvmet_fc_tgtport *tgtport,
 static struct nvmet_fc_fcp_iod *
 nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue)
 {
-	static struct nvmet_fc_fcp_iod *fod;
-	unsigned long flags;
+	struct nvmet_fc_fcp_iod *fod;
+
+	lockdep_assert_held(&queue->qlock);
 
-	spin_lock_irqsave(&queue->qlock, flags);
 	fod = list_first_entry_or_null(&queue->fod_list,
 					struct nvmet_fc_fcp_iod, fcp_list);
 	if (fod) {
@@ -477,17 +486,37 @@ nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue)
 		 * will "inherit" that reference.
 		 */
 	}
-	spin_unlock_irqrestore(&queue->qlock, flags);
 	return fod;
 }
 
 static void
+nvmet_fc_queue_fcp_req(struct nvmet_fc_tgtport *tgtport,
+		       struct nvmet_fc_tgt_queue *queue,
+		       struct nvmefc_tgt_fcp_req *fcpreq)
+{
+	struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private;
+
+	/*
+	 * put all admin cmds on hw queue id 0. All io commands go to
+	 * the respective hw queue based on a modulo basis
+	 */
+	fcpreq->hwqid = queue->qid ?
+			((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0;
+
+	if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR)
+		queue_work_on(queue->cpu, queue->work_q, &fod->work);
+	else
+		nvmet_fc_handle_fcp_rqst(tgtport, fod);
+}
+
+static void
 nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue,
 			struct nvmet_fc_fcp_iod *fod)
 {
 	struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq;
 	struct nvmet_fc_tgtport *tgtport = fod->tgtport;
+	struct nvmet_fc_defer_fcp_req *deferfcp;
 	unsigned long flags;
 
 	fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma,
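Note the locking change in nvmet_fc_alloc_fcp_iod() above: the allocator no longer takes queue->qlock itself. Callers now hold the lock (so that allocation and the deferred-list handling further down can be atomic), and lockdep_assert_held() both documents the contract and, with CONFIG_PROVE_LOCKING, enforces it. A generic sketch of the pattern (example_* names are hypothetical):

#include <linux/list.h>
#include <linux/lockdep.h>

/* Caller-locked allocation: the lock covers this lookup plus whatever
 * the caller does next, and lockdep checks the contract in debug builds. */
static struct example_item *example_alloc_locked(struct example_queue *q)
{
	lockdep_assert_held(&q->lock);

	return list_first_entry_or_null(&q->free_list,
					struct example_item, node);
}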
@@ -495,21 +524,56 @@ nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue,
 
 	fcpreq->nvmet_fc_private = NULL;
 
-	spin_lock_irqsave(&queue->qlock, flags);
-	list_add_tail(&fod->fcp_list, &fod->queue->fod_list);
 	fod->active = false;
 	fod->abort = false;
 	fod->aborted = false;
 	fod->writedataactive = false;
 	fod->fcpreq = NULL;
+
+	tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq);
+
+	spin_lock_irqsave(&queue->qlock, flags);
+	deferfcp = list_first_entry_or_null(&queue->pending_cmd_list,
+				struct nvmet_fc_defer_fcp_req, req_list);
+	if (!deferfcp) {
+		list_add_tail(&fod->fcp_list, &fod->queue->fod_list);
+		spin_unlock_irqrestore(&queue->qlock, flags);
+
+		/* Release reference taken at queue lookup and fod allocation */
+		nvmet_fc_tgt_q_put(queue);
+		return;
+	}
+
+	/* Re-use the fod for the next pending cmd that was deferred */
+	list_del(&deferfcp->req_list);
+
+	fcpreq = deferfcp->fcp_req;
+
+	/* deferfcp can be reused for another IO at a later date */
+	list_add_tail(&deferfcp->req_list, &queue->avail_defer_list);
+
 	spin_unlock_irqrestore(&queue->qlock, flags);
 
+	/* Save NVME CMD IO in fod */
+	memcpy(&fod->cmdiubuf, fcpreq->rspaddr, fcpreq->rsplen);
+
+	/* Setup new fcpreq to be processed */
+	fcpreq->rspaddr = NULL;
+	fcpreq->rsplen = 0;
+	fcpreq->nvmet_fc_private = fod;
+	fod->fcpreq = fcpreq;
+	fod->active = true;
+
+	/* inform LLDD IO is now being processed */
+	tgtport->ops->defer_rcv(&tgtport->fc_target_port, fcpreq);
+
+	/* Submit deferred IO for processing */
+	nvmet_fc_queue_fcp_req(tgtport, queue, fcpreq);
+
 	/*
-	 * release the reference taken at queue lookup and fod allocation
+	 * Leave the queue lookup reference that was taken when the
+	 * fod was originally allocated.
 	 */
-	nvmet_fc_tgt_q_put(queue);
-
-	tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq);
 }
 
 static int
@@ -569,6 +633,8 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
 	queue->port = assoc->tgtport->port;
 	queue->cpu = nvmet_fc_queue_to_cpu(assoc->tgtport, qid);
 	INIT_LIST_HEAD(&queue->fod_list);
+	INIT_LIST_HEAD(&queue->avail_defer_list);
+	INIT_LIST_HEAD(&queue->pending_cmd_list);
 	atomic_set(&queue->connected, 0);
 	atomic_set(&queue->sqtail, 0);
 	atomic_set(&queue->rsn, 1);
@@ -638,6 +704,7 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue)
 {
 	struct nvmet_fc_tgtport *tgtport = queue->assoc->tgtport;
 	struct nvmet_fc_fcp_iod *fod = queue->fod;
+	struct nvmet_fc_defer_fcp_req *deferfcp, *tempptr;
 	unsigned long flags;
 	int i, writedataactive;
 	bool disconnect;
@@ -666,6 +733,36 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue)
 			}
 		}
 	}
+
+	/* Cleanup defer'ed IOs in queue */
+	list_for_each_entry_safe(deferfcp, tempptr, &queue->avail_defer_list,
+				 req_list) {
+		list_del(&deferfcp->req_list);
+		kfree(deferfcp);
+	}
+
+	for (;;) {
+		deferfcp = list_first_entry_or_null(&queue->pending_cmd_list,
+				struct nvmet_fc_defer_fcp_req, req_list);
+		if (!deferfcp)
+			break;
+
+		list_del(&deferfcp->req_list);
+		spin_unlock_irqrestore(&queue->qlock, flags);
+
+		tgtport->ops->defer_rcv(&tgtport->fc_target_port,
+				deferfcp->fcp_req);
+
+		tgtport->ops->fcp_abort(&tgtport->fc_target_port,
+				deferfcp->fcp_req);
+
+		tgtport->ops->fcp_req_release(&tgtport->fc_target_port,
+				deferfcp->fcp_req);
+
+		kfree(deferfcp);
+
+		spin_lock_irqsave(&queue->qlock, flags);
+	}
 	spin_unlock_irqrestore(&queue->qlock, flags);
 
 	flush_workqueue(queue->work_q);
@@ -2172,11 +2269,38 @@ nvmet_fc_handle_fcp_rqst_work(struct work_struct *work)
 * Pass a FC-NVME FCP CMD IU received from the FC link to the nvmet-fc
 * layer for processing.
 *
- * The nvmet-fc layer will copy cmd payload to an internal structure for
- * processing. As such, upon completion of the routine, the LLDD may
- * immediately free/reuse the CMD IU buffer passed in the call.
+ * The nvmet_fc layer allocates a local job structure (struct
+ * nvmet_fc_fcp_iod) from the queue for the io and copies the
+ * CMD IU buffer to the job structure. As such, on a successful
+ * completion (returns 0), the LLDD may immediately free/reuse
+ * the CMD IU buffer passed in the call.
 *
- * If this routine returns error, the lldd should abort the exchange.
+ * However, because FC is packetized and the LLDD's api may issue the hw
+ * command that sends the response without seeing that command's hw
+ * completion before a new command is asynchronously received, it's
+ * possible for a command to arrive before the LLDD and nvmet_fc have
+ * recycled the job structure. This gives the appearance of more commands
+ * received than fit in the sq.
+ * To alleviate this scenario, a temporary queue is maintained in the
+ * transport for pending LLDD requests waiting for a queue job structure.
+ * In these "overrun" cases, a temporary queue element is allocated, the
+ * LLDD request and CMD IU buffer information are remembered, and the
+ * routine returns a -EOVERFLOW status. Subsequently, when a queue job
+ * structure is freed, it is immediately reallocated for anything on the
+ * pending request list.
+ * The LLDD's defer_rcv() callback is then called, informing the LLDD
+ * that it may reuse the CMD IU buffer, and the io is started normally
+ * with the transport.
+ *
+ * The LLDD, when receiving an -EOVERFLOW completion status, is to treat
+ * the completion as successful but must not reuse the CMD IU buffer
+ * until the LLDD's defer_rcv() callback has been called for the
+ * corresponding struct nvmefc_tgt_fcp_req pointer.
+ *
+ * If there is any other condition in which an error occurs, the
+ * transport will return a non-zero status indicating the error.
+ * In all cases other than -EOVERFLOW, the transport has not accepted the
+ * request and the LLDD should abort the exchange.
 *
 * @target_port: pointer to the (registered) target port the FCP CMD IU
 *		was received on.
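The kernel-doc above defines a new LLDD-facing contract. From the LLDD side it looks roughly like the following sketch (example_* names are hypothetical; nvmet_fc_rcv_fcp_req() and the return-code semantics are as documented above):

#include <linux/errno.h>
#include <linux/nvme-fc-driver.h>

/*
 * Hedged sketch of an LLDD receive path: treat -EOVERFLOW like success,
 * but keep the CMD IU buffer untouched until the transport's defer_rcv()
 * upcall for this nvmefc_tgt_fcp_req arrives.
 */
static void example_lldd_handle_cmd(struct example_lport *lport,
				    struct nvmefc_tgt_fcp_req *fcpreq,
				    void *cmdiubuf, u32 cmdiubuf_len)
{
	int ret;

	ret = nvmet_fc_rcv_fcp_req(lport->targetport, fcpreq,
				   cmdiubuf, cmdiubuf_len);
	switch (ret) {
	case 0:
		break;		/* transport owns the io; buffer reusable */
	case -EOVERFLOW:
		break;		/* io accepted; buffer busy until defer_rcv() */
	default:
		example_lldd_abort_exchange(lport, fcpreq);	/* hypothetical */
		break;
	}
}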
@@ -2194,6 +2318,8 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port,
 	struct nvme_fc_cmd_iu *cmdiu = cmdiubuf;
 	struct nvmet_fc_tgt_queue *queue;
 	struct nvmet_fc_fcp_iod *fod;
+	struct nvmet_fc_defer_fcp_req *deferfcp;
+	unsigned long flags;
 
 	/* validate iu, so the connection id can be used to find the queue */
 	if ((cmdiubuf_len != sizeof(*cmdiu)) ||
@@ -2214,29 +2340,60 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port,
 	 * when the fod is freed.
 	 */
 
+	spin_lock_irqsave(&queue->qlock, flags);
+
 	fod = nvmet_fc_alloc_fcp_iod(queue);
-	if (!fod) {
+	if (fod) {
+		spin_unlock_irqrestore(&queue->qlock, flags);
+
+		fcpreq->nvmet_fc_private = fod;
+		fod->fcpreq = fcpreq;
+
+		memcpy(&fod->cmdiubuf, cmdiubuf, cmdiubuf_len);
+
+		nvmet_fc_queue_fcp_req(tgtport, queue, fcpreq);
+
+		return 0;
+	}
+
+	if (!tgtport->ops->defer_rcv) {
+		spin_unlock_irqrestore(&queue->qlock, flags);
 		/* release the queue lookup reference */
 		nvmet_fc_tgt_q_put(queue);
 		return -ENOENT;
 	}
 
-	fcpreq->nvmet_fc_private = fod;
-	fod->fcpreq = fcpreq;
-	/*
-	 * put all admin cmds on hw queue id 0. All io commands go to
-	 * the respective hw queue based on a modulo basis
-	 */
-	fcpreq->hwqid = queue->qid ?
-			((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0;
-	memcpy(&fod->cmdiubuf, cmdiubuf, cmdiubuf_len);
+	deferfcp = list_first_entry_or_null(&queue->avail_defer_list,
+			struct nvmet_fc_defer_fcp_req, req_list);
+	if (deferfcp) {
+		/* Just re-use one that was previously allocated */
+		list_del(&deferfcp->req_list);
+	} else {
+		spin_unlock_irqrestore(&queue->qlock, flags);
 
-	if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR)
-		queue_work_on(queue->cpu, queue->work_q, &fod->work);
-	else
-		nvmet_fc_handle_fcp_rqst(tgtport, fod);
+		/* Now we need to dynamically allocate one */
+		deferfcp = kmalloc(sizeof(*deferfcp), GFP_KERNEL);
+		if (!deferfcp) {
+			/* release the queue lookup reference */
+			nvmet_fc_tgt_q_put(queue);
+			return -ENOMEM;
+		}
+		spin_lock_irqsave(&queue->qlock, flags);
+	}
 
-	return 0;
+	/* For now, use rspaddr / rsplen to save payload information */
+	fcpreq->rspaddr = cmdiubuf;
+	fcpreq->rsplen  = cmdiubuf_len;
+	deferfcp->fcp_req = fcpreq;
+
+	/* defer processing till a fod becomes available */
+	list_add_tail(&deferfcp->req_list, &queue->pending_cmd_list);
+
+	/* NOTE: the queue lookup reference is still valid */
+
+	spin_unlock_irqrestore(&queue->qlock, flags);
+
+	return -EOVERFLOW;
 }
 EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_req);
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
index 3b4d47a6abdb..0d4c23dc4532 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -68,7 +68,7 @@ static void nvmet_execute_rw(struct nvmet_req *req)
 
 	nvmet_inline_bio_init(req);
 	bio = &req->inline_bio;
-	bio->bi_bdev = req->ns->bdev;
+	bio_set_dev(bio, req->ns->bdev);
 	bio->bi_iter.bi_sector = sector;
 	bio->bi_private = req;
 	bio->bi_end_io = nvmet_bio_done;
@@ -80,7 +80,7 @@ static void nvmet_execute_rw(struct nvmet_req *req)
 			struct bio *prev = bio;
 
 			bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
-			bio->bi_bdev = req->ns->bdev;
+			bio_set_dev(bio, req->ns->bdev);
 			bio->bi_iter.bi_sector = sector;
 			bio_set_op_attrs(bio, op, op_flags);
 
@@ -104,7 +104,7 @@ static void nvmet_execute_flush(struct nvmet_req *req)
 
 	nvmet_inline_bio_init(req);
 	bio = &req->inline_bio;
-	bio->bi_bdev = req->ns->bdev;
+	bio_set_dev(bio, req->ns->bdev);
 	bio->bi_private = req;
 	bio->bi_end_io = nvmet_bio_done;
 	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 56a4cba690b5..76d2bb793afe 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -1510,10 +1510,6 @@ static struct nvmet_fabrics_ops nvmet_rdma_ops = {
 	.delete_ctrl = nvmet_rdma_delete_ctrl,
 };
 
-static void nvmet_rdma_add_one(struct ib_device *ib_device)
-{
-}
-
 static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data)
 {
 	struct nvmet_rdma_queue *queue;
@@ -1534,7 +1530,6 @@ static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data)
 
 static struct ib_client nvmet_rdma_ib_client = {
 	.name	= "nvmet_rdma",
-	.add = nvmet_rdma_add_one,
 	.remove = nvmet_rdma_remove_one
 };
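Finally, the io-cmd.c conversions follow the block-layer switch away from bio->bi_bdev: a bio now carries a gendisk/partition pair, and bio_set_dev() is the helper that records it. A sketch of a flush submission in the new style (field names follow the io-cmd.c hunks; a condensed illustration, not the verbatim nvmet function):

#include <linux/bio.h>
#include <linux/blk_types.h>

/* Hedged sketch: point the inline bio at the namespace's block device
 * with bio_set_dev() and issue it as a preflush write. */
static void example_execute_flush(struct nvmet_req *req)
{
	struct bio *bio = &req->inline_bio;

	bio_set_dev(bio, req->ns->bdev);
	bio->bi_private = req;
	bio->bi_end_io = nvmet_bio_done;
	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;

	submit_bio(bio);
}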