From 3d0641015bf73aaa1cb54c936674959e7805070f Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 19 Jun 2018 15:34:09 +0300 Subject: nvme-rdma: fix possible double free condition when failing to create a controller Failures after nvme_init_ctrl will defer resource cleanups to .free_ctrl when the reference is released, hence we should not free the controller queues for these failures. Fix that by moving controller queues allocation before controller initialization and correctly freeing them for failures before initialization and skip them for failures after initialization. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index c9424da0d23e..bcb0e5d6343d 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -888,9 +888,9 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) list_del(&ctrl->list); mutex_unlock(&nvme_rdma_ctrl_mutex); - kfree(ctrl->queues); nvmf_free_options(nctrl->opts); free_ctrl: + kfree(ctrl->queues); kfree(ctrl); } @@ -1932,11 +1932,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, goto out_free_ctrl; } - ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops, - 0 /* no quirks, we're perfect! */); - if (ret) - goto out_free_ctrl; - INIT_DELAYED_WORK(&ctrl->reconnect_work, nvme_rdma_reconnect_ctrl_work); INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work); @@ -1950,14 +1945,19 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues), GFP_KERNEL); if (!ctrl->queues) - goto out_uninit_ctrl; + goto out_free_ctrl; + + ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops, + 0 /* no quirks, we're perfect! */); + if (ret) + goto out_kfree_queues; changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING); WARN_ON_ONCE(!changed); ret = nvme_rdma_configure_admin_queue(ctrl, true); if (ret) - goto out_kfree_queues; + goto out_uninit_ctrl; /* sanity check icdoff */ if (ctrl->ctrl.icdoff) { @@ -2014,14 +2014,14 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, out_remove_admin_queue: nvme_rdma_destroy_admin_queue(ctrl, true); -out_kfree_queues: - kfree(ctrl->queues); out_uninit_ctrl: nvme_uninit_ctrl(&ctrl->ctrl); nvme_put_ctrl(&ctrl->ctrl); if (ret > 0) ret = -EIO; return ERR_PTR(ret); +out_kfree_queues: + kfree(ctrl->queues); out_free_ctrl: kfree(ctrl); return ERR_PTR(ret); -- cgit v1.2.1 From 94e42213cc1ae41c57819539c0130f8dfc69d718 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 19 Jun 2018 15:34:10 +0300 Subject: nvme-rdma: fix possible free of a non-allocated async event buffer If nvme_rdma_configure_admin_queue fails before we allocated the async event buffer, we will falsly free it because nvme_rdma_free_queue is freeing it. Fix it by allocating the buffer right after nvme_rdma_alloc_queue and free it right before nvme_rdma_queue_free to maintain orderly reverse cleanup sequence. Reported-by: Israel Rukshin Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index bcb0e5d6343d..f9affb71ac85 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -560,12 +560,6 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags)) return; - if (nvme_rdma_queue_idx(queue) == 0) { - nvme_rdma_free_qe(queue->device->dev, - &queue->ctrl->async_event_sqe, - sizeof(struct nvme_command), DMA_TO_DEVICE); - } - nvme_rdma_destroy_queue_ib(queue); rdma_destroy_id(queue->cm_id); } @@ -739,6 +733,8 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, blk_cleanup_queue(ctrl->ctrl.admin_q); nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); } + nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, + sizeof(struct nvme_command), DMA_TO_DEVICE); nvme_rdma_free_queue(&ctrl->queues[0]); } @@ -755,11 +751,16 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev); + error = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe, + sizeof(struct nvme_command), DMA_TO_DEVICE); + if (error) + goto out_free_queue; + if (new) { ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true); if (IS_ERR(ctrl->ctrl.admin_tagset)) { error = PTR_ERR(ctrl->ctrl.admin_tagset); - goto out_free_queue; + goto out_free_async_qe; } ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); @@ -795,12 +796,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, if (error) goto out_stop_queue; - error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev, - &ctrl->async_event_sqe, sizeof(struct nvme_command), - DMA_TO_DEVICE); - if (error) - goto out_stop_queue; - return 0; out_stop_queue: @@ -811,6 +806,9 @@ out_cleanup_queue: out_free_tagset: if (new) nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); +out_free_async_qe: + nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, + sizeof(struct nvme_command), DMA_TO_DEVICE); out_free_queue: nvme_rdma_free_queue(&ctrl->queues[0]); return error; -- cgit v1.2.1 From c947657b15379505a9bba36a02005882b66abe57 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 19 Jun 2018 15:34:11 +0300 Subject: nvme-rdma: Fix command completion race at error recovery The race is between completing the request at error recovery work and rdma completions. If we cancel the request before getting the good rdma completion we get a NULL deref of the request MR at nvme_rdma_process_nvme_rsp(). When Canceling the request we return its mr to the mr pool (set mr to NULL) and also unmap its data. Canceling the requests while the rdma queues are active is not safe. Because rdma queues are active and we get good rdma completions that can use the mr pointer which may be NULL. Completing the request too soon may lead also to performing DMA to/from user buffers which might have been already unmapped. The commit fixes the race by draining the QP before starting the abort commands mechanism. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index f9affb71ac85..2815749f4dfb 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -728,7 +728,6 @@ out: static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, bool remove) { - nvme_rdma_stop_queue(&ctrl->queues[0]); if (remove) { blk_cleanup_queue(ctrl->ctrl.admin_q); nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); @@ -817,7 +816,6 @@ out_free_queue: static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl, bool remove) { - nvme_rdma_stop_io_queues(ctrl); if (remove) { blk_cleanup_queue(ctrl->ctrl.connect_q); nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); @@ -947,6 +945,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) return; destroy_admin: + nvme_rdma_stop_queue(&ctrl->queues[0]); nvme_rdma_destroy_admin_queue(ctrl, false); requeue: dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n", @@ -963,12 +962,14 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) if (ctrl->ctrl.queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); + nvme_rdma_stop_io_queues(ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request, &ctrl->ctrl); nvme_rdma_destroy_io_queues(ctrl, false); } blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + nvme_rdma_stop_queue(&ctrl->queues[0]); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request, &ctrl->ctrl); nvme_rdma_destroy_admin_queue(ctrl, false); @@ -1734,6 +1735,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) { if (ctrl->ctrl.queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); + nvme_rdma_stop_io_queues(ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request, &ctrl->ctrl); nvme_rdma_destroy_io_queues(ctrl, shutdown); @@ -1745,6 +1747,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) nvme_disable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + nvme_rdma_stop_queue(&ctrl->queues[0]); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request, &ctrl->ctrl); blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); @@ -2011,6 +2014,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, return &ctrl->ctrl; out_remove_admin_queue: + nvme_rdma_stop_queue(&ctrl->queues[0]); nvme_rdma_destroy_admin_queue(ctrl, true); out_uninit_ctrl: nvme_uninit_ctrl(&ctrl->ctrl); -- cgit v1.2.1 From 5e77d61cbc7e766778037127dab69e6410a8fc48 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 19 Jun 2018 15:34:13 +0300 Subject: nvme-rdma: don't override opts->queue_size That is user argument, and theoretically controller limits can change over time (over reconnects/resets). Instead, use the sqsize controller attribute to check queue depth boundaries and use it to the tagset allocation. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 2815749f4dfb..9544625c0b7d 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -692,7 +692,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, set = &ctrl->tag_set; memset(set, 0, sizeof(*set)); set->ops = &nvme_rdma_mq_ops; - set->queue_depth = nctrl->opts->queue_size; + set->queue_depth = nctrl->sqsize + 1; set->reserved_tags = 1; /* fabric connect */ set->numa_node = NUMA_NO_NODE; set->flags = BLK_MQ_F_SHOULD_MERGE; @@ -1975,20 +1975,19 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, goto out_remove_admin_queue; } - if (opts->queue_size > ctrl->ctrl.maxcmd) { - /* warn if maxcmd is lower than queue_size */ - dev_warn(ctrl->ctrl.device, - "queue_size %zu > ctrl maxcmd %u, clamping down\n", - opts->queue_size, ctrl->ctrl.maxcmd); - opts->queue_size = ctrl->ctrl.maxcmd; - } - + /* only warn if argument is too large here, will clamp later */ if (opts->queue_size > ctrl->ctrl.sqsize + 1) { - /* warn if sqsize is lower than queue_size */ dev_warn(ctrl->ctrl.device, "queue_size %zu > ctrl sqsize %u, clamping down\n", opts->queue_size, ctrl->ctrl.sqsize + 1); - opts->queue_size = ctrl->ctrl.sqsize + 1; + } + + /* warn if maxcmd is lower than sqsize+1 */ + if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) { + dev_warn(ctrl->ctrl.device, + "sqsize %u > ctrl maxcmd %u, clamping down\n", + ctrl->ctrl.sqsize + 1, ctrl->ctrl.maxcmd); + ctrl->ctrl.sqsize = ctrl->ctrl.maxcmd - 1; } if (opts->nr_io_queues) { -- cgit v1.2.1 From d68a90e148f5a82aa67654c5012071e31c0e4baa Mon Sep 17 00:00:00 2001 From: Max Gurtuvoy Date: Tue, 19 Jun 2018 15:45:33 +0300 Subject: nvmet: reset keep alive timer in controller enable Controllers that are not yet enabled should not really enforce keep alive timeouts, but we still want to track a timeout and cleanup in case a host died before it enabled the controller. Hence, simply reset the keep alive timer when the controller is enabled. Suggested-by: Max Gurtovoy Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index a03da764ecae..74d4b785d2da 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -686,6 +686,14 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl) } ctrl->csts = NVME_CSTS_RDY; + + /* + * Controllers that are not yet enabled should not really enforce the + * keep alive timeout, but we still want to track a timeout and cleanup + * in case a host died before it enabled the controller. Hence, simply + * reset the keep alive timer when the controller is enabled. + */ + mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ); } static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl) -- cgit v1.2.1 From 02d62a8bc48e92171c46540722e2d52ce77d87af Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 20 Jun 2018 07:44:12 -0700 Subject: nvme-fc: release io queues to allow fast fail Rather than leaving io queues quiesced after tearing down an association, restart them. This allows ios to be replayed, with fastfail ios terminating and non-fastfail getting into loops of retry. This follows rdma's lead. Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index b528a2f5826c..41d45a1b5c62 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2790,6 +2790,9 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) /* re-enable the admin_q so anything new can fast fail */ blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); + /* resume the io queues so that things will fast fail */ + nvme_start_queues(&ctrl->ctrl); + nvme_fc_ctlr_inactive_on_rport(ctrl); } @@ -2804,9 +2807,6 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) * waiting for io to terminate */ nvme_fc_delete_association(ctrl); - - /* resume the io queues so that things will fast fail */ - nvme_start_queues(nctrl); } static void -- cgit v1.2.1 From 9f9cafc14016f23f982d3ce18f9057923bd3037a Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Wed, 20 Jun 2018 13:42:22 +0800 Subject: nvme-pci: move nvme_kill_queues to nvme_remove_dead_ctrl There is race between nvme_remove and nvme_reset_work that can lead to io hang. nvme_remove nvme_reset_work -> nvme_remove_dead_ctrl -> nvme_dev_disable -> quiesce request_queue -> queue remove_work -> cancel_work_sync reset_work -> nvme_remove_namespaces -> splice ctrl->namespaces nvme_remove_dead_ctrl_work -> nvme_kill_queues -> nvme_ns_remove do nothing -> blk_cleanup_queue -> blk_freeze_queue Finally, the request_queue is quiesced state when wait freeze, we will get io hang here. To fix it, move the nvme_kill_queues from nvme_remove_dead_ctrl_work to nvme_remove_dead_ctrl. Suggested-by: Keith Busch Signed-off-by: Jianchao Wang Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index fc33804662e7..73a97fcea364 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2289,6 +2289,7 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status) nvme_get_ctrl(&dev->ctrl); nvme_dev_disable(dev, false); + nvme_kill_queues(&dev->ctrl); if (!queue_work(nvme_wq, &dev->remove_work)) nvme_put_ctrl(&dev->ctrl); } @@ -2405,7 +2406,6 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work) struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work); struct pci_dev *pdev = to_pci_dev(dev->dev); - nvme_kill_queues(&dev->ctrl); if (pci_get_drvdata(pdev)) device_release_driver(&pdev->dev); nvme_put_ctrl(&dev->ctrl); -- cgit v1.2.1 From 943e942e6266f22babee5efeb00f8f672fbff5bd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 21 Jun 2018 09:49:37 -0600 Subject: nvme-pci: limit max IO size and segments to avoid high order allocations nvme requires an sg table allocation for each request. If the request is large, then the allocation can become quite large. For instance, with our default software settings of 1280KB IO size, we'll need 10248 bytes of sg table. That turns into a 2nd order allocation, which we can't always guarantee. If we fail the allocation, blk-mq will retry it later. But there's no guarantee that we'll EVER be able to allocate that much contigious memory. Limit the IO size such that we never need more than a single page of memory. That's a lot faster and more reliable. Then back that allocation with a mempool, so that we know we'll always be able to succeed the allocation at some point. Signed-off-by: Jens Axboe Acked-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 1 + drivers/nvme/host/nvme.h | 1 + drivers/nvme/host/pci.c | 42 +++++++++++++++++++++++++++++++++++++----- 3 files changed, 39 insertions(+), 5 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 21710a7460c8..46df030b2c3f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1808,6 +1808,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, u32 max_segments = (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1; + max_segments = min_not_zero(max_segments, ctrl->max_segments); blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 231807cbc849..0c4a33df3b2f 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -170,6 +170,7 @@ struct nvme_ctrl { u64 cap; u32 page_size; u32 max_hw_sectors; + u32 max_segments; u16 oncs; u16 oacs; u16 nssa; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 73a97fcea364..ba943f211687 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -38,6 +38,13 @@ #define SGES_PER_PAGE (PAGE_SIZE / sizeof(struct nvme_sgl_desc)) +/* + * These can be higher, but we need to ensure that any command doesn't + * require an sg allocation that needs more than a page of data. + */ +#define NVME_MAX_KB_SZ 4096 +#define NVME_MAX_SEGS 127 + static int use_threaded_interrupts; module_param(use_threaded_interrupts, int, 0); @@ -100,6 +107,8 @@ struct nvme_dev { struct nvme_ctrl ctrl; struct completion ioq_wait; + mempool_t *iod_mempool; + /* shadow doorbell buffer support: */ u32 *dbbuf_dbs; dma_addr_t dbbuf_dbs_dma_addr; @@ -477,10 +486,7 @@ static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev) iod->use_sgl = nvme_pci_use_sgls(dev, rq); if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) { - size_t alloc_size = nvme_pci_iod_alloc_size(dev, size, nseg, - iod->use_sgl); - - iod->sg = kmalloc(alloc_size, GFP_ATOMIC); + iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC); if (!iod->sg) return BLK_STS_RESOURCE; } else { @@ -526,7 +532,7 @@ static void nvme_free_iod(struct nvme_dev *dev, struct request *req) } if (iod->sg != iod->inline_sg) - kfree(iod->sg); + mempool_free(iod->sg, dev->iod_mempool); } #ifdef CONFIG_BLK_DEV_INTEGRITY @@ -2280,6 +2286,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) blk_put_queue(dev->ctrl.admin_q); kfree(dev->queues); free_opal_dev(dev->ctrl.opal_dev); + mempool_destroy(dev->iod_mempool); kfree(dev); } @@ -2334,6 +2341,13 @@ static void nvme_reset_work(struct work_struct *work) if (result) goto out; + /* + * Limit the max command size to prevent iod->sg allocations going + * over a single page. + */ + dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1; + dev->ctrl.max_segments = NVME_MAX_SEGS; + result = nvme_init_identify(&dev->ctrl); if (result) goto out; @@ -2509,6 +2523,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) int node, result = -ENOMEM; struct nvme_dev *dev; unsigned long quirks = id->driver_data; + size_t alloc_size; node = dev_to_node(&pdev->dev); if (node == NUMA_NO_NODE) @@ -2546,6 +2561,23 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result) goto release_pools; + /* + * Double check that our mempool alloc size will cover the biggest + * command we support. + */ + alloc_size = nvme_pci_iod_alloc_size(dev, NVME_MAX_KB_SZ, + NVME_MAX_SEGS, true); + WARN_ON_ONCE(alloc_size > PAGE_SIZE); + + dev->iod_mempool = mempool_create_node(1, mempool_kmalloc, + mempool_kfree, + (void *) alloc_size, + GFP_KERNEL, node); + if (!dev->iod_mempool) { + result = -ENOMEM; + goto release_pools; + } + dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); nvme_get_ctrl(&dev->ctrl); -- cgit v1.2.1 From 682630f00a219a1b0696abe9c0967e660068187b Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 25 Jun 2018 20:58:17 +0300 Subject: nvme-rdma: fix possible double free of controller async event buffer If reconnect/reset failed where the controller async event buffer was freed, we might end up freeing it again as we call nvme_rdma_destroy_admin_queue again in the remove path. Given that the sequence is guaranteed to serialize by .ctrl_stop, we simply set ctrl->async_event_sqe.data to NULL and don't free it in future visits. Reported-by: Max Gurtovoy Tested-by: Max Gurtovoy Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 9544625c0b7d..518c5b09038c 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -732,8 +732,11 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, blk_cleanup_queue(ctrl->ctrl.admin_q); nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); } - nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, - sizeof(struct nvme_command), DMA_TO_DEVICE); + if (ctrl->async_event_sqe.data) { + nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, + sizeof(struct nvme_command), DMA_TO_DEVICE); + ctrl->async_event_sqe.data = NULL; + } nvme_rdma_free_queue(&ctrl->queues[0]); } -- cgit v1.2.1 From b6e44b4c74ef75f729f0147d43d189173fe463c9 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 11 Jul 2018 16:44:44 -0600 Subject: nvme-pci: fix memory leak on probe failure The nvme driver specific structures need to be initialized prior to enabling the generic controller so we can unwind on failure with out using the reference counting callbacks so that 'probe' and 'remove' can be symmetric. The newly added iod_mempool is the only resource that was being allocated out of order, and a failure there would leak the generic controller memory. This patch just moves that allocation above the controller initialization. Fixes: 943e942e6266f ("nvme-pci: limit max IO size and segments to avoid high order allocations") Reported-by: Weiping Zhang Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index ba943f211687..ddd441b1516a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2556,11 +2556,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) quirks |= check_vendor_combination_bug(pdev); - result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, - quirks); - if (result) - goto release_pools; - /* * Double check that our mempool alloc size will cover the biggest * command we support. @@ -2578,6 +2573,11 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto release_pools; } + result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, + quirks); + if (result) + goto release_mempool; + dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); nvme_get_ctrl(&dev->ctrl); @@ -2585,6 +2585,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; + release_mempool: + mempool_destroy(dev->iod_mempool); release_pools: nvme_release_prp_pools(dev); unmap: -- cgit v1.2.1 From cf39a6bc342b980f10f344d88035829638a89a48 Mon Sep 17 00:00:00 2001 From: Scott Bauer Date: Fri, 29 Jun 2018 13:03:28 -0600 Subject: nvme: ensure forward progress during Admin passthru If the controller supports effects and goes down during the passthru admin command we will deadlock during namespace revalidation. [ 363.488275] INFO: task kworker/u16:5:231 blocked for more than 120 seconds. [ 363.488290] Not tainted 4.17.0+ #2 [ 363.488296] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 363.488303] kworker/u16:5 D 0 231 2 0x80000000 [ 363.488331] Workqueue: nvme-reset-wq nvme_reset_work [nvme] [ 363.488338] Call Trace: [ 363.488385] schedule+0x75/0x190 [ 363.488396] rwsem_down_read_failed+0x1c3/0x2f0 [ 363.488481] call_rwsem_down_read_failed+0x14/0x30 [ 363.488504] down_read+0x1d/0x80 [ 363.488523] nvme_stop_queues+0x1e/0xa0 [nvme_core] [ 363.488536] nvme_dev_disable+0xae4/0x1620 [nvme] [ 363.488614] nvme_reset_work+0xd1e/0x49d9 [nvme] [ 363.488911] process_one_work+0x81a/0x1400 [ 363.488934] worker_thread+0x87/0xe80 [ 363.488955] kthread+0x2db/0x390 [ 363.488977] ret_from_fork+0x35/0x40 Fixes: 84fef62d135b6 ("nvme: check admin passthru command effects") Signed-off-by: Scott Bauer Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 50 +++++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 24 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 46df030b2c3f..e7668c4bb4dd 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -100,6 +100,22 @@ static struct class *nvme_subsys_class; static void nvme_ns_remove(struct nvme_ns *ns); static int nvme_revalidate_disk(struct gendisk *disk); static void nvme_put_subsystem(struct nvme_subsystem *subsys); +static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, + unsigned nsid); + +static void nvme_set_queue_dying(struct nvme_ns *ns) +{ + /* + * Revalidating a dead namespace sets capacity to 0. This will end + * buffered writers dirtying pages that can't be synced. + */ + if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags)) + return; + revalidate_disk(ns->disk); + blk_set_queue_dying(ns->queue); + /* Forcibly unquiesce queues to avoid blocking dispatch */ + blk_mq_unquiesce_queue(ns->queue); +} static void nvme_queue_scan(struct nvme_ctrl *ctrl) { @@ -1151,19 +1167,15 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, static void nvme_update_formats(struct nvme_ctrl *ctrl) { - struct nvme_ns *ns, *next; - LIST_HEAD(rm_list); + struct nvme_ns *ns; - down_write(&ctrl->namespaces_rwsem); - list_for_each_entry(ns, &ctrl->namespaces, list) { - if (ns->disk && nvme_revalidate_disk(ns->disk)) { - list_move_tail(&ns->list, &rm_list); - } - } - up_write(&ctrl->namespaces_rwsem); + down_read(&ctrl->namespaces_rwsem); + list_for_each_entry(ns, &ctrl->namespaces, list) + if (ns->disk && nvme_revalidate_disk(ns->disk)) + nvme_set_queue_dying(ns); + up_read(&ctrl->namespaces_rwsem); - list_for_each_entry_safe(ns, next, &rm_list, list) - nvme_ns_remove(ns); + nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL); } static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) @@ -3138,7 +3150,7 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, down_write(&ctrl->namespaces_rwsem); list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) { - if (ns->head->ns_id > nsid) + if (ns->head->ns_id > nsid || test_bit(NVME_NS_DEAD, &ns->flags)) list_move_tail(&ns->list, &rm_list); } up_write(&ctrl->namespaces_rwsem); @@ -3542,19 +3554,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) if (ctrl->admin_q) blk_mq_unquiesce_queue(ctrl->admin_q); - list_for_each_entry(ns, &ctrl->namespaces, list) { - /* - * Revalidating a dead namespace sets capacity to 0. This will - * end buffered writers dirtying pages that can't be synced. - */ - if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags)) - continue; - revalidate_disk(ns->disk); - blk_set_queue_dying(ns->queue); + list_for_each_entry(ns, &ctrl->namespaces, list) + nvme_set_queue_dying(ns); - /* Forcibly unquiesce queues to avoid blocking dispatch */ - blk_mq_unquiesce_queue(ns->queue); - } up_read(&ctrl->namespaces_rwsem); } EXPORT_SYMBOL_GPL(nvme_kill_queues); -- cgit v1.2.1 From fa441b71aa27d06fa79d5e7f7c329981dccd94d1 Mon Sep 17 00:00:00 2001 From: Weiping Zhang Date: Tue, 3 Jul 2018 00:34:38 +0800 Subject: nvme: don't enable AEN if not supported Avoid excuting set_feature command if there is no supported bit in Optional Asynchronous Events Supported (OAES). Fixes: c0561f82 ("nvme: submit AEN event configuration on startup") Reviewed-by: Sagi Grimberg Signed-off-by: Weiping Zhang Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e7668c4bb4dd..57f33733237f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1060,14 +1060,17 @@ EXPORT_SYMBOL_GPL(nvme_set_queue_count); static void nvme_enable_aen(struct nvme_ctrl *ctrl) { - u32 result; + u32 result, supported_aens = ctrl->oaes & NVME_AEN_SUPPORTED; int status; - status = nvme_set_features(ctrl, NVME_FEAT_ASYNC_EVENT, - ctrl->oaes & NVME_AEN_SUPPORTED, NULL, 0, &result); + if (!supported_aens) + return; + + status = nvme_set_features(ctrl, NVME_FEAT_ASYNC_EVENT, supported_aens, + NULL, 0, &result); if (status) dev_warn(ctrl->device, "Failed to configure AEN (cfg %x)\n", - ctrl->oaes & NVME_AEN_SUPPORTED); + supported_aens); } static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) -- cgit v1.2.1 From 9b382768135ee3ff282f828c906574a8478e036b Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 19 Jul 2018 20:07:59 -0700 Subject: nvme: fix handling of metadata_len for NVME_IOCTL_IO_CMD The old code in nvme_user_cmd() passed the userspace virtual address from nvme_passthru_cmd.metadata as the length of the metadata buffer as well as the address to nvme_submit_user_cmd(). Fixes: 63263d60 ("nvme: Use metadata for passthrough commands") Signed-off-by: Roland Dreier Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 57f33733237f..bf65501e6ed6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1233,7 +1233,7 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, effects = nvme_passthru_start(ctrl, ns, cmd.opcode); status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, (void __user *)(uintptr_t)cmd.addr, cmd.data_len, - (void __user *)(uintptr_t)cmd.metadata, cmd.metadata, + (void __user *)(uintptr_t)cmd.metadata, cmd.metadata_len, 0, &cmd.result, timeout); nvme_passthru_end(ctrl, effects); -- cgit v1.2.1 From d082dc1562a2ff0947b214796f12faaa87e816a9 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 16 Jul 2018 14:38:14 -0700 Subject: nvmet-fc: fix target sgl list on large transfers The existing code to carve up the sg list expected an sg element-per-page which can be very incorrect with iommu's remapping multiple memory pages to fewer bus addresses. To hit this error required a large io payload (greater than 256k) and a system that maps on a per-page basis. It's possible that large ios could get by fine if the system condensed the sgl list into the first 64 elements. This patch corrects the sg list handling by specifically walking the sg list element by element and attempting to divide the transfer up on a per-sg element boundary. While doing so, it still tries to keep sequences under 256k, but will exceed that rule if a single sg element is larger than 256k. Fixes: 48fa362b6c3f ("nvmet-fc: simplify sg list handling") Cc: # 4.14 Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 44 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 408279cb6f2c..29b4b236afd8 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -58,8 +58,8 @@ struct nvmet_fc_ls_iod { struct work_struct work; } __aligned(sizeof(unsigned long long)); +/* desired maximum for a single sequence - if sg list allows it */ #define NVMET_FC_MAX_SEQ_LENGTH (256 * 1024) -#define NVMET_FC_MAX_XFR_SGENTS (NVMET_FC_MAX_SEQ_LENGTH / PAGE_SIZE) enum nvmet_fcp_datadir { NVMET_FCP_NODATA, @@ -74,6 +74,7 @@ struct nvmet_fc_fcp_iod { struct nvme_fc_cmd_iu cmdiubuf; struct nvme_fc_ersp_iu rspiubuf; dma_addr_t rspdma; + struct scatterlist *next_sg; struct scatterlist *data_sg; int data_sg_cnt; u32 offset; @@ -1025,8 +1026,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, INIT_LIST_HEAD(&newrec->assoc_list); kref_init(&newrec->ref); ida_init(&newrec->assoc_cnt); - newrec->max_sg_cnt = min_t(u32, NVMET_FC_MAX_XFR_SGENTS, - template->max_sgl_segments); + newrec->max_sg_cnt = template->max_sgl_segments; ret = nvmet_fc_alloc_ls_iodlist(newrec); if (ret) { @@ -1722,6 +1722,7 @@ nvmet_fc_alloc_tgt_pgs(struct nvmet_fc_fcp_iod *fod) ((fod->io_dir == NVMET_FCP_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE)); /* note: write from initiator perspective */ + fod->next_sg = fod->data_sg; return 0; @@ -1866,24 +1867,49 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod, u8 op) { struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; + struct scatterlist *sg = fod->next_sg; unsigned long flags; - u32 tlen; + u32 remaininglen = fod->req.transfer_len - fod->offset; + u32 tlen = 0; int ret; fcpreq->op = op; fcpreq->offset = fod->offset; fcpreq->timeout = NVME_FC_TGTOP_TIMEOUT_SEC; - tlen = min_t(u32, tgtport->max_sg_cnt * PAGE_SIZE, - (fod->req.transfer_len - fod->offset)); + /* + * for next sequence: + * break at a sg element boundary + * attempt to keep sequence length capped at + * NVMET_FC_MAX_SEQ_LENGTH but allow sequence to + * be longer if a single sg element is larger + * than that amount. This is done to avoid creating + * a new sg list to use for the tgtport api. + */ + fcpreq->sg = sg; + fcpreq->sg_cnt = 0; + while (tlen < remaininglen && + fcpreq->sg_cnt < tgtport->max_sg_cnt && + tlen + sg_dma_len(sg) < NVMET_FC_MAX_SEQ_LENGTH) { + fcpreq->sg_cnt++; + tlen += sg_dma_len(sg); + sg = sg_next(sg); + } + if (tlen < remaininglen && fcpreq->sg_cnt == 0) { + fcpreq->sg_cnt++; + tlen += min_t(u32, sg_dma_len(sg), remaininglen); + sg = sg_next(sg); + } + if (tlen < remaininglen) + fod->next_sg = sg; + else + fod->next_sg = NULL; + fcpreq->transfer_length = tlen; fcpreq->transferred_length = 0; fcpreq->fcp_error = 0; fcpreq->rsplen = 0; - fcpreq->sg = &fod->data_sg[fod->offset / PAGE_SIZE]; - fcpreq->sg_cnt = DIV_ROUND_UP(tlen, PAGE_SIZE); - /* * If the last READDATA request: check if LLDD supports * combined xfr with response. -- cgit v1.2.1 From 6cdefc6e2ad52170f89a8d0e8b1a1339f91834dc Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 20 Jul 2018 15:49:48 -0700 Subject: nvme: if_ready checks to fail io to deleting controller The revised if_ready checks skipped over the case of returning error when the controller is being deleted. Instead it was returning BUSY, which caused the ios to retry, which caused the ns delete to hang waiting for the ios to drain. Stack trace of hang looks like: kworker/u64:2 D 0 74 2 0x80000000 Workqueue: nvme-delete-wq nvme_delete_ctrl_work [nvme_core] Call Trace: ? __schedule+0x26d/0x820 schedule+0x32/0x80 blk_mq_freeze_queue_wait+0x36/0x80 ? remove_wait_queue+0x60/0x60 blk_cleanup_queue+0x72/0x160 nvme_ns_remove+0x106/0x140 [nvme_core] nvme_remove_namespaces+0x7e/0xa0 [nvme_core] nvme_delete_ctrl_work+0x4d/0x80 [nvme_core] process_one_work+0x160/0x350 worker_thread+0x1c3/0x3d0 kthread+0xf5/0x130 ? process_one_work+0x350/0x350 ? kthread_bind+0x10/0x10 ret_from_fork+0x1f/0x30 Extend nvmf_fail_nonready_command() to supply the controller pointer so that the controller state can be looked at. Fail any io to a controller that is deleting. Fixes: 3bc32bb1186c ("nvme-fabrics: refactor queue ready check") Fixes: 35897b920c8a ("nvme-fabrics: fix and refine state checks in __nvmf_check_ready") Signed-off-by: James Smart Signed-off-by: Christoph Hellwig Tested-by: Ewan D. Milne Reviewed-by: Ewan D. Milne --- drivers/nvme/host/fabrics.c | 10 +++++++--- drivers/nvme/host/fabrics.h | 3 ++- drivers/nvme/host/fc.c | 2 +- drivers/nvme/host/rdma.c | 2 +- drivers/nvme/target/loop.c | 2 +- 5 files changed, 12 insertions(+), 7 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 903eb4545e26..f7efe5a58cc7 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -539,14 +539,18 @@ static struct nvmf_transport_ops *nvmf_lookup_transport( /* * For something we're not in a state to send to the device the default action * is to busy it and retry it after the controller state is recovered. However, - * anything marked for failfast or nvme multipath is immediately failed. + * if the controller is deleting or if anything is marked for failfast or + * nvme multipath it is immediately failed. * * Note: commands used to initialize the controller will be marked for failfast. * Note: nvme cli/ioctl commands are marked for failfast. */ -blk_status_t nvmf_fail_nonready_command(struct request *rq) +blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl, + struct request *rq) { - if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) + if (ctrl->state != NVME_CTRL_DELETING && + ctrl->state != NVME_CTRL_DEAD && + !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) return BLK_STS_RESOURCE; nvme_req(rq)->status = NVME_SC_ABORT_REQ; return BLK_STS_IOERR; diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index e1818a27aa2d..aa2fdb2a2e8f 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -162,7 +162,8 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops); void nvmf_free_options(struct nvmf_ctrl_options *opts); int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); -blk_status_t nvmf_fail_nonready_command(struct request *rq); +blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl, + struct request *rq); bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, bool queue_live); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 41d45a1b5c62..9bac912173ba 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2272,7 +2272,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE || !nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) - return nvmf_fail_nonready_command(rq); + return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq); ret = nvme_setup_cmd(ns, rq, sqe); if (ret) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 518c5b09038c..66ec5985c9f3 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1639,7 +1639,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, WARN_ON_ONCE(rq->tag < 0); if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) - return nvmf_fail_nonready_command(rq); + return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq); dev = queue->device->dev; ib_dma_sync_single_for_cpu(dev, sqe->dma, diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index d8d91f04bd7e..ae7586b8be07 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -162,7 +162,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, blk_status_t ret; if (!nvmf_check_ready(&queue->ctrl->ctrl, req, queue_ready)) - return nvmf_fail_nonready_command(req); + return nvmf_fail_nonready_command(&queue->ctrl->ctrl, req); ret = nvme_setup_cmd(ns, req, &iod->cmd); if (ret) -- cgit v1.2.1 From 5613d31214eb4c5c04cdfce4966bb661c8b43191 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 25 Jul 2018 08:35:17 +0200 Subject: nvmet: fixup crash on NULL device path When writing an empty string into the device_path attribute the kernel will crash with nvmet: failed to open block device (null): (-22) BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 This patch sanitizes the error handling for invalid device path settings. Fixes: a07b4970 ("nvmet: add a generic NVMe target") Signed-off-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/configfs.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index d3f3b3ec4d1a..ebea1373d1b7 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -282,6 +282,7 @@ static ssize_t nvmet_ns_device_path_store(struct config_item *item, { struct nvmet_ns *ns = to_nvmet_ns(item); struct nvmet_subsys *subsys = ns->subsys; + size_t len; int ret; mutex_lock(&subsys->lock); @@ -289,10 +290,14 @@ static ssize_t nvmet_ns_device_path_store(struct config_item *item, if (ns->enabled) goto out_unlock; - kfree(ns->device_path); + ret = -EINVAL; + len = strcspn(page, "\n"); + if (!len) + goto out_unlock; + kfree(ns->device_path); ret = -ENOMEM; - ns->device_path = kstrndup(page, strcspn(page, "\n"), GFP_KERNEL); + ns->device_path = kstrndup(page, len, GFP_KERNEL); if (!ns->device_path) goto out_unlock; -- cgit v1.2.1 From 405a7519607e7a364114896264440c0f87b325c0 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 25 Jul 2018 08:34:45 +0200 Subject: nvmet: only check for filebacking on -ENOTBLK We only need to check for a file-backed namespace if nvmet_bdev_ns_enable() returns -ENOTBLK. For any other error it's pointless as the open() error will remain the same. Fixes: d5eff33e ("nvmet: add simple file backed ns support") Signed-off-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 74d4b785d2da..9838103f2d62 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -339,7 +339,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns) goto out_unlock; ret = nvmet_bdev_ns_enable(ns); - if (ret) + if (ret == -ENOTBLK) ret = nvmet_file_ns_enable(ns); if (ret) goto out_unlock; -- cgit v1.2.1