Diffstat (limited to 'drivers/nvme/host/pci.c')
 -rw-r--r--  drivers/nvme/host/pci.c | 79
 1 file changed, 48 insertions(+), 31 deletions(-)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d234de5505ea..ba943f211687 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -38,11 +38,18 @@
 #define SGES_PER_PAGE	(PAGE_SIZE / sizeof(struct nvme_sgl_desc))
 
+/*
+ * These can be higher, but we need to ensure that any command doesn't
+ * require an sg allocation that needs more than a page of data.
+ */
+#define NVME_MAX_KB_SZ	4096
+#define NVME_MAX_SEGS	127
+
 static int use_threaded_interrupts;
 module_param(use_threaded_interrupts, int, 0);
 
 static bool use_cmb_sqes = true;
-module_param(use_cmb_sqes, bool, 0644);
+module_param(use_cmb_sqes, bool, 0444);
 MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
 
 static unsigned int max_host_mem_size_mb = 128;
@@ -100,6 +107,8 @@ struct nvme_dev {
 	struct nvme_ctrl ctrl;
 	struct completion ioq_wait;
 
+	mempool_t *iod_mempool;
+
 	/* shadow doorbell buffer support: */
 	u32 *dbbuf_dbs;
 	dma_addr_t dbbuf_dbs_dma_addr;
@@ -477,10 +486,7 @@ static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
 	iod->use_sgl = nvme_pci_use_sgls(dev, rq);
 
 	if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
-		size_t alloc_size = nvme_pci_iod_alloc_size(dev, size, nseg,
-				iod->use_sgl);
-
-		iod->sg = kmalloc(alloc_size, GFP_ATOMIC);
+		iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
 		if (!iod->sg)
 			return BLK_STS_RESOURCE;
 	} else {
@@ -526,7 +532,7 @@ static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
 	}
 
 	if (iod->sg != iod->inline_sg)
-		kfree(iod->sg);
+		mempool_free(iod->sg, dev->iod_mempool);
 }
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
@@ -920,11 +926,9 @@ static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
 {
 	u16 head = nvmeq->cq_head;
 
-	if (likely(nvmeq->cq_vector >= 0)) {
-		if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
-						      nvmeq->dbbuf_cq_ei))
-			writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
-	}
+	if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
+					      nvmeq->dbbuf_cq_ei))
+		writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
 }
 
 static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
@@ -1477,11 +1481,13 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
 	 */
 	vector = dev->num_vecs == 1 ? 0 : qid;
 	result = adapter_alloc_cq(dev, qid, nvmeq, vector);
-	if (result < 0)
-		goto out;
+	if (result)
+		return result;
 
 	result = adapter_alloc_sq(dev, qid, nvmeq);
 	if (result < 0)
+		return result;
+	else if (result)
 		goto release_cq;
 
 	/*
@@ -1503,7 +1509,6 @@
 release_sq:
 	adapter_delete_sq(dev, qid);
 release_cq:
 	adapter_delete_cq(dev, qid);
-out:
 	return result;
 }
 
@@ -2012,13 +2017,7 @@ static void nvme_del_cq_end(struct request *req, blk_status_t error)
 	if (!error) {
 		unsigned long flags;
 
-		/*
-		 * We might be called with the AQ cq_lock held
-		 * and the I/O queue cq_lock should always
-		 * nest inside the AQ one.
-		 */
-		spin_lock_irqsave_nested(&nvmeq->cq_lock, flags,
-					 SINGLE_DEPTH_NESTING);
+		spin_lock_irqsave(&nvmeq->cq_lock, flags);
 		nvme_process_cq(nvmeq, &start, &end, -1);
 		spin_unlock_irqrestore(&nvmeq->cq_lock, flags);
 
@@ -2231,14 +2230,6 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 	nvme_stop_queues(&dev->ctrl);
 
 	if (!dead && dev->ctrl.queue_count > 0) {
-		/*
-		 * If the controller is still alive tell it to stop using the
-		 * host memory buffer. In theory the shutdown / reset should
-		 * make sure that it doesn't access the host memoery anymore,
-		 * but I'd rather be safe than sorry..
-		 */
-		if (dev->host_mem_descs)
-			nvme_set_host_mem(dev, 0);
 		nvme_disable_io_queues(dev);
 		nvme_disable_admin_queue(dev, shutdown);
 	}
@@ -2295,6 +2286,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
 		blk_put_queue(dev->ctrl.admin_q);
 	kfree(dev->queues);
 	free_opal_dev(dev->ctrl.opal_dev);
+	mempool_destroy(dev->iod_mempool);
 	kfree(dev);
 }
 
@@ -2304,6 +2296,7 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
 	nvme_get_ctrl(&dev->ctrl);
 	nvme_dev_disable(dev, false);
+	nvme_kill_queues(&dev->ctrl);
 	if (!queue_work(nvme_wq, &dev->remove_work))
 		nvme_put_ctrl(&dev->ctrl);
 }
@@ -2348,6 +2341,13 @@ static void nvme_reset_work(struct work_struct *work)
 	if (result)
 		goto out;
 
+	/*
+	 * Limit the max command size to prevent iod->sg allocations going
+	 * over a single page.
+	 */
+	dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1;
+	dev->ctrl.max_segments = NVME_MAX_SEGS;
+
 	result = nvme_init_identify(&dev->ctrl);
 	if (result)
 		goto out;
@@ -2420,7 +2420,6 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work)
 	struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work);
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
-	nvme_kill_queues(&dev->ctrl);
 	if (pci_get_drvdata(pdev))
 		device_release_driver(&pdev->dev);
 	nvme_put_ctrl(&dev->ctrl);
@@ -2524,6 +2523,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	int node, result = -ENOMEM;
 	struct nvme_dev *dev;
 	unsigned long quirks = id->driver_data;
+	size_t alloc_size;
 
 	node = dev_to_node(&pdev->dev);
 	if (node == NUMA_NO_NODE)
@@ -2561,6 +2561,23 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (result)
 		goto release_pools;
 
+	/*
+	 * Double check that our mempool alloc size will cover the biggest
+	 * command we support.
+	 */
+	alloc_size = nvme_pci_iod_alloc_size(dev, NVME_MAX_KB_SZ,
+						NVME_MAX_SEGS, true);
+	WARN_ON_ONCE(alloc_size > PAGE_SIZE);
+
+	dev->iod_mempool = mempool_create_node(1, mempool_kmalloc,
+						mempool_kfree,
+						(void *) alloc_size,
+						GFP_KERNEL, node);
+	if (!dev->iod_mempool) {
+		result = -ENOMEM;
+		goto release_pools;
+	}
+
 	dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
 
 	nvme_get_ctrl(&dev->ctrl);
@@ -2614,7 +2631,7 @@ static void nvme_remove(struct pci_dev *pdev)
 	if (!pci_device_is_present(pdev)) {
 		nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
-		nvme_dev_disable(dev, false);
+		nvme_dev_disable(dev, true);
 	}
 
 	flush_work(&dev->ctrl.reset_work);
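
For context, the pool added in nvme_probe() above follows the kernel's standard mempool pattern: mempool_create_node() with mempool_kmalloc/mempool_kfree keeps at least one preallocated element of a fixed size, so the GFP_ATOMIC allocation in nvme_init_iod() can fall back to the reserved element instead of failing under memory pressure. The size cap fits together with this: dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1 converts 4096 KB into 512-byte sectors (two sectors per KB), and the patch double-checks at probe time that the resulting worst-case iod->sg allocation stays within one page. The sketch below shows the same mempool pattern in isolation; the example_* names and the PAGE_SIZE element size are illustrative assumptions, not part of this patch.

	#include <linux/mempool.h>
	#include <linux/slab.h>

	/* Illustrative element size: one page worth of per-request data. */
	#define EXAMPLE_POOL_ELEM_SZ	PAGE_SIZE

	static mempool_t *example_pool;	/* hypothetical pool, not from the patch */

	static int example_pool_create(int node)
	{
		/*
		 * mempool_kmalloc()/mempool_kfree() treat the pool_data pointer
		 * as the allocation size, so one element of that size is kept
		 * in reserve for when kmalloc() would otherwise fail.
		 */
		example_pool = mempool_create_node(1, mempool_kmalloc,
						   mempool_kfree,
						   (void *)EXAMPLE_POOL_ELEM_SZ,
						   GFP_KERNEL, node);
		return example_pool ? 0 : -ENOMEM;
	}

	static void *example_pool_get(void)
	{
		/* Falls back to the reserved element if the atomic kmalloc fails. */
		return mempool_alloc(example_pool, GFP_ATOMIC);
	}

	static void example_pool_put(void *elem)
	{
		mempool_free(elem, example_pool);
	}

	static void example_pool_destroy(void)
	{
		mempool_destroy(example_pool);
	}

A pool like this trades a small amount of permanently reserved memory for a forward-progress guarantee in the I/O path, which is why a single reserved element suffices and why the element size is checked against PAGE_SIZE when the pool is created.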