diff options
Diffstat (limited to 'drivers/nvme/host/pci.c')
-rw-r--r-- | drivers/nvme/host/pci.c | 294 |
1 files changed, 186 insertions, 108 deletions
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 6bd9b1033965..9c80f9f08149 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -28,8 +28,8 @@ #include "trace.h" #include "nvme.h" -#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) -#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) +#define SQ_SIZE(q) ((q)->q_depth << (q)->sqes) +#define CQ_SIZE(q) ((q)->q_depth * sizeof(struct nvme_completion)) #define SGES_PER_PAGE (PAGE_SIZE / sizeof(struct nvme_sgl_desc)) @@ -68,14 +68,14 @@ static int io_queue_depth = 1024; module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644); MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2"); -static int write_queues; -module_param(write_queues, int, 0644); +static unsigned int write_queues; +module_param(write_queues, uint, 0644); MODULE_PARM_DESC(write_queues, "Number of queues to use for writes. If not set, reads and writes " "will share a queue set."); -static int poll_queues; -module_param(poll_queues, int, 0644); +static unsigned int poll_queues; +module_param(poll_queues, uint, 0644); MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO."); struct nvme_dev; @@ -100,6 +100,7 @@ struct nvme_dev { unsigned io_queues[HCTX_MAX_TYPES]; unsigned int num_vecs; int q_depth; + int io_sqes; u32 db_stride; void __iomem *bar; unsigned long bar_mapped_size; @@ -162,11 +163,10 @@ static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl) struct nvme_queue { struct nvme_dev *dev; spinlock_t sq_lock; - struct nvme_command *sq_cmds; + void *sq_cmds; /* only used for poll queues: */ spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; volatile struct nvme_completion *cqes; - struct blk_mq_tags **tags; dma_addr_t sq_dma_addr; dma_addr_t cq_dma_addr; u32 __iomem *q_db; @@ -175,9 +175,9 @@ struct nvme_queue { u16 sq_tail; u16 last_sq_tail; u16 cq_head; - u16 last_cq_head; u16 qid; u8 cq_phase; + u8 sqes; unsigned long flags; #define NVMEQ_ENABLED 0 #define NVMEQ_SQ_CMB 1 @@ -375,29 +375,17 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, WARN_ON(hctx_idx != 0); WARN_ON(dev->admin_tagset.tags[0] != hctx->tags); - WARN_ON(nvmeq->tags); hctx->driver_data = nvmeq; - nvmeq->tags = &dev->admin_tagset.tags[0]; return 0; } -static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) -{ - struct nvme_queue *nvmeq = hctx->driver_data; - - nvmeq->tags = NULL; -} - static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { struct nvme_dev *dev = data; struct nvme_queue *nvmeq = &dev->queues[hctx_idx + 1]; - if (!nvmeq->tags) - nvmeq->tags = &dev->tagset.tags[hctx_idx]; - WARN_ON(dev->tagset.tags[hctx_idx] != hctx->tags); hctx->driver_data = nvmeq; return 0; @@ -488,7 +476,8 @@ static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd, bool write_sq) { spin_lock(&nvmeq->sq_lock); - memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd)); + memcpy(nvmeq->sq_cmds + (nvmeq->sq_tail << nvmeq->sqes), + cmd, sizeof(*cmd)); if (++nvmeq->sq_tail == nvmeq->q_depth) nvmeq->sq_tail = 0; nvme_write_sq_db(nvmeq, write_sq); @@ -534,21 +523,22 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req) static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - enum dma_data_direction dma_dir = rq_data_dir(req) ? - DMA_TO_DEVICE : DMA_FROM_DEVICE; const int last_prp = dev->ctrl.page_size / sizeof(__le64) - 1; dma_addr_t dma_addr = iod->first_dma, next_dma_addr; int i; if (iod->dma_len) { - dma_unmap_page(dev->dev, dma_addr, iod->dma_len, dma_dir); + dma_unmap_page(dev->dev, dma_addr, iod->dma_len, + rq_dma_dir(req)); return; } WARN_ON_ONCE(!iod->nents); - /* P2PDMA requests do not need to be unmapped */ - if (!is_pci_p2pdma_page(sg_page(iod->sg))) + if (is_pci_p2pdma_page(sg_page(iod->sg))) + pci_p2pdma_unmap_sg(dev->dev, iod->sg, iod->nents, + rq_dma_dir(req)); + else dma_unmap_sg(dev->dev, iod->sg, iod->nents, rq_dma_dir(req)); @@ -769,7 +759,8 @@ static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev, struct bio_vec *bv) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - unsigned int first_prp_len = dev->ctrl.page_size - bv->bv_offset; + unsigned int offset = bv->bv_offset & (dev->ctrl.page_size - 1); + unsigned int first_prp_len = dev->ctrl.page_size - offset; iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0); if (dma_mapping_error(dev->dev, iod->first_dma)) @@ -832,8 +823,8 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, goto out; if (is_pci_p2pdma_page(sg_page(iod->sg))) - nr_mapped = pci_p2pdma_map_sg(dev->dev, iod->sg, iod->nents, - rq_dma_dir(req)); + nr_mapped = pci_p2pdma_map_sg_attrs(dev->dev, iod->sg, + iod->nents, rq_dma_dir(req), DMA_ATTR_NO_WARN); else nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, rq_dma_dir(req), DMA_ATTR_NO_WARN); @@ -920,7 +911,6 @@ static void nvme_pci_complete_rq(struct request *req) struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_dev *dev = iod->nvmeq->dev; - nvme_cleanup_cmd(req); if (blk_integrity_rq(req)) dma_unmap_page(dev->dev, iod->meta_dma, rq_integrity_vec(req)->bv_len, rq_data_dir(req)); @@ -945,6 +935,13 @@ static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq) writel(head, nvmeq->q_db + nvmeq->dev->db_stride); } +static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq) +{ + if (!nvmeq->qid) + return nvmeq->dev->admin_tagset.tags[0]; + return nvmeq->dev->tagset.tags[nvmeq->qid - 1]; +} + static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) { volatile struct nvme_completion *cqe = &nvmeq->cqes[idx]; @@ -963,14 +960,13 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) * aborts. We don't even bother to allocate a struct request * for them but rather special case them here. */ - if (unlikely(nvmeq->qid == 0 && - cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) { + if (unlikely(nvme_is_aen_req(nvmeq->qid, cqe->command_id))) { nvme_complete_async_event(&nvmeq->dev->ctrl, cqe->status, &cqe->result); return; } - req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id); + req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), cqe->command_id); trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail); nvme_end_request(req, cqe->status, cqe->result); } @@ -1023,10 +1019,7 @@ static irqreturn_t nvme_irq(int irq, void *data) * the irq handler, even if that was on another CPU. */ rmb(); - if (nvmeq->cq_head != nvmeq->last_cq_head) - ret = IRQ_HANDLED; nvme_process_cq(nvmeq, &start, &end, -1); - nvmeq->last_cq_head = nvmeq->cq_head; wmb(); if (start != end) { @@ -1344,16 +1337,16 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) static void nvme_free_queue(struct nvme_queue *nvmeq) { - dma_free_coherent(nvmeq->dev->dev, CQ_SIZE(nvmeq->q_depth), + dma_free_coherent(nvmeq->dev->dev, CQ_SIZE(nvmeq), (void *)nvmeq->cqes, nvmeq->cq_dma_addr); if (!nvmeq->sq_cmds) return; if (test_and_clear_bit(NVMEQ_SQ_CMB, &nvmeq->flags)) { pci_free_p2pmem(to_pci_dev(nvmeq->dev->dev), - nvmeq->sq_cmds, SQ_SIZE(nvmeq->q_depth)); + nvmeq->sq_cmds, SQ_SIZE(nvmeq)); } else { - dma_free_coherent(nvmeq->dev->dev, SQ_SIZE(nvmeq->q_depth), + dma_free_coherent(nvmeq->dev->dev, SQ_SIZE(nvmeq), nvmeq->sq_cmds, nvmeq->sq_dma_addr); } } @@ -1403,11 +1396,28 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) if (shutdown) nvme_shutdown_ctrl(&dev->ctrl); else - nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap); + nvme_disable_ctrl(&dev->ctrl); nvme_poll_irqdisable(nvmeq, -1); } +/* + * Called only on a device that has been disabled and after all other threads + * that can check this device's completion queues have synced. This is the + * last chance for the driver to see a natural completion before + * nvme_cancel_request() terminates all incomplete requests. + */ +static void nvme_reap_pending_cqes(struct nvme_dev *dev) +{ + u16 start, end; + int i; + + for (i = dev->ctrl.queue_count - 1; i > 0; i--) { + nvme_process_cq(&dev->queues[i], &start, &end, -1); + nvme_complete_cqes(&dev->queues[i], start, end); + } +} + static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues, int entry_size) { @@ -1433,12 +1443,12 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues, } static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, - int qid, int depth) + int qid) { struct pci_dev *pdev = to_pci_dev(dev->dev); if (qid && dev->cmb_use_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) { - nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(depth)); + nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(nvmeq)); if (nvmeq->sq_cmds) { nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev, nvmeq->sq_cmds); @@ -1447,11 +1457,11 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, return 0; } - pci_free_p2pmem(pdev, nvmeq->sq_cmds, SQ_SIZE(depth)); + pci_free_p2pmem(pdev, nvmeq->sq_cmds, SQ_SIZE(nvmeq)); } } - nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth), + nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(nvmeq), &nvmeq->sq_dma_addr, GFP_KERNEL); if (!nvmeq->sq_cmds) return -ENOMEM; @@ -1465,12 +1475,14 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) if (dev->ctrl.queue_count > qid) return 0; - nvmeq->cqes = dma_alloc_coherent(dev->dev, CQ_SIZE(depth), + nvmeq->sqes = qid ? dev->io_sqes : NVME_ADM_SQES; + nvmeq->q_depth = depth; + nvmeq->cqes = dma_alloc_coherent(dev->dev, CQ_SIZE(nvmeq), &nvmeq->cq_dma_addr, GFP_KERNEL); if (!nvmeq->cqes) goto free_nvmeq; - if (nvme_alloc_sq_cmds(dev, nvmeq, qid, depth)) + if (nvme_alloc_sq_cmds(dev, nvmeq, qid)) goto free_cqdma; nvmeq->dev = dev; @@ -1479,15 +1491,14 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) nvmeq->cq_head = 0; nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; - nvmeq->q_depth = depth; nvmeq->qid = qid; dev->ctrl.queue_count++; return 0; free_cqdma: - dma_free_coherent(dev->dev, CQ_SIZE(depth), (void *)nvmeq->cqes, - nvmeq->cq_dma_addr); + dma_free_coherent(dev->dev, CQ_SIZE(nvmeq), (void *)nvmeq->cqes, + nvmeq->cq_dma_addr); free_nvmeq: return -ENOMEM; } @@ -1515,7 +1526,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) nvmeq->cq_head = 0; nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; - memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth)); + memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq)); nvme_dbbuf_init(dev, nvmeq, qid); dev->online_queues++; wmb(); /* ensure the first interrupt sees the initialization */ @@ -1545,14 +1556,13 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled) result = adapter_alloc_sq(dev, qid, nvmeq); if (result < 0) return result; - else if (result) + if (result) goto release_cq; nvmeq->cq_vector = vector; nvme_init_queue(nvmeq, qid); if (!polled) { - nvmeq->cq_vector = vector; result = queue_request_irq(nvmeq); if (result < 0) goto release_sq; @@ -1573,7 +1583,6 @@ static const struct blk_mq_ops nvme_mq_admin_ops = { .queue_rq = nvme_queue_rq, .complete = nvme_pci_complete_rq, .init_hctx = nvme_admin_init_hctx, - .exit_hctx = nvme_admin_exit_hctx, .init_request = nvme_init_request, .timeout = nvme_timeout, }; @@ -1679,7 +1688,7 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev) (readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO)) writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS); - result = nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap); + result = nvme_disable_ctrl(&dev->ctrl); if (result < 0) return result; @@ -1695,7 +1704,7 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev) lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ); lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ); - result = nvme_enable_ctrl(&dev->ctrl, dev->ctrl.cap); + result = nvme_enable_ctrl(&dev->ctrl); if (result) return result; @@ -2055,7 +2064,6 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) .priv = dev, }; unsigned int irq_queues, this_p_queues; - unsigned int nr_cpus = num_possible_cpus(); /* * Poll queues don't need interrupts, but we need at least one IO @@ -2066,10 +2074,7 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) this_p_queues = nr_io_queues - 1; irq_queues = 1; } else { - if (nr_cpus < nr_io_queues - this_p_queues) - irq_queues = nr_cpus + 1; - else - irq_queues = nr_io_queues - this_p_queues + 1; + irq_queues = nr_io_queues - this_p_queues + 1; } dev->io_queues[HCTX_TYPE_POLL] = this_p_queues; @@ -2077,6 +2082,13 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) dev->io_queues[HCTX_TYPE_DEFAULT] = 1; dev->io_queues[HCTX_TYPE_READ] = 0; + /* + * Some Apple controllers require all queues to use the + * first vector. + */ + if (dev->ctrl.quirks & NVME_QUIRK_SINGLE_VECTOR) + irq_queues = 1; + return pci_alloc_irq_vectors_affinity(pdev, 1, irq_queues, PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd); } @@ -2095,6 +2107,14 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) unsigned long size; nr_io_queues = max_io_queues(); + + /* + * If tags are shared with admin queue (Apple bug), then + * make sure we only use one IO queue. + */ + if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS) + nr_io_queues = 1; + result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); if (result < 0) return result; @@ -2232,11 +2252,6 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode) if (timeout == 0) return false; - /* handle any remaining CQEs */ - if (opcode == nvme_admin_delete_cq && - !test_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags)) - nvme_poll_irqdisable(nvmeq, -1); - sent--; if (nr_queues) goto retry; @@ -2244,10 +2259,7 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode) return true; } -/* - * return error value only when tagset allocation failed - */ -static int nvme_dev_add(struct nvme_dev *dev) +static void nvme_dev_add(struct nvme_dev *dev) { int ret; @@ -2265,11 +2277,19 @@ static int nvme_dev_add(struct nvme_dev *dev) dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE; dev->tagset.driver_data = dev; + /* + * Some Apple controllers requires tags to be unique + * across admin and IO queue, so reserve the first 32 + * tags of the IO queue. + */ + if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS) + dev->tagset.reserved_tags = NVME_AQ_DEPTH; + ret = blk_mq_alloc_tag_set(&dev->tagset); if (ret) { dev_warn(dev->ctrl.device, "IO queues tagset allocation failed %d\n", ret); - return ret; + return; } dev->ctrl.tagset = &dev->tagset; } else { @@ -2280,7 +2300,6 @@ static int nvme_dev_add(struct nvme_dev *dev) } nvme_dbbuf_set(dev); - return 0; } static int nvme_pci_enable(struct nvme_dev *dev) @@ -2314,10 +2333,21 @@ static int nvme_pci_enable(struct nvme_dev *dev) dev->q_depth = min_t(int, NVME_CAP_MQES(dev->ctrl.cap) + 1, io_queue_depth); + dev->ctrl.sqsize = dev->q_depth - 1; /* 0's based queue depth */ dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap); dev->dbs = dev->bar + 4096; /* + * Some Apple controllers require a non-standard SQE size. + * Interestingly they also seem to ignore the CC:IOSQES register + * so we don't bother updating it here. + */ + if (dev->ctrl.quirks & NVME_QUIRK_128_BYTES_SQES) + dev->io_sqes = 7; + else + dev->io_sqes = NVME_NVM_IOSQES; + + /* * Temporary fix for the Apple controller found in the MacBook8,1 and * some MacBook7,1 to avoid controller resets and data loss. */ @@ -2334,6 +2364,18 @@ static int nvme_pci_enable(struct nvme_dev *dev) "set queue depth=%u\n", dev->q_depth); } + /* + * Controllers with the shared tags quirk need the IO queue to be + * big enough so that we get 32 tags for the admin queue + */ + if ((dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS) && + (dev->q_depth < (NVME_AQ_DEPTH + 2))) { + dev->q_depth = NVME_AQ_DEPTH + 2; + dev_warn(dev->ctrl.device, "IO queue depth clamped to %d\n", + dev->q_depth); + } + + nvme_map_cmb(dev); pci_enable_pcie_error_reporting(pdev); @@ -2398,9 +2440,12 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_suspend_io_queues(dev); nvme_suspend_queue(&dev->queues[0]); nvme_pci_disable(dev); + nvme_reap_pending_cqes(dev); blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl); blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl); + blk_mq_tagset_wait_completed_request(&dev->tagset); + blk_mq_tagset_wait_completed_request(&dev->admin_tagset); /* * The driver will not be starting up queues again if shutting down so @@ -2415,6 +2460,14 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) mutex_unlock(&dev->shutdown_lock); } +static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown) +{ + if (!nvme_wait_reset(&dev->ctrl)) + return -EBUSY; + nvme_dev_disable(dev, shutdown); + return 0; +} + static int nvme_setup_prp_pools(struct nvme_dev *dev) { dev->prp_page_pool = dma_pool_create("prp list page", dev->dev, @@ -2438,14 +2491,20 @@ static void nvme_release_prp_pools(struct nvme_dev *dev) dma_pool_destroy(dev->prp_small_pool); } +static void nvme_free_tagset(struct nvme_dev *dev) +{ + if (dev->tagset.tags) + blk_mq_free_tag_set(&dev->tagset); + dev->ctrl.tagset = NULL; +} + static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) { struct nvme_dev *dev = to_nvme_dev(ctrl); nvme_dbbuf_dma_free(dev); put_device(dev->dev); - if (dev->tagset.tags) - blk_mq_free_tag_set(&dev->tagset); + nvme_free_tagset(dev); if (dev->ctrl.admin_q) blk_put_queue(dev->ctrl.admin_q); kfree(dev->queues); @@ -2456,6 +2515,11 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) static void nvme_remove_dead_ctrl(struct nvme_dev *dev) { + /* + * Set state to deleting now to avoid blocking nvme_wait_reset(), which + * may be holding this pci_dev's device lock. + */ + nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); nvme_get_ctrl(&dev->ctrl); nvme_dev_disable(dev, false); nvme_kill_queues(&dev->ctrl); @@ -2469,7 +2533,6 @@ static void nvme_reset_work(struct work_struct *work) container_of(work, struct nvme_dev, ctrl.reset_work); bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL); int result; - enum nvme_ctrl_state new_state = NVME_CTRL_LIVE; if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) { result = -ENODEV; @@ -2563,13 +2626,11 @@ static void nvme_reset_work(struct work_struct *work) dev_warn(dev->ctrl.device, "IO queues not created\n"); nvme_kill_queues(&dev->ctrl); nvme_remove_namespaces(&dev->ctrl); - new_state = NVME_CTRL_ADMIN_ONLY; + nvme_free_tagset(dev); } else { nvme_start_queues(&dev->ctrl); nvme_wait_freeze(&dev->ctrl); - /* hit this only when allocate tagset fails */ - if (nvme_dev_add(dev)) - new_state = NVME_CTRL_ADMIN_ONLY; + nvme_dev_add(dev); nvme_unfreeze(&dev->ctrl); } @@ -2577,9 +2638,9 @@ static void nvme_reset_work(struct work_struct *work) * If only admin queue live, keep it to do further investigation or * recovery. */ - if (!nvme_change_ctrl_state(&dev->ctrl, new_state)) { + if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) { dev_warn(dev->ctrl.device, - "failed to mark controller state %d\n", new_state); + "failed to mark controller live state\n"); result = -ENODEV; goto out; } @@ -2620,7 +2681,7 @@ static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val) static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) { - *val = readq(to_nvme_dev(ctrl)->bar + off); + *val = lo_hi_readq(to_nvme_dev(ctrl)->bar + off); return 0; } @@ -2784,19 +2845,28 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) static void nvme_reset_prepare(struct pci_dev *pdev) { struct nvme_dev *dev = pci_get_drvdata(pdev); - nvme_dev_disable(dev, false); + + /* + * We don't need to check the return value from waiting for the reset + * state as pci_dev device lock is held, making it impossible to race + * with ->remove(). + */ + nvme_disable_prepare_reset(dev, false); + nvme_sync_queues(&dev->ctrl); } static void nvme_reset_done(struct pci_dev *pdev) { struct nvme_dev *dev = pci_get_drvdata(pdev); - nvme_reset_ctrl_sync(&dev->ctrl); + + if (!nvme_try_sched_reset(&dev->ctrl)) + flush_work(&dev->ctrl.reset_work); } static void nvme_shutdown(struct pci_dev *pdev) { struct nvme_dev *dev = pci_get_drvdata(pdev); - nvme_dev_disable(dev, true); + nvme_disable_prepare_reset(dev, true); } /* @@ -2849,7 +2919,7 @@ static int nvme_resume(struct device *dev) if (ndev->last_ps == U32_MAX || nvme_set_power_state(ctrl, ndev->last_ps) != 0) - nvme_reset_ctrl(ctrl); + return nvme_try_sched_reset(&ndev->ctrl); return 0; } @@ -2876,43 +2946,43 @@ static int nvme_suspend(struct device *dev) * state (which may not be possible if the link is up). */ if (pm_suspend_via_firmware() || !ctrl->npss || - !pcie_aspm_enabled(pdev)) { - nvme_dev_disable(ndev, true); - return 0; - } + !pcie_aspm_enabled(pdev) || + (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND)) + return nvme_disable_prepare_reset(ndev, true); nvme_start_freeze(ctrl); nvme_wait_freeze(ctrl); nvme_sync_queues(ctrl); - if (ctrl->state != NVME_CTRL_LIVE && - ctrl->state != NVME_CTRL_ADMIN_ONLY) + if (ctrl->state != NVME_CTRL_LIVE) goto unfreeze; ret = nvme_get_power_state(ctrl, &ndev->last_ps); if (ret < 0) goto unfreeze; + /* + * A saved state prevents pci pm from generically controlling the + * device's power. If we're using protocol specific settings, we don't + * want pci interfering. + */ + pci_save_state(pdev); + ret = nvme_set_power_state(ctrl, ctrl->npss); if (ret < 0) goto unfreeze; if (ret) { + /* discard the saved state */ + pci_load_saved_state(pdev, NULL); + /* * Clearing npss forces a controller reset on resume. The - * correct value will be resdicovered then. + * correct value will be rediscovered then. */ - nvme_dev_disable(ndev, true); + ret = nvme_disable_prepare_reset(ndev, true); ctrl->npss = 0; - ret = 0; - goto unfreeze; } - /* - * A saved state prevents pci pm from generically controlling the - * device's power. If we're using protocol specific settings, we don't - * want pci interfering. - */ - pci_save_state(pdev); unfreeze: nvme_unfreeze(ctrl); return ret; @@ -2921,9 +2991,7 @@ unfreeze: static int nvme_simple_suspend(struct device *dev) { struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev)); - - nvme_dev_disable(ndev, true); - return 0; + return nvme_disable_prepare_reset(ndev, true); } static int nvme_simple_resume(struct device *dev) @@ -2931,8 +2999,7 @@ static int nvme_simple_resume(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); struct nvme_dev *ndev = pci_get_drvdata(pdev); - nvme_reset_ctrl(&ndev->ctrl); - return 0; + return nvme_try_sched_reset(&ndev->ctrl); } static const struct dev_pm_ops nvme_dev_pm_ops = { @@ -3011,7 +3078,8 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS | - NVME_QUIRK_MEDIUM_PRIO_SQ }, + NVME_QUIRK_MEDIUM_PRIO_SQ | + NVME_QUIRK_NO_TEMP_THRESH_CHANGE }, { PCI_VDEVICE(INTEL, 0xf1a6), /* Intel 760p/Pro 7600p */ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ @@ -3037,9 +3105,16 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + { PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, + { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2005), + .driver_data = NVME_QUIRK_SINGLE_VECTOR | + NVME_QUIRK_128_BYTES_SQES | + NVME_QUIRK_SHARED_TAGS }, { 0, } }; MODULE_DEVICE_TABLE(pci, nvme_id_table); @@ -3065,6 +3140,9 @@ static int __init nvme_init(void) BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64); BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64); BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2); + + write_queues = min(write_queues, num_possible_cpus()); + poll_queues = min(poll_queues, num_possible_cpus()); return pci_register_driver(&nvme_driver); } |