summaryrefslogtreecommitdiffstats
path: root/drivers/nvme
diff options
context:
space:
mode:
authorMax Gurtovoy <maxg@mellanox.com>2017-11-28 18:28:44 +0200
committerChristoph Hellwig <hch@lst.de>2017-11-28 08:49:22 -0800
commiteb1bd249ba016284ed762d87c1989dd822500773 (patch)
tree520a3360446477d4e142447dd8afab8a2040700c /drivers/nvme
parentf41725bbe16b0773302c0cc7dc2e89f54828712d (diff)
downloadtalos-obmc-linux-eb1bd249ba016284ed762d87c1989dd822500773.tar.gz
talos-obmc-linux-eb1bd249ba016284ed762d87c1989dd822500773.zip
nvme-rdma: fix memory leak during queue allocation
In case nvme_rdma_wait_for_cm timeout expires before we get an established or rejected event (rdma_connect succeeded) from rdma_cm, we end up with leaking the ib transport resources for dedicated queue. This scenario can easily reproduced using traffic test during port toggling. Also, in order to protect from parallel ib queue destruction, that may be invoked from different context's, introduce new flag that stands for transport readiness. While we're here, protect also against a situation that we can receive rdma_cm events during ib queue destruction. Signed-off-by: Max Gurtovoy <maxg@mellanox.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'drivers/nvme')
-rw-r--r--drivers/nvme/host/rdma.c21
1 files changed, 18 insertions, 3 deletions
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 02ef0771f6b9..37af56596be6 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -77,6 +77,7 @@ struct nvme_rdma_request {
enum nvme_rdma_queue_flags {
NVME_RDMA_Q_ALLOCATED = 0,
NVME_RDMA_Q_LIVE = 1,
+ NVME_RDMA_Q_TR_READY = 2,
};
struct nvme_rdma_queue {
@@ -390,12 +391,23 @@ out_err:
static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
{
- struct nvme_rdma_device *dev = queue->device;
- struct ib_device *ibdev = dev->dev;
+ struct nvme_rdma_device *dev;
+ struct ib_device *ibdev;
+
+ if (!test_and_clear_bit(NVME_RDMA_Q_TR_READY, &queue->flags))
+ return;
+
+ dev = queue->device;
+ ibdev = dev->dev;
ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);
- rdma_destroy_qp(queue->cm_id);
+ /*
+ * The cm_id object might have been destroyed during RDMA connection
+ * establishment error flow to avoid getting other cma events, thus
+ * the destruction of the QP shouldn't use rdma_cm API.
+ */
+ ib_destroy_qp(queue->qp);
ib_free_cq(queue->ib_cq);
nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
@@ -463,6 +475,8 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
goto out_destroy_ring;
}
+ set_bit(NVME_RDMA_Q_TR_READY, &queue->flags);
+
return 0;
out_destroy_ring:
@@ -529,6 +543,7 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
out_destroy_cm_id:
rdma_destroy_id(queue->cm_id);
+ nvme_rdma_destroy_queue_ib(queue);
return ret;
}
OpenPOWER on IntegriCloud