summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/mlx5
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r--drivers/infiniband/hw/mlx5/Makefile2
-rw-r--r--drivers/infiniband/hw/mlx5/ah.c32
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c135
-rw-r--r--drivers/infiniband/hw/mlx5/gsi.c548
-rw-r--r--drivers/infiniband/hw/mlx5/mad.c166
-rw-r--r--drivers/infiniband/hw/mlx5/main.c572
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h223
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c601
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c39
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c1295
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c54
-rw-r--r--drivers/infiniband/hw/mlx5/user.h70
12 files changed, 3302 insertions, 435 deletions
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 27a70159e2ea..4e851889355a 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -1,4 +1,4 @@
obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
-mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o
+mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index 66080580e24d..745efa4cfc71 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -32,8 +32,10 @@
#include "mlx5_ib.h"
-struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
- struct mlx5_ib_ah *ah)
+static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_ah *ah,
+ struct ib_ah_attr *ah_attr,
+ enum rdma_link_layer ll)
{
if (ah_attr->ah_flags & IB_AH_GRH) {
memcpy(ah->av.rgid, &ah_attr->grh.dgid, 16);
@@ -44,9 +46,20 @@ struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
ah->av.tclass = ah_attr->grh.traffic_class;
}
- ah->av.rlid = cpu_to_be16(ah_attr->dlid);
- ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
- ah->av.stat_rate_sl = (ah_attr->static_rate << 4) | (ah_attr->sl & 0xf);
+ ah->av.stat_rate_sl = (ah_attr->static_rate << 4);
+
+ if (ll == IB_LINK_LAYER_ETHERNET) {
+ memcpy(ah->av.rmac, ah_attr->dmac, sizeof(ah_attr->dmac));
+ ah->av.udp_sport =
+ mlx5_get_roce_udp_sport(dev,
+ ah_attr->port_num,
+ ah_attr->grh.sgid_index);
+ ah->av.stat_rate_sl |= (ah_attr->sl & 0x7) << 1;
+ } else {
+ ah->av.rlid = cpu_to_be16(ah_attr->dlid);
+ ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
+ ah->av.stat_rate_sl |= (ah_attr->sl & 0xf);
+ }
return &ah->ibah;
}
@@ -54,12 +67,19 @@ struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
{
struct mlx5_ib_ah *ah;
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ enum rdma_link_layer ll;
+
+ ll = pd->device->get_link_layer(pd->device, ah_attr->port_num);
+
+ if (ll == IB_LINK_LAYER_ETHERNET && !(ah_attr->ah_flags & IB_AH_GRH))
+ return ERR_PTR(-EINVAL);
ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
if (!ah)
return ERR_PTR(-ENOMEM);
- return create_ib_ah(ah_attr, ah); /* never fails */
+ return create_ib_ah(dev, ah, ah_attr, ll); /* never fails */
}
int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 92ddae101ecc..a00ba4418de9 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -154,9 +154,6 @@ static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
wc->opcode = IB_WC_MASKED_FETCH_ADD;
wc->byte_len = 8;
break;
- case MLX5_OPCODE_BIND_MW:
- wc->opcode = IB_WC_BIND_MW;
- break;
case MLX5_OPCODE_UMR:
wc->opcode = get_umr_comp(wq, idx);
break;
@@ -171,6 +168,7 @@ enum {
static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
struct mlx5_ib_qp *qp)
{
+ enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1);
struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
struct mlx5_ib_srq *srq;
struct mlx5_ib_wq *wq;
@@ -209,7 +207,10 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
break;
case MLX5_CQE_RESP_SEND:
wc->opcode = IB_WC_RECV;
- wc->wc_flags = 0;
+ wc->wc_flags = IB_WC_IP_CSUM_OK;
+ if (unlikely(!((cqe->hds_ip_ext & CQE_L3_OK) &&
+ (cqe->hds_ip_ext & CQE_L4_OK))))
+ wc->wc_flags = 0;
break;
case MLX5_CQE_RESP_SEND_IMM:
wc->opcode = IB_WC_RECV;
@@ -236,6 +237,22 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
} else {
wc->pkey_index = 0;
}
+
+ if (ll != IB_LINK_LAYER_ETHERNET)
+ return;
+
+ switch (wc->sl & 0x3) {
+ case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH:
+ wc->network_hdr_type = RDMA_NETWORK_IB;
+ break;
+ case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6:
+ wc->network_hdr_type = RDMA_NETWORK_IPV6;
+ break;
+ case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4:
+ wc->network_hdr_type = RDMA_NETWORK_IPV4;
+ break;
+ }
+ wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
}
static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
@@ -417,7 +434,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
struct mlx5_core_qp *mqp;
struct mlx5_ib_wq *wq;
struct mlx5_sig_err_cqe *sig_err_cqe;
- struct mlx5_core_mr *mmr;
+ struct mlx5_core_mkey *mmkey;
struct mlx5_ib_mr *mr;
uint8_t opcode;
uint32_t qpn;
@@ -522,17 +539,17 @@ repoll:
case MLX5_CQE_SIG_ERR:
sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
- read_lock(&dev->mdev->priv.mr_table.lock);
- mmr = __mlx5_mr_lookup(dev->mdev,
- mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
- if (unlikely(!mmr)) {
- read_unlock(&dev->mdev->priv.mr_table.lock);
+ read_lock(&dev->mdev->priv.mkey_table.lock);
+ mmkey = __mlx5_mr_lookup(dev->mdev,
+ mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
+ if (unlikely(!mmkey)) {
+ read_unlock(&dev->mdev->priv.mkey_table.lock);
mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n",
cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey));
return -EINVAL;
}
- mr = to_mibmr(mmr);
+ mr = to_mibmr(mmkey);
get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
mr->sig->sig_err_exists = true;
mr->sig->sigerr_count++;
@@ -544,25 +561,51 @@ repoll:
mr->sig->err_item.expected,
mr->sig->err_item.actual);
- read_unlock(&dev->mdev->priv.mr_table.lock);
+ read_unlock(&dev->mdev->priv.mkey_table.lock);
goto repoll;
}
return 0;
}
+static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries,
+ struct ib_wc *wc)
+{
+ struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
+ struct mlx5_ib_wc *soft_wc, *next;
+ int npolled = 0;
+
+ list_for_each_entry_safe(soft_wc, next, &cq->wc_list, list) {
+ if (npolled >= num_entries)
+ break;
+
+ mlx5_ib_dbg(dev, "polled software generated completion on CQ 0x%x\n",
+ cq->mcq.cqn);
+
+ wc[npolled++] = soft_wc->wc;
+ list_del(&soft_wc->list);
+ kfree(soft_wc);
+ }
+
+ return npolled;
+}
+
int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
struct mlx5_ib_cq *cq = to_mcq(ibcq);
struct mlx5_ib_qp *cur_qp = NULL;
unsigned long flags;
+ int soft_polled = 0;
int npolled;
int err = 0;
spin_lock_irqsave(&cq->lock, flags);
- for (npolled = 0; npolled < num_entries; npolled++) {
- err = mlx5_poll_one(cq, &cur_qp, wc + npolled);
+ if (unlikely(!list_empty(&cq->wc_list)))
+ soft_polled = poll_soft_wc(cq, num_entries, wc);
+
+ for (npolled = 0; npolled < num_entries - soft_polled; npolled++) {
+ err = mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled);
if (err)
break;
}
@@ -573,7 +616,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
spin_unlock_irqrestore(&cq->lock, flags);
if (err == 0 || err == -EAGAIN)
- return npolled;
+ return soft_polled + npolled;
else
return err;
}
@@ -581,16 +624,27 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev;
+ struct mlx5_ib_cq *cq = to_mcq(ibcq);
void __iomem *uar_page = mdev->priv.uuari.uars[0].map;
+ unsigned long irq_flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&cq->lock, irq_flags);
+ if (cq->notify_flags != IB_CQ_NEXT_COMP)
+ cq->notify_flags = flags & IB_CQ_SOLICITED_MASK;
- mlx5_cq_arm(&to_mcq(ibcq)->mcq,
+ if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !list_empty(&cq->wc_list))
+ ret = 1;
+ spin_unlock_irqrestore(&cq->lock, irq_flags);
+
+ mlx5_cq_arm(&cq->mcq,
(flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
uar_page,
MLX5_GET_DOORBELL_LOCK(&mdev->priv.cq_uar_lock),
to_mcq(ibcq)->mcq.cons_index);
- return 0;
+ return ret;
}
static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
@@ -743,6 +797,14 @@ static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
mlx5_db_free(dev->mdev, &cq->db);
}
+static void notify_soft_wc_handler(struct work_struct *work)
+{
+ struct mlx5_ib_cq *cq = container_of(work, struct mlx5_ib_cq,
+ notify_work);
+
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+}
+
struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
const struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
@@ -760,12 +822,12 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
int eqn;
int err;
- if (attr->flags)
- return ERR_PTR(-EINVAL);
-
if (entries < 0)
return ERR_PTR(-EINVAL);
+ if (check_cq_create_flags(attr->flags))
+ return ERR_PTR(-EOPNOTSUPP);
+
entries = roundup_pow_of_two(entries + 1);
if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))
return ERR_PTR(-EINVAL);
@@ -779,6 +841,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
spin_lock_init(&cq->lock);
cq->resize_buf = NULL;
cq->resize_umem = NULL;
+ cq->create_flags = attr->flags;
if (context) {
err = create_cq_user(dev, udata, context, cq, entries,
@@ -792,10 +855,16 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
&index, &inlen);
if (err)
goto err_create;
+
+ INIT_WORK(&cq->notify_work, notify_soft_wc_handler);
}
cq->cqe_size = cqe_size;
cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
+
+ if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
+ cqb->ctx.cqe_sz_flags |= (1 << 1);
+
cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
if (err)
@@ -813,6 +882,8 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
cq->mcq.comp = mlx5_ib_cq_comp;
cq->mcq.event = mlx5_ib_cq_event;
+ INIT_LIST_HEAD(&cq->wc_list);
+
if (context)
if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
err = -EFAULT;
@@ -1200,3 +1271,27 @@ int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)
cq = to_mcq(ibcq);
return cq->cqe_size;
}
+
+/* Called from atomic context */
+int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc)
+{
+ struct mlx5_ib_wc *soft_wc;
+ struct mlx5_ib_cq *cq = to_mcq(ibcq);
+ unsigned long flags;
+
+ soft_wc = kmalloc(sizeof(*soft_wc), GFP_ATOMIC);
+ if (!soft_wc)
+ return -ENOMEM;
+
+ soft_wc->wc = *wc;
+ spin_lock_irqsave(&cq->lock, flags);
+ list_add_tail(&soft_wc->list, &cq->wc_list);
+ if (cq->notify_flags == IB_CQ_NEXT_COMP ||
+ wc->status != IB_WC_SUCCESS) {
+ cq->notify_flags = 0;
+ schedule_work(&cq->notify_work);
+ }
+ spin_unlock_irqrestore(&cq->lock, flags);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c
new file mode 100644
index 000000000000..53e03c8ede79
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/gsi.c
@@ -0,0 +1,548 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "mlx5_ib.h"
+
+struct mlx5_ib_gsi_wr {
+ struct ib_cqe cqe;
+ struct ib_wc wc;
+ int send_flags;
+ bool completed:1;
+};
+
+struct mlx5_ib_gsi_qp {
+ struct ib_qp ibqp;
+ struct ib_qp *rx_qp;
+ u8 port_num;
+ struct ib_qp_cap cap;
+ enum ib_sig_type sq_sig_type;
+ /* Serialize qp state modifications */
+ struct mutex mutex;
+ struct ib_cq *cq;
+ struct mlx5_ib_gsi_wr *outstanding_wrs;
+ u32 outstanding_pi, outstanding_ci;
+ int num_qps;
+ /* Protects access to the tx_qps. Post send operations synchronize
+ * with tx_qp creation in setup_qp(). Also protects the
+ * outstanding_wrs array and indices.
+ */
+ spinlock_t lock;
+ struct ib_qp **tx_qps;
+};
+
+static struct mlx5_ib_gsi_qp *gsi_qp(struct ib_qp *qp)
+{
+ return container_of(qp, struct mlx5_ib_gsi_qp, ibqp);
+}
+
+static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev)
+{
+ return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn);
+}
+
+static u32 next_outstanding(struct mlx5_ib_gsi_qp *gsi, u32 index)
+{
+ return ++index % gsi->cap.max_send_wr;
+}
+
+#define for_each_outstanding_wr(gsi, index) \
+ for (index = gsi->outstanding_ci; index != gsi->outstanding_pi; \
+ index = next_outstanding(gsi, index))
+
+/* Call with gsi->lock locked */
+static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
+{
+ struct ib_cq *gsi_cq = gsi->ibqp.send_cq;
+ struct mlx5_ib_gsi_wr *wr;
+ u32 index;
+
+ for_each_outstanding_wr(gsi, index) {
+ wr = &gsi->outstanding_wrs[index];
+
+ if (!wr->completed)
+ break;
+
+ if (gsi->sq_sig_type == IB_SIGNAL_ALL_WR ||
+ wr->send_flags & IB_SEND_SIGNALED)
+ WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc));
+
+ wr->completed = false;
+ }
+
+ gsi->outstanding_ci = index;
+}
+
+static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct mlx5_ib_gsi_qp *gsi = cq->cq_context;
+ struct mlx5_ib_gsi_wr *wr =
+ container_of(wc->wr_cqe, struct mlx5_ib_gsi_wr, cqe);
+ u64 wr_id;
+ unsigned long flags;
+
+ spin_lock_irqsave(&gsi->lock, flags);
+ wr->completed = true;
+ wr_id = wr->wc.wr_id;
+ wr->wc = *wc;
+ wr->wc.wr_id = wr_id;
+ wr->wc.qp = &gsi->ibqp;
+
+ generate_completions(gsi);
+ spin_unlock_irqrestore(&gsi->lock, flags);
+}
+
+struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_ib_gsi_qp *gsi;
+ struct ib_qp_init_attr hw_init_attr = *init_attr;
+ const u8 port_num = init_attr->port_num;
+ const int num_pkeys = pd->device->attrs.max_pkeys;
+ const int num_qps = mlx5_ib_deth_sqpn_cap(dev) ? num_pkeys : 0;
+ int ret;
+
+ mlx5_ib_dbg(dev, "creating GSI QP\n");
+
+ if (port_num > ARRAY_SIZE(dev->devr.ports) || port_num < 1) {
+ mlx5_ib_warn(dev,
+ "invalid port number %d during GSI QP creation\n",
+ port_num);
+ return ERR_PTR(-EINVAL);
+ }
+
+ gsi = kzalloc(sizeof(*gsi), GFP_KERNEL);
+ if (!gsi)
+ return ERR_PTR(-ENOMEM);
+
+ gsi->tx_qps = kcalloc(num_qps, sizeof(*gsi->tx_qps), GFP_KERNEL);
+ if (!gsi->tx_qps) {
+ ret = -ENOMEM;
+ goto err_free;
+ }
+
+ gsi->outstanding_wrs = kcalloc(init_attr->cap.max_send_wr,
+ sizeof(*gsi->outstanding_wrs),
+ GFP_KERNEL);
+ if (!gsi->outstanding_wrs) {
+ ret = -ENOMEM;
+ goto err_free_tx;
+ }
+
+ mutex_init(&gsi->mutex);
+
+ mutex_lock(&dev->devr.mutex);
+
+ if (dev->devr.ports[port_num - 1].gsi) {
+ mlx5_ib_warn(dev, "GSI QP already exists on port %d\n",
+ port_num);
+ ret = -EBUSY;
+ goto err_free_wrs;
+ }
+ gsi->num_qps = num_qps;
+ spin_lock_init(&gsi->lock);
+
+ gsi->cap = init_attr->cap;
+ gsi->sq_sig_type = init_attr->sq_sig_type;
+ gsi->ibqp.qp_num = 1;
+ gsi->port_num = port_num;
+
+ gsi->cq = ib_alloc_cq(pd->device, gsi, init_attr->cap.max_send_wr, 0,
+ IB_POLL_SOFTIRQ);
+ if (IS_ERR(gsi->cq)) {
+ mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. error %ld\n",
+ PTR_ERR(gsi->cq));
+ ret = PTR_ERR(gsi->cq);
+ goto err_free_wrs;
+ }
+
+ hw_init_attr.qp_type = MLX5_IB_QPT_HW_GSI;
+ hw_init_attr.send_cq = gsi->cq;
+ if (num_qps) {
+ hw_init_attr.cap.max_send_wr = 0;
+ hw_init_attr.cap.max_send_sge = 0;
+ hw_init_attr.cap.max_inline_data = 0;
+ }
+ gsi->rx_qp = ib_create_qp(pd, &hw_init_attr);
+ if (IS_ERR(gsi->rx_qp)) {
+ mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n",
+ PTR_ERR(gsi->rx_qp));
+ ret = PTR_ERR(gsi->rx_qp);
+ goto err_destroy_cq;
+ }
+
+ dev->devr.ports[init_attr->port_num - 1].gsi = gsi;
+
+ mutex_unlock(&dev->devr.mutex);
+
+ return &gsi->ibqp;
+
+err_destroy_cq:
+ ib_free_cq(gsi->cq);
+err_free_wrs:
+ mutex_unlock(&dev->devr.mutex);
+ kfree(gsi->outstanding_wrs);
+err_free_tx:
+ kfree(gsi->tx_qps);
+err_free:
+ kfree(gsi);
+ return ERR_PTR(ret);
+}
+
+int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+ const int port_num = gsi->port_num;
+ int qp_index;
+ int ret;
+
+ mlx5_ib_dbg(dev, "destroying GSI QP\n");
+
+ mutex_lock(&dev->devr.mutex);
+ ret = ib_destroy_qp(gsi->rx_qp);
+ if (ret) {
+ mlx5_ib_warn(dev, "unable to destroy hardware GSI QP. error %d\n",
+ ret);
+ mutex_unlock(&dev->devr.mutex);
+ return ret;
+ }
+ dev->devr.ports[port_num - 1].gsi = NULL;
+ mutex_unlock(&dev->devr.mutex);
+ gsi->rx_qp = NULL;
+
+ for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) {
+ if (!gsi->tx_qps[qp_index])
+ continue;
+ WARN_ON_ONCE(ib_destroy_qp(gsi->tx_qps[qp_index]));
+ gsi->tx_qps[qp_index] = NULL;
+ }
+
+ ib_free_cq(gsi->cq);
+
+ kfree(gsi->outstanding_wrs);
+ kfree(gsi->tx_qps);
+ kfree(gsi);
+
+ return 0;
+}
+
+static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi)
+{
+ struct ib_pd *pd = gsi->rx_qp->pd;
+ struct ib_qp_init_attr init_attr = {
+ .event_handler = gsi->rx_qp->event_handler,
+ .qp_context = gsi->rx_qp->qp_context,
+ .send_cq = gsi->cq,
+ .recv_cq = gsi->rx_qp->recv_cq,
+ .cap = {
+ .max_send_wr = gsi->cap.max_send_wr,
+ .max_send_sge = gsi->cap.max_send_sge,
+ .max_inline_data = gsi->cap.max_inline_data,
+ },
+ .sq_sig_type = gsi->sq_sig_type,
+ .qp_type = IB_QPT_UD,
+ .create_flags = mlx5_ib_create_qp_sqpn_qp1(),
+ };
+
+ return ib_create_qp(pd, &init_attr);
+}
+
+static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp,
+ u16 qp_index)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct ib_qp_attr attr;
+ int mask;
+ int ret;
+
+ mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY | IB_QP_PORT;
+ attr.qp_state = IB_QPS_INIT;
+ attr.pkey_index = qp_index;
+ attr.qkey = IB_QP1_QKEY;
+ attr.port_num = gsi->port_num;
+ ret = ib_modify_qp(qp, &attr, mask);
+ if (ret) {
+ mlx5_ib_err(dev, "could not change QP%d state to INIT: %d\n",
+ qp->qp_num, ret);
+ return ret;
+ }
+
+ attr.qp_state = IB_QPS_RTR;
+ ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+ if (ret) {
+ mlx5_ib_err(dev, "could not change QP%d state to RTR: %d\n",
+ qp->qp_num, ret);
+ return ret;
+ }
+
+ attr.qp_state = IB_QPS_RTS;
+ attr.sq_psn = 0;
+ ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
+ if (ret) {
+ mlx5_ib_err(dev, "could not change QP%d state to RTS: %d\n",
+ qp->qp_num, ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index)
+{
+ struct ib_device *device = gsi->rx_qp->device;
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ struct ib_qp *qp;
+ unsigned long flags;
+ u16 pkey;
+ int ret;
+
+ ret = ib_query_pkey(device, gsi->port_num, qp_index, &pkey);
+ if (ret) {
+ mlx5_ib_warn(dev, "unable to read P_Key at port %d, index %d\n",
+ gsi->port_num, qp_index);
+ return;
+ }
+
+ if (!pkey) {
+ mlx5_ib_dbg(dev, "invalid P_Key at port %d, index %d. Skipping.\n",
+ gsi->port_num, qp_index);
+ return;
+ }
+
+ spin_lock_irqsave(&gsi->lock, flags);
+ qp = gsi->tx_qps[qp_index];
+ spin_unlock_irqrestore(&gsi->lock, flags);
+ if (qp) {
+ mlx5_ib_dbg(dev, "already existing GSI TX QP at port %d, index %d. Skipping\n",
+ gsi->port_num, qp_index);
+ return;
+ }
+
+ qp = create_gsi_ud_qp(gsi);
+ if (IS_ERR(qp)) {
+ mlx5_ib_warn(dev, "unable to create hardware UD QP for GSI: %ld\n",
+ PTR_ERR(qp));
+ return;
+ }
+
+ ret = modify_to_rts(gsi, qp, qp_index);
+ if (ret)
+ goto err_destroy_qp;
+
+ spin_lock_irqsave(&gsi->lock, flags);
+ WARN_ON_ONCE(gsi->tx_qps[qp_index]);
+ gsi->tx_qps[qp_index] = qp;
+ spin_unlock_irqrestore(&gsi->lock, flags);
+
+ return;
+
+err_destroy_qp:
+ WARN_ON_ONCE(qp);
+}
+
+static void setup_qps(struct mlx5_ib_gsi_qp *gsi)
+{
+ u16 qp_index;
+
+ for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index)
+ setup_qp(gsi, qp_index);
+}
+
+int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
+ int attr_mask)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+ int ret;
+
+ mlx5_ib_dbg(dev, "modifying GSI QP to state %d\n", attr->qp_state);
+
+ mutex_lock(&gsi->mutex);
+ ret = ib_modify_qp(gsi->rx_qp, attr, attr_mask);
+ if (ret) {
+ mlx5_ib_warn(dev, "unable to modify GSI rx QP: %d\n", ret);
+ goto unlock;
+ }
+
+ if (to_mqp(gsi->rx_qp)->state == IB_QPS_RTS)
+ setup_qps(gsi);
+
+unlock:
+ mutex_unlock(&gsi->mutex);
+
+ return ret;
+}
+
+int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+ int ret;
+
+ mutex_lock(&gsi->mutex);
+ ret = ib_query_qp(gsi->rx_qp, qp_attr, qp_attr_mask, qp_init_attr);
+ qp_init_attr->cap = gsi->cap;
+ mutex_unlock(&gsi->mutex);
+
+ return ret;
+}
+
+/* Call with gsi->lock locked */
+static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi,
+ struct ib_ud_wr *wr, struct ib_wc *wc)
+{
+ struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
+ struct mlx5_ib_gsi_wr *gsi_wr;
+
+ if (gsi->outstanding_pi == gsi->outstanding_ci + gsi->cap.max_send_wr) {
+ mlx5_ib_warn(dev, "no available GSI work request.\n");
+ return -ENOMEM;
+ }
+
+ gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi];
+ gsi->outstanding_pi = next_outstanding(gsi, gsi->outstanding_pi);
+
+ if (!wc) {
+ memset(&gsi_wr->wc, 0, sizeof(gsi_wr->wc));
+ gsi_wr->wc.pkey_index = wr->pkey_index;
+ gsi_wr->wc.wr_id = wr->wr.wr_id;
+ } else {
+ gsi_wr->wc = *wc;
+ gsi_wr->completed = true;
+ }
+
+ gsi_wr->cqe.done = &handle_single_completion;
+ wr->wr.wr_cqe = &gsi_wr->cqe;
+
+ return 0;
+}
+
+/* Call with gsi->lock locked */
+static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi,
+ struct ib_ud_wr *wr)
+{
+ struct ib_wc wc = {
+ { .wr_id = wr->wr.wr_id },
+ .status = IB_WC_SUCCESS,
+ .opcode = IB_WC_SEND,
+ .qp = &gsi->ibqp,
+ };
+ int ret;
+
+ ret = mlx5_ib_add_outstanding_wr(gsi, wr, &wc);
+ if (ret)
+ return ret;
+
+ generate_completions(gsi);
+
+ return 0;
+}
+
+/* Call with gsi->lock locked */
+static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
+ int qp_index = wr->pkey_index;
+
+ if (!mlx5_ib_deth_sqpn_cap(dev))
+ return gsi->rx_qp;
+
+ if (qp_index >= gsi->num_qps)
+ return NULL;
+
+ return gsi->tx_qps[qp_index];
+}
+
+int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+ struct ib_qp *tx_qp;
+ unsigned long flags;
+ int ret;
+
+ for (; wr; wr = wr->next) {
+ struct ib_ud_wr cur_wr = *ud_wr(wr);
+
+ cur_wr.wr.next = NULL;
+
+ spin_lock_irqsave(&gsi->lock, flags);
+ tx_qp = get_tx_qp(gsi, &cur_wr);
+ if (!tx_qp) {
+ ret = mlx5_ib_gsi_silent_drop(gsi, &cur_wr);
+ if (ret)
+ goto err;
+ spin_unlock_irqrestore(&gsi->lock, flags);
+ continue;
+ }
+
+ ret = mlx5_ib_add_outstanding_wr(gsi, &cur_wr, NULL);
+ if (ret)
+ goto err;
+
+ ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr);
+ if (ret) {
+ /* Undo the effect of adding the outstanding wr */
+ gsi->outstanding_pi = (gsi->outstanding_pi - 1) %
+ gsi->cap.max_send_wr;
+ goto err;
+ }
+ spin_unlock_irqrestore(&gsi->lock, flags);
+ }
+
+ return 0;
+
+err:
+ spin_unlock_irqrestore(&gsi->lock, flags);
+ *bad_wr = wr;
+ return ret;
+}
+
+int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+
+ return ib_post_recv(gsi->rx_qp, wr, bad_wr);
+}
+
+void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi)
+{
+ if (!gsi)
+ return;
+
+ mutex_lock(&gsi->mutex);
+ setup_qps(gsi);
+ mutex_unlock(&gsi->mutex);
+}
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index b84d13a487cc..41d8a0036465 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -31,8 +31,10 @@
*/
#include <linux/mlx5/cmd.h>
+#include <linux/mlx5/vport.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_smi.h>
+#include <rdma/ib_pma.h>
#include "mlx5_ib.h"
enum {
@@ -57,20 +59,12 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
return mlx5_core_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, port);
}
-int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
- const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in, size_t in_mad_size,
- struct ib_mad_hdr *out, size_t *out_mad_size,
- u16 *out_mad_pkey_index)
+static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+ const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+ const struct ib_mad *in_mad, struct ib_mad *out_mad)
{
u16 slid;
int err;
- const struct ib_mad *in_mad = (const struct ib_mad *)in;
- struct ib_mad *out_mad = (struct ib_mad *)out;
-
- if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
- *out_mad_size != sizeof(*out_mad)))
- return IB_MAD_RESULT_FAILURE;
slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
@@ -117,6 +111,156 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}
+static void pma_cnt_ext_assign(struct ib_pma_portcounters_ext *pma_cnt_ext,
+ void *out)
+{
+#define MLX5_SUM_CNT(p, cntr1, cntr2) \
+ (MLX5_GET64(query_vport_counter_out, p, cntr1) + \
+ MLX5_GET64(query_vport_counter_out, p, cntr2))
+
+ pma_cnt_ext->port_xmit_data =
+ cpu_to_be64(MLX5_SUM_CNT(out, transmitted_ib_unicast.octets,
+ transmitted_ib_multicast.octets) >> 2);
+ pma_cnt_ext->port_xmit_data =
+ cpu_to_be64(MLX5_SUM_CNT(out, received_ib_unicast.octets,
+ received_ib_multicast.octets) >> 2);
+ pma_cnt_ext->port_xmit_packets =
+ cpu_to_be64(MLX5_SUM_CNT(out, transmitted_ib_unicast.packets,
+ transmitted_ib_multicast.packets));
+ pma_cnt_ext->port_rcv_packets =
+ cpu_to_be64(MLX5_SUM_CNT(out, received_ib_unicast.packets,
+ received_ib_multicast.packets));
+ pma_cnt_ext->port_unicast_xmit_packets =
+ MLX5_GET64_BE(query_vport_counter_out,
+ out, transmitted_ib_unicast.packets);
+ pma_cnt_ext->port_unicast_rcv_packets =
+ MLX5_GET64_BE(query_vport_counter_out,
+ out, received_ib_unicast.packets);
+ pma_cnt_ext->port_multicast_xmit_packets =
+ MLX5_GET64_BE(query_vport_counter_out,
+ out, transmitted_ib_multicast.packets);
+ pma_cnt_ext->port_multicast_rcv_packets =
+ MLX5_GET64_BE(query_vport_counter_out,
+ out, received_ib_multicast.packets);
+}
+
+static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt,
+ void *out)
+{
+ /* Traffic counters will be reported in
+ * their 64bit form via ib_pma_portcounters_ext by default.
+ */
+ void *out_pma = MLX5_ADDR_OF(ppcnt_reg, out,
+ counter_set);
+
+#define MLX5_ASSIGN_PMA_CNTR(counter_var, counter_name) { \
+ counter_var = MLX5_GET_BE(typeof(counter_var), \
+ ib_port_cntrs_grp_data_layout, \
+ out_pma, counter_name); \
+ }
+
+ MLX5_ASSIGN_PMA_CNTR(pma_cnt->symbol_error_counter,
+ symbol_error_counter);
+ MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_error_recovery_counter,
+ link_error_recovery_counter);
+ MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_downed_counter,
+ link_downed_counter);
+ MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_errors,
+ port_rcv_errors);
+ MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_remphys_errors,
+ port_rcv_remote_physical_errors);
+ MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_switch_relay_errors,
+ port_rcv_switch_relay_errors);
+ MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_discards,
+ port_xmit_discards);
+ MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_constraint_errors,
+ port_xmit_constraint_errors);
+ MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_constraint_errors,
+ port_rcv_constraint_errors);
+ MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_overrun_errors,
+ link_overrun_errors);
+ MLX5_ASSIGN_PMA_CNTR(pma_cnt->vl15_dropped,
+ vl_15_dropped);
+}
+
+static int process_pma_cmd(struct ib_device *ibdev, u8 port_num,
+ const struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ int err;
+ void *out_cnt;
+
+ /* Decalring support of extended counters */
+ if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) {
+ struct ib_class_port_info cpi = {};
+
+ cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
+ memcpy((out_mad->data + 40), &cpi, sizeof(cpi));
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+ }
+
+ if (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT) {
+ struct ib_pma_portcounters_ext *pma_cnt_ext =
+ (struct ib_pma_portcounters_ext *)(out_mad->data + 40);
+ int sz = MLX5_ST_SZ_BYTES(query_vport_counter_out);
+
+ out_cnt = mlx5_vzalloc(sz);
+ if (!out_cnt)
+ return IB_MAD_RESULT_FAILURE;
+
+ err = mlx5_core_query_vport_counter(dev->mdev, 0,
+ port_num, out_cnt, sz);
+ if (!err)
+ pma_cnt_ext_assign(pma_cnt_ext, out_cnt);
+ } else {
+ struct ib_pma_portcounters *pma_cnt =
+ (struct ib_pma_portcounters *)(out_mad->data + 40);
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+
+ out_cnt = mlx5_vzalloc(sz);
+ if (!out_cnt)
+ return IB_MAD_RESULT_FAILURE;
+
+ err = mlx5_core_query_ib_ppcnt(dev->mdev, port_num,
+ out_cnt, sz);
+ if (!err)
+ pma_cnt_assign(pma_cnt, out_cnt);
+ }
+
+ kvfree(out_cnt);
+ if (err)
+ return IB_MAD_RESULT_FAILURE;
+
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+}
+
+int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+ const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+ const struct ib_mad_hdr *in, size_t in_mad_size,
+ struct ib_mad_hdr *out, size_t *out_mad_size,
+ u16 *out_mad_pkey_index)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ const struct ib_mad *in_mad = (const struct ib_mad *)in;
+ struct ib_mad *out_mad = (struct ib_mad *)out;
+
+ if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
+ *out_mad_size != sizeof(*out_mad)))
+ return IB_MAD_RESULT_FAILURE;
+
+ memset(out_mad->data, 0, sizeof(out_mad->data));
+
+ if (MLX5_CAP_GEN(mdev, vport_counters) &&
+ in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
+ in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) {
+ return process_pma_cmd(ibdev, port_num, in_mad, out_mad);
+ } else {
+ return process_mad(ibdev, mad_flags, port_num, in_wc, in_grh,
+ in_mad, out_mad);
+ }
+}
+
int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
{
struct ib_smp *in_mad = NULL;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index b0ec175cc6ba..5afbb697e691 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -40,6 +40,8 @@
#include <linux/io-mapping.h>
#include <linux/sched.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
#include <linux/mlx5/vport.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
@@ -66,12 +68,14 @@ static char mlx5_version[] =
DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
+enum {
+ MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3,
+};
+
static enum rdma_link_layer
-mlx5_ib_port_link_layer(struct ib_device *device)
+mlx5_port_type_cap_to_rdma_ll(int port_type_cap)
{
- struct mlx5_ib_dev *dev = to_mdev(device);
-
- switch (MLX5_CAP_GEN(dev->mdev, port_type)) {
+ switch (port_type_cap) {
case MLX5_CAP_PORT_TYPE_IB:
return IB_LINK_LAYER_INFINIBAND;
case MLX5_CAP_PORT_TYPE_ETH:
@@ -81,6 +85,202 @@ mlx5_ib_port_link_layer(struct ib_device *device)
}
}
+static enum rdma_link_layer
+mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);
+
+ return mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+}
+
+static int mlx5_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+ struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev,
+ roce.nb);
+
+ if ((event != NETDEV_UNREGISTER) && (event != NETDEV_REGISTER))
+ return NOTIFY_DONE;
+
+ write_lock(&ibdev->roce.netdev_lock);
+ if (ndev->dev.parent == &ibdev->mdev->pdev->dev)
+ ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ? NULL : ndev;
+ write_unlock(&ibdev->roce.netdev_lock);
+
+ return NOTIFY_DONE;
+}
+
+static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
+ u8 port_num)
+{
+ struct mlx5_ib_dev *ibdev = to_mdev(device);
+ struct net_device *ndev;
+
+ /* Ensure ndev does not disappear before we invoke dev_hold()
+ */
+ read_lock(&ibdev->roce.netdev_lock);
+ ndev = ibdev->roce.netdev;
+ if (ndev)
+ dev_hold(ndev);
+ read_unlock(&ibdev->roce.netdev_lock);
+
+ return ndev;
+}
+
+static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
+ struct ib_port_attr *props)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ struct net_device *ndev;
+ enum ib_mtu ndev_ib_mtu;
+ u16 qkey_viol_cntr;
+
+ memset(props, 0, sizeof(*props));
+
+ props->port_cap_flags |= IB_PORT_CM_SUP;
+ props->port_cap_flags |= IB_PORT_IP_BASED_GIDS;
+
+ props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev,
+ roce_address_table_size);
+ props->max_mtu = IB_MTU_4096;
+ props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
+ props->pkey_tbl_len = 1;
+ props->state = IB_PORT_DOWN;
+ props->phys_state = 3;
+
+ mlx5_query_nic_vport_qkey_viol_cntr(dev->mdev, &qkey_viol_cntr);
+ props->qkey_viol_cntr = qkey_viol_cntr;
+
+ ndev = mlx5_ib_get_netdev(device, port_num);
+ if (!ndev)
+ return 0;
+
+ if (netif_running(ndev) && netif_carrier_ok(ndev)) {
+ props->state = IB_PORT_ACTIVE;
+ props->phys_state = 5;
+ }
+
+ ndev_ib_mtu = iboe_get_mtu(ndev->mtu);
+
+ dev_put(ndev);
+
+ props->active_mtu = min(props->max_mtu, ndev_ib_mtu);
+
+ props->active_width = IB_WIDTH_4X; /* TODO */
+ props->active_speed = IB_SPEED_QDR; /* TODO */
+
+ return 0;
+}
+
+static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid,
+ const struct ib_gid_attr *attr,
+ void *mlx5_addr)
+{
+#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
+ char *mlx5_addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
+ source_l3_address);
+ void *mlx5_addr_mac = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
+ source_mac_47_32);
+
+ if (!gid)
+ return;
+
+ ether_addr_copy(mlx5_addr_mac, attr->ndev->dev_addr);
+
+ if (is_vlan_dev(attr->ndev)) {
+ MLX5_SET_RA(mlx5_addr, vlan_valid, 1);
+ MLX5_SET_RA(mlx5_addr, vlan_id, vlan_dev_vlan_id(attr->ndev));
+ }
+
+ switch (attr->gid_type) {
+ case IB_GID_TYPE_IB:
+ MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1);
+ break;
+ case IB_GID_TYPE_ROCE_UDP_ENCAP:
+ MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2);
+ break;
+
+ default:
+ WARN_ON(true);
+ }
+
+ if (attr->gid_type != IB_GID_TYPE_IB) {
+ if (ipv6_addr_v4mapped((void *)gid))
+ MLX5_SET_RA(mlx5_addr, roce_l3_type,
+ MLX5_ROCE_L3_TYPE_IPV4);
+ else
+ MLX5_SET_RA(mlx5_addr, roce_l3_type,
+ MLX5_ROCE_L3_TYPE_IPV6);
+ }
+
+ if ((attr->gid_type == IB_GID_TYPE_IB) ||
+ !ipv6_addr_v4mapped((void *)gid))
+ memcpy(mlx5_addr_l3_addr, gid, sizeof(*gid));
+ else
+ memcpy(&mlx5_addr_l3_addr[12], &gid->raw[12], 4);
+}
+
+static int set_roce_addr(struct ib_device *device, u8 port_num,
+ unsigned int index,
+ const union ib_gid *gid,
+ const struct ib_gid_attr *attr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ u32 in[MLX5_ST_SZ_DW(set_roce_address_in)];
+ u32 out[MLX5_ST_SZ_DW(set_roce_address_out)];
+ void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num);
+
+ if (ll != IB_LINK_LAYER_ETHERNET)
+ return -EINVAL;
+
+ memset(in, 0, sizeof(in));
+
+ ib_gid_to_mlx5_roce_addr(gid, attr, in_addr);
+
+ MLX5_SET(set_roce_address_in, in, roce_address_index, index);
+ MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
+
+ memset(out, 0, sizeof(out));
+ return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
+ unsigned int index, const union ib_gid *gid,
+ const struct ib_gid_attr *attr,
+ __always_unused void **context)
+{
+ return set_roce_addr(device, port_num, index, gid, attr);
+}
+
+static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num,
+ unsigned int index, __always_unused void **context)
+{
+ return set_roce_addr(device, port_num, index, NULL, NULL);
+}
+
+__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
+ int index)
+{
+ struct ib_gid_attr attr;
+ union ib_gid gid;
+
+ if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr))
+ return 0;
+
+ if (!attr.ndev)
+ return 0;
+
+ dev_put(attr.ndev);
+
+ if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+ return 0;
+
+ return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
+}
+
static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
return !dev->mdev->issi;
@@ -97,13 +297,35 @@ static int mlx5_get_vport_access_method(struct ib_device *ibdev)
if (mlx5_use_mad_ifc(to_mdev(ibdev)))
return MLX5_VPORT_ACCESS_METHOD_MAD;
- if (mlx5_ib_port_link_layer(ibdev) ==
+ if (mlx5_ib_port_link_layer(ibdev, 1) ==
IB_LINK_LAYER_ETHERNET)
return MLX5_VPORT_ACCESS_METHOD_NIC;
return MLX5_VPORT_ACCESS_METHOD_HCA;
}
+static void get_atomic_caps(struct mlx5_ib_dev *dev,
+ struct ib_device_attr *props)
+{
+ u8 tmp;
+ u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
+ u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
+ u8 atomic_req_8B_endianness_mode =
+ MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianess_mode);
+
+ /* Check if HW supports 8 bytes standard atomic operations and capable
+ * of host endianness respond
+ */
+ tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
+ if (((atomic_operations & tmp) == tmp) &&
+ (atomic_size_qp & MLX5_ATOMIC_SIZE_QP_8BYTES) &&
+ (atomic_req_8B_endianness_mode)) {
+ props->atomic_cap = IB_ATOMIC_HCA;
+ } else {
+ props->atomic_cap = IB_ATOMIC_NONE;
+ }
+}
+
static int mlx5_query_system_image_guid(struct ib_device *ibdev,
__be64 *sys_image_guid)
{
@@ -119,13 +341,21 @@ static int mlx5_query_system_image_guid(struct ib_device *ibdev,
case MLX5_VPORT_ACCESS_METHOD_HCA:
err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
- if (!err)
- *sys_image_guid = cpu_to_be64(tmp);
- return err;
+ break;
+
+ case MLX5_VPORT_ACCESS_METHOD_NIC:
+ err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
+ break;
default:
return -EINVAL;
}
+
+ if (!err)
+ *sys_image_guid = cpu_to_be64(tmp);
+
+ return err;
+
}
static int mlx5_query_max_pkeys(struct ib_device *ibdev,
@@ -179,13 +409,20 @@ static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
case MLX5_VPORT_ACCESS_METHOD_HCA:
err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
- if (!err)
- *node_guid = cpu_to_be64(tmp);
- return err;
+ break;
+
+ case MLX5_VPORT_ACCESS_METHOD_NIC:
+ err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
+ break;
default:
return -EINVAL;
}
+
+ if (!err)
+ *node_guid = cpu_to_be64(tmp);
+
+ return err;
}
struct mlx5_reg_node_desc {
@@ -250,6 +487,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
if (MLX5_CAP_GEN(mdev, xrc))
props->device_cap_flags |= IB_DEVICE_XRC;
+ if (MLX5_CAP_GEN(mdev, imaicl)) {
+ props->device_cap_flags |= IB_DEVICE_MEM_WINDOW |
+ IB_DEVICE_MEM_WINDOW_TYPE_2B;
+ props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
+ /* We support 'Gappy' memory registration too */
+ props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG;
+ }
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
if (MLX5_CAP_GEN(mdev, sho)) {
props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
@@ -263,6 +507,15 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (MLX5_CAP_GEN(mdev, block_lb_mc))
props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+ if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
+ (MLX5_CAP_ETH(dev->mdev, csum_cap)))
+ props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
+
+ if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
+ props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
+ props->device_cap_flags |= IB_DEVICE_UD_TSO;
+ }
+
props->vendor_part_id = mdev->pdev->device;
props->hw_ver = mdev->pdev->revision;
@@ -278,7 +531,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->max_sge = min(max_rq_sg, max_sq_sg);
props->max_sge_rd = props->max_sge;
props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
- props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_eq_sz)) - 1;
+ props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
@@ -288,14 +541,17 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
props->max_srq_sge = max_rq_sg - 1;
- props->max_fast_reg_page_list_len = (unsigned int)-1;
- props->atomic_cap = IB_ATOMIC_NONE;
+ props->max_fast_reg_page_list_len =
+ 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size);
+ get_atomic_caps(dev, props);
props->masked_atomic_cap = IB_ATOMIC_NONE;
props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
+ props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
+ props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
if (MLX5_CAP_GEN(mdev, pg))
@@ -303,6 +559,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->odp_caps = dev->odp_caps;
#endif
+ if (MLX5_CAP_GEN(mdev, cd))
+ props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
+
return 0;
}
@@ -483,6 +742,9 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
case MLX5_VPORT_ACCESS_METHOD_HCA:
return mlx5_query_hca_port(ibdev, port, props);
+ case MLX5_VPORT_ACCESS_METHOD_NIC:
+ return mlx5_query_port_roce(ibdev, port, props);
+
default:
return -EINVAL;
}
@@ -583,8 +845,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_alloc_ucontext_req_v2 req;
- struct mlx5_ib_alloc_ucontext_resp resp;
+ struct mlx5_ib_alloc_ucontext_req_v2 req = {};
+ struct mlx5_ib_alloc_ucontext_resp resp = {};
struct mlx5_ib_ucontext *context;
struct mlx5_uuar_info *uuari;
struct mlx5_uar *uars;
@@ -595,24 +857,28 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
int err;
int i;
size_t reqlen;
+ size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
+ max_cqe_version);
if (!dev->ib_active)
return ERR_PTR(-EAGAIN);
- memset(&req, 0, sizeof(req));
+ if (udata->inlen < sizeof(struct ib_uverbs_cmd_hdr))
+ return ERR_PTR(-EINVAL);
+
reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
ver = 0;
- else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
+ else if (reqlen >= min_req_v2)
ver = 2;
else
return ERR_PTR(-EINVAL);
- err = ib_copy_from_udata(&req, udata, reqlen);
+ err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req)));
if (err)
return ERR_PTR(err);
- if (req.flags || req.reserved)
+ if (req.flags)
return ERR_PTR(-EINVAL);
if (req.total_num_uuars > MLX5_MAX_UUARS)
@@ -621,6 +887,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
if (req.total_num_uuars == 0)
return ERR_PTR(-EINVAL);
+ if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (reqlen > sizeof(req) &&
+ !ib_is_udata_cleared(udata, sizeof(req),
+ reqlen - sizeof(req)))
+ return ERR_PTR(-EOPNOTSUPP);
+
req.total_num_uuars = ALIGN(req.total_num_uuars,
MLX5_NON_FP_BF_REGS_PER_PAGE);
if (req.num_low_latency_uuars > req.total_num_uuars - 1)
@@ -636,6 +910,11 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
+ resp.cqe_version = min_t(__u8,
+ (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
+ req.max_cqe_version);
+ resp.response_length = min(offsetof(typeof(resp), response_length) +
+ sizeof(resp.response_length), udata->outlen);
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
@@ -681,22 +960,49 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
#endif
+ if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
+ err = mlx5_core_alloc_transport_domain(dev->mdev,
+ &context->tdn);
+ if (err)
+ goto out_uars;
+ }
+
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
resp.tot_uuars = req.total_num_uuars;
resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
- err = ib_copy_to_udata(udata, &resp,
- sizeof(resp) - sizeof(resp.reserved));
+
+ if (field_avail(typeof(resp), cqe_version, udata->outlen))
+ resp.response_length += sizeof(resp.cqe_version);
+
+ if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
+ resp.comp_mask |=
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
+ resp.hca_core_clock_offset =
+ offsetof(struct mlx5_init_seg, internal_timer_h) %
+ PAGE_SIZE;
+ resp.response_length += sizeof(resp.hca_core_clock_offset) +
+ sizeof(resp.reserved2) +
+ sizeof(resp.reserved3);
+ }
+
+ err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
- goto out_uars;
+ goto out_td;
uuari->ver = ver;
uuari->num_low_latency_uuars = req.num_low_latency_uuars;
uuari->uars = uars;
uuari->num_uars = num_uars;
+ context->cqe_version = resp.cqe_version;
+
return &context->ibucontext;
+out_td:
+ if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
+ mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+
out_uars:
for (i--; i >= 0; i--)
mlx5_cmd_free_uar(dev->mdev, uars[i].index);
@@ -721,6 +1027,9 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
struct mlx5_uuar_info *uuari = &context->uuari;
int i;
+ if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
+ mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+
for (i = 0; i < uuari->num_uars; i++) {
if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
@@ -790,6 +1099,30 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
return -ENOSYS;
+ case MLX5_IB_MMAP_CORE_CLOCK:
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ if (vma->vm_flags & (VM_WRITE | VM_EXEC))
+ return -EPERM;
+
+ /* Don't expose to user-space information it shouldn't have */
+ if (PAGE_SIZE > 4096)
+ return -EOPNOTSUPP;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ pfn = (dev->mdev->iseg_base +
+ offsetof(struct mlx5_init_seg, internal_timer_h)) >>
+ PAGE_SHIFT;
+ if (io_remap_pfn_range(vma, vma->vm_start, pfn,
+ PAGE_SIZE, vma->vm_page_prot))
+ return -EAGAIN;
+
+ mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n",
+ vma->vm_start,
+ (unsigned long long)pfn << PAGE_SHIFT);
+ break;
+
default:
return -EINVAL;
}
@@ -1049,11 +1382,20 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
return 0;
}
+static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
+{
+ priority *= 2;
+ if (!dont_trap)
+ priority++;
+ return priority;
+}
+
#define MLX5_FS_MAX_TYPES 10
#define MLX5_FS_MAX_ENTRIES 32000UL
static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
struct ib_flow_attr *flow_attr)
{
+ bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
struct mlx5_flow_namespace *ns = NULL;
struct mlx5_ib_flow_prio *prio;
struct mlx5_flow_table *ft;
@@ -1063,10 +1405,12 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
int err = 0;
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- if (flow_is_multicast_only(flow_attr))
+ if (flow_is_multicast_only(flow_attr) &&
+ !dont_trap)
priority = MLX5_IB_FLOW_MCAST_PRIO;
else
- priority = flow_attr->priority;
+ priority = ib_prio_to_core_prio(flow_attr->priority,
+ dont_trap);
ns = mlx5_get_flow_namespace(dev->mdev,
MLX5_FLOW_NAMESPACE_BYPASS);
num_entries = MLX5_FS_MAX_ENTRIES;
@@ -1114,6 +1458,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
unsigned int spec_index;
u32 *match_c;
u32 *match_v;
+ u32 action;
int err = 0;
if (!is_valid_attr(flow_attr))
@@ -1139,9 +1484,11 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
/* Outer header support only */
match_criteria_enable = (!outer_header_zero(match_c)) << 0;
+ action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
+ MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable,
match_c, match_v,
- MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ action,
MLX5_FS_DEFAULT_FLOW_TAG,
dst);
@@ -1161,6 +1508,29 @@ free:
return err ? ERR_PTR(err) : handler;
}
+static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst)
+{
+ struct mlx5_ib_flow_handler *handler_dst = NULL;
+ struct mlx5_ib_flow_handler *handler = NULL;
+
+ handler = create_flow_rule(dev, ft_prio, flow_attr, NULL);
+ if (!IS_ERR(handler)) {
+ handler_dst = create_flow_rule(dev, ft_prio,
+ flow_attr, dst);
+ if (IS_ERR(handler_dst)) {
+ mlx5_del_flow_rule(handler->rule);
+ kfree(handler);
+ handler = handler_dst;
+ } else {
+ list_add(&handler_dst->list, &handler->list);
+ }
+ }
+
+ return handler;
+}
enum {
LEFTOVERS_MC,
LEFTOVERS_UC,
@@ -1238,7 +1608,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
if (domain != IB_FLOW_DOMAIN_USER ||
flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) ||
- flow_attr->flags)
+ (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP))
return ERR_PTR(-EINVAL);
dst = kzalloc(sizeof(*dst), GFP_KERNEL);
@@ -1257,8 +1627,13 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn;
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- handler = create_flow_rule(dev, ft_prio, flow_attr,
- dst);
+ if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) {
+ handler = create_dont_trap_rule(dev, ft_prio,
+ flow_attr, dst);
+ } else {
+ handler = create_flow_rule(dev, ft_prio, flow_attr,
+ dst);
+ }
} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
handler = create_leftovers_rule(dev, ft_prio, flow_attr,
@@ -1396,6 +1771,17 @@ static struct device_attribute *mlx5_class_attributes[] = {
&dev_attr_reg_pages,
};
+static void pkey_change_handler(struct work_struct *work)
+{
+ struct mlx5_ib_port_resources *ports =
+ container_of(work, struct mlx5_ib_port_resources,
+ pkey_change_work);
+
+ mutex_lock(&ports->devr->mutex);
+ mlx5_ib_gsi_pkey_change(ports->gsi);
+ mutex_unlock(&ports->devr->mutex);
+}
+
static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
enum mlx5_dev_event event, unsigned long param)
{
@@ -1432,6 +1818,8 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
case MLX5_DEV_EVENT_PKEY_CHANGE:
ibev.event = IB_EVENT_PKEY_CHANGE;
port = (u8)param;
+
+ schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
break;
case MLX5_DEV_EVENT_GUID_CHANGE:
@@ -1518,7 +1906,7 @@ static void destroy_umrc_res(struct mlx5_ib_dev *dev)
mlx5_ib_warn(dev, "mr cache cleanup failed\n");
mlx5_ib_destroy_qp(dev->umrc.qp);
- ib_destroy_cq(dev->umrc.cq);
+ ib_free_cq(dev->umrc.cq);
ib_dealloc_pd(dev->umrc.pd);
}
@@ -1533,7 +1921,6 @@ static int create_umr_res(struct mlx5_ib_dev *dev)
struct ib_pd *pd;
struct ib_cq *cq;
struct ib_qp *qp;
- struct ib_cq_init_attr cq_attr = {};
int ret;
attr = kzalloc(sizeof(*attr), GFP_KERNEL);
@@ -1550,15 +1937,12 @@ static int create_umr_res(struct mlx5_ib_dev *dev)
goto error_0;
}
- cq_attr.cqe = 128;
- cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL,
- &cq_attr);
+ cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
if (IS_ERR(cq)) {
mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
ret = PTR_ERR(cq);
goto error_2;
}
- ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
init_attr->send_cq = cq;
init_attr->recv_cq = cq;
@@ -1625,7 +2009,7 @@ error_4:
mlx5_ib_destroy_qp(qp);
error_3:
- ib_destroy_cq(cq);
+ ib_free_cq(cq);
error_2:
ib_dealloc_pd(pd);
@@ -1641,10 +2025,13 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
struct ib_srq_init_attr attr;
struct mlx5_ib_dev *dev;
struct ib_cq_init_attr cq_attr = {.cqe = 1};
+ int port;
int ret = 0;
dev = container_of(devr, struct mlx5_ib_dev, devr);
+ mutex_init(&devr->mutex);
+
devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
if (IS_ERR(devr->p0)) {
ret = PTR_ERR(devr->p0);
@@ -1732,6 +2119,12 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
atomic_inc(&devr->p0->usecnt);
atomic_set(&devr->s0->usecnt, 0);
+ for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) {
+ INIT_WORK(&devr->ports[port].pkey_change_work,
+ pkey_change_handler);
+ devr->ports[port].devr = devr;
+ }
+
return 0;
error5:
@@ -1750,12 +2143,46 @@ error0:
static void destroy_dev_resources(struct mlx5_ib_resources *devr)
{
+ struct mlx5_ib_dev *dev =
+ container_of(devr, struct mlx5_ib_dev, devr);
+ int port;
+
mlx5_ib_destroy_srq(devr->s1);
mlx5_ib_destroy_srq(devr->s0);
mlx5_ib_dealloc_xrcd(devr->x0);
mlx5_ib_dealloc_xrcd(devr->x1);
mlx5_ib_destroy_cq(devr->c0);
mlx5_ib_dealloc_pd(devr->p0);
+
+ /* Make sure no change P_Key work items are still executing */
+ for (port = 0; port < dev->num_ports; ++port)
+ cancel_work_sync(&devr->ports[port].pkey_change_work);
+}
+
+static u32 get_core_cap_flags(struct ib_device *ibdev)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
+ u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
+ u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
+ u32 ret = 0;
+
+ if (ll == IB_LINK_LAYER_INFINIBAND)
+ return RDMA_CORE_PORT_IBA_IB;
+
+ if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
+ return 0;
+
+ if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
+ return 0;
+
+ if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
+ ret |= RDMA_CORE_PORT_IBA_ROCE;
+
+ if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP)
+ ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+ return ret;
}
static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
@@ -1770,20 +2197,50 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+ immutable->core_cap_flags = get_core_cap_flags(ibdev);
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
}
+static int mlx5_enable_roce(struct mlx5_ib_dev *dev)
+{
+ int err;
+
+ dev->roce.nb.notifier_call = mlx5_netdev_event;
+ err = register_netdevice_notifier(&dev->roce.nb);
+ if (err)
+ return err;
+
+ err = mlx5_nic_vport_enable_roce(dev->mdev);
+ if (err)
+ goto err_unregister_netdevice_notifier;
+
+ return 0;
+
+err_unregister_netdevice_notifier:
+ unregister_netdevice_notifier(&dev->roce.nb);
+ return err;
+}
+
+static void mlx5_disable_roce(struct mlx5_ib_dev *dev)
+{
+ mlx5_nic_vport_disable_roce(dev->mdev);
+ unregister_netdevice_notifier(&dev->roce.nb);
+}
+
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
struct mlx5_ib_dev *dev;
+ enum rdma_link_layer ll;
+ int port_type_cap;
int err;
int i;
- /* don't create IB instance over Eth ports, no RoCE yet! */
- if (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH)
+ port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+ ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+
+ if ((ll == IB_LINK_LAYER_ETHERNET) && !MLX5_CAP_GEN(mdev, roce))
return NULL;
printk_once(KERN_INFO "%s", mlx5_version);
@@ -1794,6 +2251,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->mdev = mdev;
+ rwlock_init(&dev->roce.netdev_lock);
err = get_port_caps(dev);
if (err)
goto err_dealloc;
@@ -1821,6 +2279,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_REREG_MR) |
(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
@@ -1839,11 +2298,18 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
(1ull << IB_USER_VERBS_CMD_OPEN_QP);
dev->ib_dev.uverbs_ex_cmd_mask =
- (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE);
+ (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
dev->ib_dev.query_device = mlx5_ib_query_device;
dev->ib_dev.query_port = mlx5_ib_query_port;
+ dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
+ if (ll == IB_LINK_LAYER_ETHERNET)
+ dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
dev->ib_dev.query_gid = mlx5_ib_query_gid;
+ dev->ib_dev.add_gid = mlx5_ib_add_gid;
+ dev->ib_dev.del_gid = mlx5_ib_del_gid;
dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
dev->ib_dev.modify_device = mlx5_ib_modify_device;
dev->ib_dev.modify_port = mlx5_ib_modify_port;
@@ -1874,6 +2340,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq;
dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
+ dev->ib_dev.rereg_user_mr = mlx5_ib_rereg_user_mr;
dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
@@ -1885,6 +2352,14 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
mlx5_ib_internal_fill_odp_caps(dev);
+ if (MLX5_CAP_GEN(mdev, imaicl)) {
+ dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw;
+ dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw;
+ dev->ib_dev.uverbs_cmd_mask |=
+ (1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
+ }
+
if (MLX5_CAP_GEN(mdev, xrc)) {
dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
@@ -1893,7 +2368,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
}
- if (mlx5_ib_port_link_layer(&dev->ib_dev) ==
+ if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
IB_LINK_LAYER_ETHERNET) {
dev->ib_dev.create_flow = mlx5_ib_create_flow;
dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
@@ -1908,9 +2383,15 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
mutex_init(&dev->flow_db.lock);
mutex_init(&dev->cap_mask_mutex);
+ if (ll == IB_LINK_LAYER_ETHERNET) {
+ err = mlx5_enable_roce(dev);
+ if (err)
+ goto err_dealloc;
+ }
+
err = create_dev_resources(&dev->devr);
if (err)
- goto err_dealloc;
+ goto err_disable_roce;
err = mlx5_ib_odp_init_one(dev);
if (err)
@@ -1947,6 +2428,10 @@ err_odp:
err_rsrc:
destroy_dev_resources(&dev->devr);
+err_disable_roce:
+ if (ll == IB_LINK_LAYER_ETHERNET)
+ mlx5_disable_roce(dev);
+
err_dealloc:
ib_dealloc_device((struct ib_device *)dev);
@@ -1956,11 +2441,14 @@ err_dealloc:
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
{
struct mlx5_ib_dev *dev = context;
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
ib_unregister_device(&dev->ib_dev);
destroy_umrc_res(dev);
mlx5_ib_odp_remove_one(dev);
destroy_dev_resources(&dev->devr);
+ if (ll == IB_LINK_LAYER_ETHERNET)
+ mlx5_disable_roce(dev);
ib_dealloc_device(&dev->ib_dev);
}
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 1474cccd1e0f..76b2b42e0535 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -42,6 +42,8 @@
#include <linux/mlx5/qp.h>
#include <linux/mlx5/srq.h>
#include <linux/types.h>
+#include <linux/mlx5/transobj.h>
+#include <rdma/ib_user_verbs.h>
#define mlx5_ib_dbg(dev, format, arg...) \
pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
@@ -55,6 +57,11 @@ pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
__LINE__, current->pid, ##arg)
+#define field_avail(type, fld, sz) (offsetof(type, fld) + \
+ sizeof(((type *)0)->fld) <= (sz))
+#define MLX5_IB_DEFAULT_UIDX 0xffffff
+#define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)
+
enum {
MLX5_IB_MMAP_CMD_SHIFT = 8,
MLX5_IB_MMAP_CMD_MASK = 0xff,
@@ -62,7 +69,9 @@ enum {
enum mlx5_ib_mmap_cmd {
MLX5_IB_MMAP_REGULAR_PAGE = 0,
- MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1, /* always last */
+ MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1,
+ /* 5 is chosen in order to be compatible with old versions of libmlx5 */
+ MLX5_IB_MMAP_CORE_CLOCK = 5,
};
enum {
@@ -85,6 +94,15 @@ enum mlx5_ib_mad_ifc_flags {
MLX5_MAD_IFC_NET_VIEW = 4,
};
+enum {
+ MLX5_CROSS_CHANNEL_UUAR = 0,
+};
+
+enum {
+ MLX5_CQE_VERSION_V0,
+ MLX5_CQE_VERSION_V1,
+};
+
struct mlx5_ib_ucontext {
struct ib_ucontext ibucontext;
struct list_head db_page_list;
@@ -93,6 +111,9 @@ struct mlx5_ib_ucontext {
*/
struct mutex db_page_mutex;
struct mlx5_uuar_info uuari;
+ u8 cqe_version;
+ /* Transport Domain number */
+ u32 tdn;
};
static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -106,7 +127,7 @@ struct mlx5_ib_pd {
};
#define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1)
-#define MLX5_IB_FLOW_LAST_PRIO (MLX5_IB_FLOW_MCAST_PRIO - 1)
+#define MLX5_IB_FLOW_LAST_PRIO (MLX5_BY_PASS_NUM_REGULAR_PRIOS - 1)
#if (MLX5_IB_FLOW_LAST_PRIO <= 0)
#error "Invalid number of bypass priorities"
#endif
@@ -142,9 +163,31 @@ struct mlx5_ib_flow_db {
#define MLX5_IB_SEND_UMR_UNREG IB_SEND_RESERVED_START
#define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 1)
#define MLX5_IB_SEND_UMR_UPDATE_MTT (IB_SEND_RESERVED_START << 2)
+
+#define MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (IB_SEND_RESERVED_START << 3)
+#define MLX5_IB_SEND_UMR_UPDATE_PD (IB_SEND_RESERVED_START << 4)
+#define MLX5_IB_SEND_UMR_UPDATE_ACCESS IB_SEND_RESERVED_END
+
#define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1
+/*
+ * IB_QPT_GSI creates the software wrapper around GSI, and MLX5_IB_QPT_HW_GSI
+ * creates the actual hardware QP.
+ */
+#define MLX5_IB_QPT_HW_GSI IB_QPT_RESERVED2
#define MLX5_IB_WR_UMR IB_WR_RESERVED1
+/* Private QP creation flags to be passed in ib_qp_init_attr.create_flags.
+ *
+ * These flags are intended for internal use by the mlx5_ib driver, and they
+ * rely on the range reserved for that use in the ib_qp_create_flags enum.
+ */
+
+/* Create a UD QP whose source QP number is 1 */
+static inline enum ib_qp_create_flags mlx5_ib_create_qp_sqpn_qp1(void)
+{
+ return IB_QP_CREATE_RESERVED_START;
+}
+
struct wr_list {
u16 opcode;
u16 next;
@@ -201,47 +244,70 @@ struct mlx5_ib_pfault {
struct mlx5_pagefault mpfault;
};
+struct mlx5_ib_ubuffer {
+ struct ib_umem *umem;
+ int buf_size;
+ u64 buf_addr;
+};
+
+struct mlx5_ib_qp_base {
+ struct mlx5_ib_qp *container_mibqp;
+ struct mlx5_core_qp mqp;
+ struct mlx5_ib_ubuffer ubuffer;
+};
+
+struct mlx5_ib_qp_trans {
+ struct mlx5_ib_qp_base base;
+ u16 xrcdn;
+ u8 alt_port;
+ u8 atomic_rd_en;
+ u8 resp_depth;
+};
+
struct mlx5_ib_rq {
+ struct mlx5_ib_qp_base base;
+ struct mlx5_ib_wq *rq;
+ struct mlx5_ib_ubuffer ubuffer;
+ struct mlx5_db *doorbell;
u32 tirn;
+ u8 state;
+};
+
+struct mlx5_ib_sq {
+ struct mlx5_ib_qp_base base;
+ struct mlx5_ib_wq *sq;
+ struct mlx5_ib_ubuffer ubuffer;
+ struct mlx5_db *doorbell;
+ u32 tisn;
+ u8 state;
};
struct mlx5_ib_raw_packet_qp {
+ struct mlx5_ib_sq sq;
struct mlx5_ib_rq rq;
};
struct mlx5_ib_qp {
struct ib_qp ibqp;
union {
- struct mlx5_core_qp mqp;
- struct mlx5_ib_raw_packet_qp raw_packet_qp;
+ struct mlx5_ib_qp_trans trans_qp;
+ struct mlx5_ib_raw_packet_qp raw_packet_qp;
};
-
struct mlx5_buf buf;
struct mlx5_db db;
struct mlx5_ib_wq rq;
- u32 doorbell_qpn;
u8 sq_signal_bits;
u8 fm_cache;
- int sq_max_wqes_per_wr;
- int sq_spare_wqes;
struct mlx5_ib_wq sq;
- struct ib_umem *umem;
- int buf_size;
-
/* serialize qp state modifications
*/
struct mutex mutex;
- u16 xrcdn;
u32 flags;
u8 port;
- u8 alt_port;
- u8 atomic_rd_en;
- u8 resp_depth;
u8 state;
- int mlx_type;
int wq_sig;
int scat_cqe;
int max_inline_data;
@@ -282,8 +348,14 @@ struct mlx5_ib_cq_buf {
};
enum mlx5_ib_qp_flags {
- MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 0,
- MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 1,
+ MLX5_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
+ MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
+ MLX5_IB_QP_CROSS_CHANNEL = IB_QP_CREATE_CROSS_CHANNEL,
+ MLX5_IB_QP_MANAGED_SEND = IB_QP_CREATE_MANAGED_SEND,
+ MLX5_IB_QP_MANAGED_RECV = IB_QP_CREATE_MANAGED_RECV,
+ MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 5,
+ /* QP uses 1 as its source QP number */
+ MLX5_IB_QP_SQPN_QP1 = 1 << 6,
};
struct mlx5_umr_wr {
@@ -326,6 +398,15 @@ struct mlx5_ib_cq {
struct mlx5_ib_cq_buf *resize_buf;
struct ib_umem *resize_umem;
int cqe_size;
+ u32 create_flags;
+ struct list_head wc_list;
+ enum ib_cq_notify_flags notify_flags;
+ struct work_struct notify_work;
+};
+
+struct mlx5_ib_wc {
+ struct ib_wc wc;
+ struct list_head list;
};
struct mlx5_ib_srq {
@@ -366,7 +447,8 @@ struct mlx5_ib_mr {
int ndescs;
int max_descs;
int desc_size;
- struct mlx5_core_mr mmr;
+ int access_mode;
+ struct mlx5_core_mkey mmkey;
struct ib_umem *umem;
struct mlx5_shared_mr_info *smr_info;
struct list_head list;
@@ -378,19 +460,20 @@ struct mlx5_ib_mr {
struct mlx5_core_sig_ctx *sig;
int live;
void *descs_alloc;
+ int access_flags; /* Needed for rereg MR */
+};
+
+struct mlx5_ib_mw {
+ struct ib_mw ibmw;
+ struct mlx5_core_mkey mmkey;
};
struct mlx5_ib_umr_context {
+ struct ib_cqe cqe;
enum ib_wc_status status;
struct completion done;
};
-static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
-{
- context->status = -1;
- init_completion(&context->done);
-}
-
struct umr_common {
struct ib_pd *pd;
struct ib_cq *cq;
@@ -440,6 +523,14 @@ struct mlx5_mr_cache {
unsigned long last_add;
};
+struct mlx5_ib_gsi_qp;
+
+struct mlx5_ib_port_resources {
+ struct mlx5_ib_resources *devr;
+ struct mlx5_ib_gsi_qp *gsi;
+ struct work_struct pkey_change_work;
+};
+
struct mlx5_ib_resources {
struct ib_cq *c0;
struct ib_xrcd *x0;
@@ -447,11 +538,24 @@ struct mlx5_ib_resources {
struct ib_pd *p0;
struct ib_srq *s0;
struct ib_srq *s1;
+ struct mlx5_ib_port_resources ports[2];
+ /* Protects changes to the port resources */
+ struct mutex mutex;
+};
+
+struct mlx5_roce {
+ /* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL
+ * netdev pointer
+ */
+ rwlock_t netdev_lock;
+ struct net_device *netdev;
+ struct notifier_block nb;
};
struct mlx5_ib_dev {
struct ib_device ib_dev;
struct mlx5_core_dev *mdev;
+ struct mlx5_roce roce;
MLX5_DECLARE_DOORBELL_LOCK(uar_lock);
int num_ports;
/* serialize update of capability mask
@@ -498,12 +602,12 @@ static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq)
static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp)
{
- return container_of(mqp, struct mlx5_ib_qp, mqp);
+ return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp;
}
-static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr)
+static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mkey *mmkey)
{
- return container_of(mmr, struct mlx5_ib_mr, mmr);
+ return container_of(mmkey, struct mlx5_ib_mr, mmkey);
}
static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd)
@@ -531,6 +635,11 @@ static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr)
return container_of(ibmr, struct mlx5_ib_mr, ibmr);
}
+static inline struct mlx5_ib_mw *to_mmw(struct ib_mw *ibmw)
+{
+ return container_of(ibmw, struct mlx5_ib_mw, ibmw);
+}
+
struct mlx5_ib_ah {
struct ib_ah ibah;
struct mlx5_av av;
@@ -550,8 +659,6 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const void *in_mad, void *response_mad);
-struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
- struct mlx5_ib_ah *ah);
struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
int mlx5_ib_destroy_ah(struct ib_ah *ah);
@@ -578,7 +685,8 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
- void *buffer, u32 length);
+ void *buffer, u32 length,
+ struct mlx5_ib_qp_base *base);
struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
const struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
@@ -592,8 +700,14 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc);
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata);
+struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
+ struct ib_udata *udata);
+int mlx5_ib_dealloc_mw(struct ib_mw *mw);
int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index,
int npages, int zap);
+int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
+ u64 length, u64 virt_addr, int access_flags,
+ struct ib_pd *pd, struct ib_udata *udata);
int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
@@ -644,7 +758,6 @@ int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
-void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context);
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
struct ib_mr_status *mr_status);
@@ -680,6 +793,26 @@ static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) {}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
+ int index);
+
+/* GSI QP helper functions */
+struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr);
+int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp);
+int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
+ int attr_mask);
+int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr);
+int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr);
+int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi);
+
+int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc);
+
static inline void init_query_mad(struct ib_smp *mad)
{
mad->base_version = 1;
@@ -699,10 +832,34 @@ static inline u8 convert_access(int acc)
static inline int is_qp1(enum ib_qp_type qp_type)
{
- return qp_type == IB_QPT_GSI;
+ return qp_type == MLX5_IB_QPT_HW_GSI;
}
#define MLX5_MAX_UMR_SHIFT 16
#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
+static inline u32 check_cq_create_flags(u32 flags)
+{
+ /*
+ * It returns non-zero value for unsupported CQ
+ * create flags, otherwise it returns zero.
+ */
+ return (flags & ~(IB_CQ_FLAGS_IGNORE_OVERRUN |
+ IB_CQ_FLAGS_TIMESTAMP_COMPLETION));
+}
+
+static inline int verify_assign_uidx(u8 cqe_version, u32 cmd_uidx,
+ u32 *user_index)
+{
+ if (cqe_version) {
+ if ((cmd_uidx == MLX5_IB_DEFAULT_UIDX) ||
+ (cmd_uidx & ~MLX5_USER_ASSIGNED_UIDX_MASK))
+ return -EINVAL;
+ *user_index = cmd_uidx;
+ } else {
+ *user_index = MLX5_IB_DEFAULT_UIDX;
+ }
+
+ return 0;
+}
#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 6000f7aeede9..4d5bff151cdf 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -40,6 +40,7 @@
#include <rdma/ib_umem_odp.h>
#include <rdma/ib_verbs.h>
#include "mlx5_ib.h"
+#include "user.h"
enum {
MAX_PENDING_REG_MR = 8,
@@ -57,7 +58,7 @@ static int clean_mr(struct mlx5_ib_mr *mr);
static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
- int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
+ int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
/* Wait until all page fault handlers using the mr complete. */
@@ -77,6 +78,40 @@ static int order2idx(struct mlx5_ib_dev *dev, int order)
return order - cache->ent[0].order;
}
+static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
+{
+ return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
+ length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
+}
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+static void update_odp_mr(struct mlx5_ib_mr *mr)
+{
+ if (mr->umem->odp_data) {
+ /*
+ * This barrier prevents the compiler from moving the
+ * setting of umem->odp_data->private to point to our
+ * MR, before reg_umr finished, to ensure that the MR
+ * initialization have finished before starting to
+ * handle invalidations.
+ */
+ smp_wmb();
+ mr->umem->odp_data->private = mr;
+ /*
+ * Make sure we will see the new
+ * umem->odp_data->private value in the invalidation
+ * routines, before we can get page faults on the
+ * MR. Page faults can happen once we put the MR in
+ * the tree, below this line. Without the barrier,
+ * there can be a fault handling and an invalidation
+ * before umem->odp_data->private == mr is visible to
+ * the invalidation handler.
+ */
+ smp_wmb();
+ }
+}
+#endif
+
static void reg_mr_callback(int status, void *context)
{
struct mlx5_ib_mr *mr = context;
@@ -86,7 +121,7 @@ static void reg_mr_callback(int status, void *context)
struct mlx5_cache_ent *ent = &cache->ent[c];
u8 key;
unsigned long flags;
- struct mlx5_mr_table *table = &dev->mdev->priv.mr_table;
+ struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
int err;
spin_lock_irqsave(&ent->lock, flags);
@@ -113,7 +148,7 @@ static void reg_mr_callback(int status, void *context)
spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
key = dev->mdev->priv.mkey_key++;
spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
- mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
+ mr->mmkey.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
cache->last_add = jiffies;
@@ -124,10 +159,10 @@ static void reg_mr_callback(int status, void *context)
spin_unlock_irqrestore(&ent->lock, flags);
write_lock_irqsave(&table->lock, flags);
- err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key),
- &mr->mmr);
+ err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key),
+ &mr->mmkey);
if (err)
- pr_err("Error inserting to mr tree. 0x%x\n", -err);
+ pr_err("Error inserting to mkey tree. 0x%x\n", -err);
write_unlock_irqrestore(&table->lock, flags);
}
@@ -168,7 +203,7 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
spin_lock_irq(&ent->lock);
ent->pending++;
spin_unlock_irq(&ent->lock);
- err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in,
+ err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in,
sizeof(*in), reg_mr_callback,
mr, &mr->out);
if (err) {
@@ -657,14 +692,14 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
seg->start_addr = 0;
- err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
+ err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, sizeof(*in), NULL, NULL,
NULL);
if (err)
goto err_in;
kfree(in);
- mr->ibmr.lkey = mr->mmr.key;
- mr->ibmr.rkey = mr->mmr.key;
+ mr->ibmr.lkey = mr->mmkey.key;
+ mr->ibmr.rkey = mr->mmkey.key;
mr->umem = NULL;
return &mr->ibmr;
@@ -693,10 +728,40 @@ static int use_umr(int order)
return order <= MLX5_MAX_UMR_SHIFT;
}
-static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
- struct ib_sge *sg, u64 dma, int n, u32 key,
- int page_shift, u64 virt_addr, u64 len,
- int access_flags)
+static int dma_map_mr_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+ int npages, int page_shift, int *size,
+ __be64 **mr_pas, dma_addr_t *dma)
+{
+ __be64 *pas;
+ struct device *ddev = dev->ib_dev.dma_device;
+
+ /*
+ * UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
+ * To avoid copying garbage after the pas array, we allocate
+ * a little more.
+ */
+ *size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
+ *mr_pas = kmalloc(*size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
+ if (!(*mr_pas))
+ return -ENOMEM;
+
+ pas = PTR_ALIGN(*mr_pas, MLX5_UMR_ALIGN);
+ mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
+ /* Clear padding after the actual pages. */
+ memset(pas + npages, 0, *size - npages * sizeof(u64));
+
+ *dma = dma_map_single(ddev, pas, *size, DMA_TO_DEVICE);
+ if (dma_mapping_error(ddev, *dma)) {
+ kfree(*mr_pas);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void prep_umr_wqe_common(struct ib_pd *pd, struct ib_send_wr *wr,
+ struct ib_sge *sg, u64 dma, int n, u32 key,
+ int page_shift)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_umr_wr *umrwr = umr_wr(wr);
@@ -706,7 +771,6 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
sg->lkey = dev->umrc.pd->local_dma_lkey;
wr->next = NULL;
- wr->send_flags = 0;
wr->sg_list = sg;
if (n)
wr->num_sge = 1;
@@ -718,6 +782,19 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
umrwr->npages = n;
umrwr->page_shift = page_shift;
umrwr->mkey = key;
+}
+
+static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
+ struct ib_sge *sg, u64 dma, int n, u32 key,
+ int page_shift, u64 virt_addr, u64 len,
+ int access_flags)
+{
+ struct mlx5_umr_wr *umrwr = umr_wr(wr);
+
+ prep_umr_wqe_common(pd, wr, sg, dma, n, key, page_shift);
+
+ wr->send_flags = 0;
+
umrwr->target.virt_addr = virt_addr;
umrwr->length = len;
umrwr->access_flags = access_flags;
@@ -734,26 +811,45 @@ static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
umrwr->mkey = key;
}
-void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
+static struct ib_umem *mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
+ int access_flags, int *npages,
+ int *page_shift, int *ncont, int *order)
{
- struct mlx5_ib_umr_context *context;
- struct ib_wc wc;
- int err;
-
- while (1) {
- err = ib_poll_cq(cq, 1, &wc);
- if (err < 0) {
- pr_warn("poll cq error %d\n", err);
- return;
- }
- if (err == 0)
- break;
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct ib_umem *umem = ib_umem_get(pd->uobject->context, start, length,
+ access_flags, 0);
+ if (IS_ERR(umem)) {
+ mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
+ return (void *)umem;
+ }
- context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id;
- context->status = wc.status;
- complete(&context->done);
+ mlx5_ib_cont_pages(umem, start, npages, page_shift, ncont, order);
+ if (!*npages) {
+ mlx5_ib_warn(dev, "avoid zero region\n");
+ ib_umem_release(umem);
+ return ERR_PTR(-EINVAL);
}
- ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+
+ mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
+ *npages, *ncont, *order, *page_shift);
+
+ return umem;
+}
+
+static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct mlx5_ib_umr_context *context =
+ container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
+
+ context->status = wc->status;
+ complete(&context->done);
+}
+
+static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
+{
+ context->cqe.done = mlx5_ib_umr_done;
+ context->status = -1;
+ init_completion(&context->done);
}
static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
@@ -764,13 +860,12 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
struct device *ddev = dev->ib_dev.dma_device;
struct umr_common *umrc = &dev->umrc;
struct mlx5_ib_umr_context umr_context;
- struct mlx5_umr_wr umrwr;
+ struct mlx5_umr_wr umrwr = {};
struct ib_send_wr *bad;
struct mlx5_ib_mr *mr;
struct ib_sge sg;
int size;
__be64 *mr_pas;
- __be64 *pas;
dma_addr_t dma;
int err = 0;
int i;
@@ -790,33 +885,17 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
if (!mr)
return ERR_PTR(-EAGAIN);
- /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
- * To avoid copying garbage after the pas array, we allocate
- * a little more. */
- size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
- mr_pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
- if (!mr_pas) {
- err = -ENOMEM;
+ err = dma_map_mr_pas(dev, umem, npages, page_shift, &size, &mr_pas,
+ &dma);
+ if (err)
goto free_mr;
- }
- pas = PTR_ALIGN(mr_pas, MLX5_UMR_ALIGN);
- mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
- /* Clear padding after the actual pages. */
- memset(pas + npages, 0, size - npages * sizeof(u64));
-
- dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
- if (dma_mapping_error(ddev, dma)) {
- err = -ENOMEM;
- goto free_pas;
- }
+ mlx5_ib_init_umr_context(&umr_context);
- memset(&umrwr, 0, sizeof(umrwr));
- umrwr.wr.wr_id = (u64)(unsigned long)&umr_context;
- prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmr.key,
+ umrwr.wr.wr_cqe = &umr_context.cqe;
+ prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
page_shift, virt_addr, len, access_flags);
- mlx5_ib_init_umr_context(&umr_context);
down(&umrc->sem);
err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
if (err) {
@@ -830,9 +909,9 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
}
}
- mr->mmr.iova = virt_addr;
- mr->mmr.size = len;
- mr->mmr.pd = to_mpd(pd)->pdn;
+ mr->mmkey.iova = virt_addr;
+ mr->mmkey.size = len;
+ mr->mmkey.pd = to_mpd(pd)->pdn;
mr->live = 1;
@@ -840,7 +919,6 @@ unmap_dma:
up(&umrc->sem);
dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
-free_pas:
kfree(mr_pas);
free_mr:
@@ -929,8 +1007,10 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
+ mlx5_ib_init_umr_context(&umr_context);
+
memset(&wr, 0, sizeof(wr));
- wr.wr.wr_id = (u64)(unsigned long)&umr_context;
+ wr.wr.wr_cqe = &umr_context.cqe;
sg.addr = dma;
sg.length = ALIGN(npages * sizeof(u64),
@@ -944,10 +1024,9 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
wr.wr.opcode = MLX5_IB_WR_UMR;
wr.npages = sg.length / sizeof(u64);
wr.page_shift = PAGE_SHIFT;
- wr.mkey = mr->mmr.key;
+ wr.mkey = mr->mmkey.key;
wr.target.offset = start_page_index;
- mlx5_ib_init_umr_context(&umr_context);
down(&umrc->sem);
err = ib_post_send(umrc->qp, &wr.wr, &bad);
if (err) {
@@ -974,10 +1053,14 @@ free_pas:
}
#endif
-static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
- u64 length, struct ib_umem *umem,
- int npages, int page_shift,
- int access_flags)
+/*
+ * If ibmr is NULL it will be allocated by reg_create.
+ * Else, the given ibmr will be used.
+ */
+static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
+ u64 virt_addr, u64 length,
+ struct ib_umem *umem, int npages,
+ int page_shift, int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_create_mkey_mbox_in *in;
@@ -986,7 +1069,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
int err;
bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
- mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
@@ -1013,7 +1096,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
1 << page_shift));
- err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL,
+ err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen, NULL,
NULL, NULL);
if (err) {
mlx5_ib_warn(dev, "create mkey failed\n");
@@ -1024,7 +1107,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
mr->live = 1;
kvfree(in);
- mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);
+ mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
return mr;
@@ -1032,11 +1115,23 @@ err_2:
kvfree(in);
err_1:
- kfree(mr);
+ if (!ibmr)
+ kfree(mr);
return ERR_PTR(err);
}
+static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
+ int npages, u64 length, int access_flags)
+{
+ mr->npages = npages;
+ atomic_add(npages, &dev->mdev->priv.reg_pages);
+ mr->ibmr.lkey = mr->mmkey.key;
+ mr->ibmr.rkey = mr->mmkey.key;
+ mr->ibmr.length = length;
+ mr->access_flags = access_flags;
+}
+
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata)
@@ -1052,22 +1147,11 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
start, virt_addr, length, access_flags);
- umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
- 0);
- if (IS_ERR(umem)) {
- mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
- return (void *)umem;
- }
+ umem = mr_umem_get(pd, start, length, access_flags, &npages,
+ &page_shift, &ncont, &order);
- mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
- if (!npages) {
- mlx5_ib_warn(dev, "avoid zero region\n");
- err = -EINVAL;
- goto error;
- }
-
- mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
- npages, ncont, order, page_shift);
+ if (IS_ERR(umem))
+ return (void *)umem;
if (use_umr(order)) {
mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
@@ -1083,45 +1167,21 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
if (!mr)
- mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
- access_flags);
+ mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
+ page_shift, access_flags);
if (IS_ERR(mr)) {
err = PTR_ERR(mr);
goto error;
}
- mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);
+ mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
mr->umem = umem;
- mr->npages = npages;
- atomic_add(npages, &dev->mdev->priv.reg_pages);
- mr->ibmr.lkey = mr->mmr.key;
- mr->ibmr.rkey = mr->mmr.key;
+ set_mr_fileds(dev, mr, npages, length, access_flags);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (umem->odp_data) {
- /*
- * This barrier prevents the compiler from moving the
- * setting of umem->odp_data->private to point to our
- * MR, before reg_umr finished, to ensure that the MR
- * initialization have finished before starting to
- * handle invalidations.
- */
- smp_wmb();
- mr->umem->odp_data->private = mr;
- /*
- * Make sure we will see the new
- * umem->odp_data->private value in the invalidation
- * routines, before we can get page faults on the
- * MR. Page faults can happen once we put the MR in
- * the tree, below this line. Without the barrier,
- * there can be a fault handling and an invalidation
- * before umem->odp_data->private == mr is visible to
- * the invalidation handler.
- */
- smp_wmb();
- }
+ update_odp_mr(mr);
#endif
return &mr->ibmr;
@@ -1135,15 +1195,15 @@ static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
struct umr_common *umrc = &dev->umrc;
struct mlx5_ib_umr_context umr_context;
- struct mlx5_umr_wr umrwr;
+ struct mlx5_umr_wr umrwr = {};
struct ib_send_wr *bad;
int err;
- memset(&umrwr.wr, 0, sizeof(umrwr));
- umrwr.wr.wr_id = (u64)(unsigned long)&umr_context;
- prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmr.key);
-
mlx5_ib_init_umr_context(&umr_context);
+
+ umrwr.wr.wr_cqe = &umr_context.cqe;
+ prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmkey.key);
+
down(&umrc->sem);
err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
if (err) {
@@ -1165,6 +1225,167 @@ error:
return err;
}
+static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr,
+ u64 length, int npages, int page_shift, int order,
+ int access_flags, int flags)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct device *ddev = dev->ib_dev.dma_device;
+ struct mlx5_ib_umr_context umr_context;
+ struct ib_send_wr *bad;
+ struct mlx5_umr_wr umrwr = {};
+ struct ib_sge sg;
+ struct umr_common *umrc = &dev->umrc;
+ dma_addr_t dma = 0;
+ __be64 *mr_pas = NULL;
+ int size;
+ int err;
+
+ mlx5_ib_init_umr_context(&umr_context);
+
+ umrwr.wr.wr_cqe = &umr_context.cqe;
+ umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;
+
+ if (flags & IB_MR_REREG_TRANS) {
+ err = dma_map_mr_pas(dev, mr->umem, npages, page_shift, &size,
+ &mr_pas, &dma);
+ if (err)
+ return err;
+
+ umrwr.target.virt_addr = virt_addr;
+ umrwr.length = length;
+ umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
+ }
+
+ prep_umr_wqe_common(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
+ page_shift);
+
+ if (flags & IB_MR_REREG_PD) {
+ umrwr.pd = pd;
+ umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD;
+ }
+
+ if (flags & IB_MR_REREG_ACCESS) {
+ umrwr.access_flags = access_flags;
+ umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_ACCESS;
+ }
+
+ /* post send request to UMR QP */
+ down(&umrc->sem);
+ err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
+
+ if (err) {
+ mlx5_ib_warn(dev, "post send failed, err %d\n", err);
+ } else {
+ wait_for_completion(&umr_context.done);
+ if (umr_context.status != IB_WC_SUCCESS) {
+ mlx5_ib_warn(dev, "reg umr failed (%u)\n",
+ umr_context.status);
+ err = -EFAULT;
+ }
+ }
+
+ up(&umrc->sem);
+ if (flags & IB_MR_REREG_TRANS) {
+ dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
+ kfree(mr_pas);
+ }
+ return err;
+}
+
+int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
+ u64 length, u64 virt_addr, int new_access_flags,
+ struct ib_pd *new_pd, struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
+ struct mlx5_ib_mr *mr = to_mmr(ib_mr);
+ struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd;
+ int access_flags = flags & IB_MR_REREG_ACCESS ?
+ new_access_flags :
+ mr->access_flags;
+ u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address;
+ u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length;
+ int page_shift = 0;
+ int npages = 0;
+ int ncont = 0;
+ int order = 0;
+ int err;
+
+ mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
+ start, virt_addr, length, access_flags);
+
+ if (flags != IB_MR_REREG_PD) {
+ /*
+ * Replace umem. This needs to be done whether or not UMR is
+ * used.
+ */
+ flags |= IB_MR_REREG_TRANS;
+ ib_umem_release(mr->umem);
+ mr->umem = mr_umem_get(pd, addr, len, access_flags, &npages,
+ &page_shift, &ncont, &order);
+ if (IS_ERR(mr->umem)) {
+ err = PTR_ERR(mr->umem);
+ mr->umem = NULL;
+ return err;
+ }
+ }
+
+ if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) {
+ /*
+ * UMR can't be used - MKey needs to be replaced.
+ */
+ if (mr->umred) {
+ err = unreg_umr(dev, mr);
+ if (err)
+ mlx5_ib_warn(dev, "Failed to unregister MR\n");
+ } else {
+ err = destroy_mkey(dev, mr);
+ if (err)
+ mlx5_ib_warn(dev, "Failed to destroy MKey\n");
+ }
+ if (err)
+ return err;
+
+ mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
+ page_shift, access_flags);
+
+ if (IS_ERR(mr))
+ return PTR_ERR(mr);
+
+ mr->umred = 0;
+ } else {
+ /*
+ * Send a UMR WQE
+ */
+ err = rereg_umr(pd, mr, addr, len, npages, page_shift,
+ order, access_flags, flags);
+ if (err) {
+ mlx5_ib_warn(dev, "Failed to rereg UMR\n");
+ return err;
+ }
+ }
+
+ if (flags & IB_MR_REREG_PD) {
+ ib_mr->pd = pd;
+ mr->mmkey.pd = to_mpd(pd)->pdn;
+ }
+
+ if (flags & IB_MR_REREG_ACCESS)
+ mr->access_flags = access_flags;
+
+ if (flags & IB_MR_REREG_TRANS) {
+ atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
+ set_mr_fileds(dev, mr, npages, len, access_flags);
+ mr->mmkey.iova = addr;
+ mr->mmkey.size = len;
+ }
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ update_odp_mr(mr);
+#endif
+
+ return 0;
+}
+
static int
mlx5_alloc_priv_descs(struct ib_device *device,
struct mlx5_ib_mr *mr,
@@ -1236,7 +1457,7 @@ static int clean_mr(struct mlx5_ib_mr *mr)
err = destroy_mkey(dev, mr);
if (err) {
mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
- mr->mmr.key, err);
+ mr->mmkey.key, err);
return err;
}
} else {
@@ -1300,8 +1521,8 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_create_mkey_mbox_in *in;
struct mlx5_ib_mr *mr;
- int access_mode, err;
- int ndescs = roundup(max_num_sg, 4);
+ int ndescs = ALIGN(max_num_sg, 4);
+ int err;
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
@@ -1319,7 +1540,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
if (mr_type == IB_MR_TYPE_MEM_REG) {
- access_mode = MLX5_ACCESS_MODE_MTT;
+ mr->access_mode = MLX5_ACCESS_MODE_MTT;
in->seg.log2_page_size = PAGE_SHIFT;
err = mlx5_alloc_priv_descs(pd->device, mr,
@@ -1329,6 +1550,15 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
mr->desc_size = sizeof(u64);
mr->max_descs = ndescs;
+ } else if (mr_type == IB_MR_TYPE_SG_GAPS) {
+ mr->access_mode = MLX5_ACCESS_MODE_KLM;
+
+ err = mlx5_alloc_priv_descs(pd->device, mr,
+ ndescs, sizeof(struct mlx5_klm));
+ if (err)
+ goto err_free_in;
+ mr->desc_size = sizeof(struct mlx5_klm);
+ mr->max_descs = ndescs;
} else if (mr_type == IB_MR_TYPE_SIGNATURE) {
u32 psv_index[2];
@@ -1347,7 +1577,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
if (err)
goto err_free_sig;
- access_mode = MLX5_ACCESS_MODE_KLM;
+ mr->access_mode = MLX5_ACCESS_MODE_KLM;
mr->sig->psv_memory.psv_idx = psv_index[0];
mr->sig->psv_wire.psv_idx = psv_index[1];
@@ -1361,14 +1591,14 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
goto err_free_in;
}
- in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
- err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in),
+ in->seg.flags = MLX5_PERM_UMR_EN | mr->access_mode;
+ err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, sizeof(*in),
NULL, NULL, NULL);
if (err)
goto err_destroy_psv;
- mr->ibmr.lkey = mr->mmr.key;
- mr->ibmr.rkey = mr->mmr.key;
+ mr->ibmr.lkey = mr->mmkey.key;
+ mr->ibmr.rkey = mr->mmkey.key;
mr->umem = NULL;
kfree(in);
@@ -1395,6 +1625,88 @@ err_free:
return ERR_PTR(err);
}
+struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_create_mkey_mbox_in *in = NULL;
+ struct mlx5_ib_mw *mw = NULL;
+ int ndescs;
+ int err;
+ struct mlx5_ib_alloc_mw req = {};
+ struct {
+ __u32 comp_mask;
+ __u32 response_length;
+ } resp = {};
+
+ err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
+ if (err)
+ return ERR_PTR(err);
+
+ if (req.comp_mask || req.reserved1 || req.reserved2)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (udata->inlen > sizeof(req) &&
+ !ib_is_udata_cleared(udata, sizeof(req),
+ udata->inlen - sizeof(req)))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
+
+ mw = kzalloc(sizeof(*mw), GFP_KERNEL);
+ in = kzalloc(sizeof(*in), GFP_KERNEL);
+ if (!mw || !in) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ in->seg.status = MLX5_MKEY_STATUS_FREE;
+ in->seg.xlt_oct_size = cpu_to_be32(ndescs);
+ in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
+ in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_KLM |
+ MLX5_PERM_LOCAL_READ;
+ if (type == IB_MW_TYPE_2)
+ in->seg.flags_pd |= cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
+ in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+
+ err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, sizeof(*in),
+ NULL, NULL, NULL);
+ if (err)
+ goto free;
+
+ mw->ibmw.rkey = mw->mmkey.key;
+
+ resp.response_length = min(offsetof(typeof(resp), response_length) +
+ sizeof(resp.response_length), udata->outlen);
+ if (resp.response_length) {
+ err = ib_copy_to_udata(udata, &resp, resp.response_length);
+ if (err) {
+ mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
+ goto free;
+ }
+ }
+
+ kfree(in);
+ return &mw->ibmw;
+
+free:
+ kfree(mw);
+ kfree(in);
+ return ERR_PTR(err);
+}
+
+int mlx5_ib_dealloc_mw(struct ib_mw *mw)
+{
+ struct mlx5_ib_mw *mmw = to_mmw(mw);
+ int err;
+
+ err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev,
+ &mmw->mmkey);
+ if (!err)
+ kfree(mmw);
+ return err;
+}
+
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
struct ib_mr_status *mr_status)
{
@@ -1436,6 +1748,32 @@ done:
return ret;
}
+static int
+mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
+ struct scatterlist *sgl,
+ unsigned short sg_nents)
+{
+ struct scatterlist *sg = sgl;
+ struct mlx5_klm *klms = mr->descs;
+ u32 lkey = mr->ibmr.pd->local_dma_lkey;
+ int i;
+
+ mr->ibmr.iova = sg_dma_address(sg);
+ mr->ibmr.length = 0;
+ mr->ndescs = sg_nents;
+
+ for_each_sg(sgl, sg, sg_nents, i) {
+ if (unlikely(i > mr->max_descs))
+ break;
+ klms[i].va = cpu_to_be64(sg_dma_address(sg));
+ klms[i].bcount = cpu_to_be32(sg_dma_len(sg));
+ klms[i].key = cpu_to_be32(lkey);
+ mr->ibmr.length += sg_dma_len(sg);
+ }
+
+ return i;
+}
+
static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
{
struct mlx5_ib_mr *mr = to_mmr(ibmr);
@@ -1463,7 +1801,10 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr,
mr->desc_size * mr->max_descs,
DMA_TO_DEVICE);
- n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page);
+ if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
+ n = mlx5_ib_sg_to_klms(mr, sg, sg_nents);
+ else
+ n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page);
ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
mr->desc_size * mr->max_descs,
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index aa8391e75385..34e79e709c67 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -142,25 +142,27 @@ static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev,
u32 key)
{
u32 base_key = mlx5_base_mkey(key);
- struct mlx5_core_mr *mmr = __mlx5_mr_lookup(dev->mdev, base_key);
- struct mlx5_ib_mr *mr = container_of(mmr, struct mlx5_ib_mr, mmr);
+ struct mlx5_core_mkey *mmkey = __mlx5_mr_lookup(dev->mdev, base_key);
+ struct mlx5_ib_mr *mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
- if (!mmr || mmr->key != key || !mr->live)
+ if (!mmkey || mmkey->key != key || !mr->live)
return NULL;
- return container_of(mmr, struct mlx5_ib_mr, mmr);
+ return container_of(mmkey, struct mlx5_ib_mr, mmkey);
}
static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp,
struct mlx5_ib_pfault *pfault,
- int error) {
+ int error)
+{
struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
- int ret = mlx5_core_page_fault_resume(dev->mdev, qp->mqp.qpn,
+ u32 qpn = qp->trans_qp.base.mqp.qpn;
+ int ret = mlx5_core_page_fault_resume(dev->mdev,
+ qpn,
pfault->mpfault.flags,
error);
if (ret)
- pr_err("Failed to resolve the page fault on QP 0x%x\n",
- qp->mqp.qpn);
+ pr_err("Failed to resolve the page fault on QP 0x%x\n", qpn);
}
/*
@@ -230,7 +232,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_qp *qp,
io_virt += pfault->mpfault.bytes_committed;
bcnt -= pfault->mpfault.bytes_committed;
- start_idx = (io_virt - (mr->mmr.iova & PAGE_MASK)) >> PAGE_SHIFT;
+ start_idx = (io_virt - (mr->mmkey.iova & PAGE_MASK)) >> PAGE_SHIFT;
if (mr->umem->writable)
access_mask |= ODP_WRITE_ALLOWED_BIT;
@@ -391,6 +393,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
#if defined(DEBUG)
u32 ctrl_wqe_index, ctrl_qpn;
#endif
+ u32 qpn = qp->trans_qp.base.mqp.qpn;
ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
if (ds * MLX5_WQE_DS_UNITS > wqe_length) {
@@ -401,7 +404,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
if (ds == 0) {
mlx5_ib_err(dev, "Got WQE with zero DS. wqe_index=%x, qpn=%x\n",
- wqe_index, qp->mqp.qpn);
+ wqe_index, qpn);
return -EFAULT;
}
@@ -411,16 +414,16 @@ static int mlx5_ib_mr_initiator_pfault_handler(
MLX5_WQE_CTRL_WQE_INDEX_SHIFT;
if (wqe_index != ctrl_wqe_index) {
mlx5_ib_err(dev, "Got WQE with invalid wqe_index. wqe_index=0x%x, qpn=0x%x ctrl->wqe_index=0x%x\n",
- wqe_index, qp->mqp.qpn,
+ wqe_index, qpn,
ctrl_wqe_index);
return -EFAULT;
}
ctrl_qpn = (be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_QPN_MASK) >>
MLX5_WQE_CTRL_QPN_SHIFT;
- if (qp->mqp.qpn != ctrl_qpn) {
+ if (qpn != ctrl_qpn) {
mlx5_ib_err(dev, "Got WQE with incorrect QP number. wqe_index=0x%x, qpn=0x%x ctrl->qpn=0x%x\n",
- wqe_index, qp->mqp.qpn,
+ wqe_index, qpn,
ctrl_qpn);
return -EFAULT;
}
@@ -537,6 +540,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
int resume_with_error = 0;
u16 wqe_index = pfault->mpfault.wqe.wqe_index;
int requestor = pfault->mpfault.flags & MLX5_PFAULT_REQUESTOR;
+ u32 qpn = qp->trans_qp.base.mqp.qpn;
buffer = (char *)__get_free_page(GFP_KERNEL);
if (!buffer) {
@@ -546,10 +550,10 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
}
ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
- PAGE_SIZE);
+ PAGE_SIZE, &qp->trans_qp.base);
if (ret < 0) {
mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%x, wqe_index=%x, qpn=%x\n",
- -ret, wqe_index, qp->mqp.qpn);
+ -ret, wqe_index, qpn);
resume_with_error = 1;
goto resolve_page_fault;
}
@@ -586,7 +590,8 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
resolve_page_fault:
mlx5_ib_page_fault_resume(qp, pfault, resume_with_error);
mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, flags: 0x%x\n",
- qp->mqp.qpn, resume_with_error, pfault->mpfault.flags);
+ qpn, resume_with_error,
+ pfault->mpfault.flags);
free_page((unsigned long)buffer);
}
@@ -753,7 +758,7 @@ void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp)
qp->disable_page_faults = 1;
spin_lock_init(&qp->disable_page_faults_lock);
- qp->mqp.pfault_handler = mlx5_ib_pfault_handler;
+ qp->trans_qp.base.mqp.pfault_handler = mlx5_ib_pfault_handler;
for (i = 0; i < MLX5_IB_PAGEFAULT_CONTEXTS; ++i)
INIT_WORK(&qp->pagefaults[i].work, mlx5_ib_qp_pfault_action);
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 307bdbca8938..8dee8bc1e0fe 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -32,6 +32,8 @@
#include <linux/module.h>
#include <rdma/ib_umem.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_user_verbs.h>
#include "mlx5_ib.h"
#include "user.h"
@@ -56,6 +58,7 @@ enum {
static const u32 mlx5_ib_opcode[] = {
[IB_WR_SEND] = MLX5_OPCODE_SEND,
+ [IB_WR_LSO] = MLX5_OPCODE_LSO,
[IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM,
[IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE,
[IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM,
@@ -70,6 +73,9 @@ static const u32 mlx5_ib_opcode[] = {
[MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR,
};
+struct mlx5_wqe_eth_pad {
+ u8 rsvd0[16];
+};
static int is_qp0(enum ib_qp_type qp_type)
{
@@ -114,14 +120,15 @@ void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
* Return: the number of bytes copied, or an error code.
*/
int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
- void *buffer, u32 length)
+ void *buffer, u32 length,
+ struct mlx5_ib_qp_base *base)
{
struct ib_device *ibdev = qp->ibqp.device;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_wq *wq = send ? &qp->sq : &qp->rq;
size_t offset;
size_t wq_end;
- struct ib_umem *umem = qp->umem;
+ struct ib_umem *umem = base->ubuffer.umem;
u32 first_copy_length;
int wqe_length;
int ret;
@@ -172,8 +179,10 @@ static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
struct ib_event event;
- if (type == MLX5_EVENT_TYPE_PATH_MIG)
- to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
+ if (type == MLX5_EVENT_TYPE_PATH_MIG) {
+ /* This event is only valid for trans_qps */
+ to_mibqp(qp)->port = to_mibqp(qp)->trans_qp.alt_port;
+ }
if (ibqp->event_handler) {
event.device = ibqp->device;
@@ -255,18 +264,20 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
return 0;
}
-static int sq_overhead(enum ib_qp_type qp_type)
+static int sq_overhead(struct ib_qp_init_attr *attr)
{
int size = 0;
- switch (qp_type) {
+ switch (attr->qp_type) {
case IB_QPT_XRC_INI:
size += sizeof(struct mlx5_wqe_xrc_seg);
/* fall through */
case IB_QPT_RC:
size += sizeof(struct mlx5_wqe_ctrl_seg) +
- sizeof(struct mlx5_wqe_atomic_seg) +
- sizeof(struct mlx5_wqe_raddr_seg);
+ max(sizeof(struct mlx5_wqe_atomic_seg) +
+ sizeof(struct mlx5_wqe_raddr_seg),
+ sizeof(struct mlx5_wqe_umr_ctrl_seg) +
+ sizeof(struct mlx5_mkey_seg));
break;
case IB_QPT_XRC_TGT:
@@ -274,14 +285,18 @@ static int sq_overhead(enum ib_qp_type qp_type)
case IB_QPT_UC:
size += sizeof(struct mlx5_wqe_ctrl_seg) +
- sizeof(struct mlx5_wqe_raddr_seg) +
- sizeof(struct mlx5_wqe_umr_ctrl_seg) +
- sizeof(struct mlx5_mkey_seg);
+ max(sizeof(struct mlx5_wqe_raddr_seg),
+ sizeof(struct mlx5_wqe_umr_ctrl_seg) +
+ sizeof(struct mlx5_mkey_seg));
break;
case IB_QPT_UD:
+ if (attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
+ size += sizeof(struct mlx5_wqe_eth_pad) +
+ sizeof(struct mlx5_wqe_eth_seg);
+ /* fall through */
case IB_QPT_SMI:
- case IB_QPT_GSI:
+ case MLX5_IB_QPT_HW_GSI:
size += sizeof(struct mlx5_wqe_ctrl_seg) +
sizeof(struct mlx5_wqe_datagram_seg);
break;
@@ -304,7 +319,7 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr)
int inl_size = 0;
int size;
- size = sq_overhead(attr->qp_type);
+ size = sq_overhead(attr);
if (size < 0)
return size;
@@ -341,8 +356,8 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
return -EINVAL;
}
- qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type) -
- sizeof(struct mlx5_wqe_inline_seg);
+ qp->max_inline_data = wqe_size - sq_overhead(attr) -
+ sizeof(struct mlx5_wqe_inline_seg);
attr->cap.max_inline_data = qp->max_inline_data;
if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN)
@@ -366,7 +381,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
static int set_user_buf_size(struct mlx5_ib_dev *dev,
struct mlx5_ib_qp *qp,
- struct mlx5_ib_create_qp *ucmd)
+ struct mlx5_ib_create_qp *ucmd,
+ struct mlx5_ib_qp_base *base,
+ struct ib_qp_init_attr *attr)
{
int desc_sz = 1 << qp->sq.wqe_shift;
@@ -391,8 +408,13 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev,
return -EINVAL;
}
- qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
- (qp->sq.wqe_cnt << 6);
+ if (attr->qp_type == IB_QPT_RAW_PACKET) {
+ base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
+ qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6;
+ } else {
+ base->ubuffer.buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
+ (qp->sq.wqe_cnt << 6);
+ }
return 0;
}
@@ -576,10 +598,10 @@ static int to_mlx5_st(enum ib_qp_type type)
case IB_QPT_XRC_INI:
case IB_QPT_XRC_TGT: return MLX5_QP_ST_XRC;
case IB_QPT_SMI: return MLX5_QP_ST_QP0;
- case IB_QPT_GSI: return MLX5_QP_ST_QP1;
+ case MLX5_IB_QPT_HW_GSI: return MLX5_QP_ST_QP1;
case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6;
- case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE;
case IB_QPT_RAW_PACKET:
+ case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE;
case IB_QPT_MAX:
default: return -EINVAL;
}
@@ -590,13 +612,51 @@ static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
}
+static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev,
+ struct ib_pd *pd,
+ unsigned long addr, size_t size,
+ struct ib_umem **umem,
+ int *npages, int *page_shift, int *ncont,
+ u32 *offset)
+{
+ int err;
+
+ *umem = ib_umem_get(pd->uobject->context, addr, size, 0, 0);
+ if (IS_ERR(*umem)) {
+ mlx5_ib_dbg(dev, "umem_get failed\n");
+ return PTR_ERR(*umem);
+ }
+
+ mlx5_ib_cont_pages(*umem, addr, npages, page_shift, ncont, NULL);
+
+ err = mlx5_ib_get_buf_offset(addr, *page_shift, offset);
+ if (err) {
+ mlx5_ib_warn(dev, "bad offset\n");
+ goto err_umem;
+ }
+
+ mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %d, page_shift %d, ncont %d, offset %d\n",
+ addr, size, *npages, *page_shift, *ncont, *offset);
+
+ return 0;
+
+err_umem:
+ ib_umem_release(*umem);
+ *umem = NULL;
+
+ return err;
+}
+
static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_qp *qp, struct ib_udata *udata,
+ struct ib_qp_init_attr *attr,
struct mlx5_create_qp_mbox_in **in,
- struct mlx5_ib_create_qp_resp *resp, int *inlen)
+ struct mlx5_ib_create_qp_resp *resp, int *inlen,
+ struct mlx5_ib_qp_base *base)
{
struct mlx5_ib_ucontext *context;
struct mlx5_ib_create_qp ucmd;
+ struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer;
int page_shift = 0;
int uar_index;
int npages;
@@ -615,18 +675,23 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
/*
* TBD: should come from the verbs when we have the API
*/
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
- if (uuarn < 0) {
- mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
- mlx5_ib_dbg(dev, "reverting to medium latency\n");
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
+ if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+ /* In CROSS_CHANNEL CQ and QP must use the same UAR */
+ uuarn = MLX5_CROSS_CHANNEL_UUAR;
+ else {
+ uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
if (uuarn < 0) {
- mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
- mlx5_ib_dbg(dev, "reverting to high latency\n");
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
+ mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
+ mlx5_ib_dbg(dev, "reverting to medium latency\n");
+ uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
if (uuarn < 0) {
- mlx5_ib_warn(dev, "uuar allocation failed\n");
- return uuarn;
+ mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
+ mlx5_ib_dbg(dev, "reverting to high latency\n");
+ uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
+ if (uuarn < 0) {
+ mlx5_ib_warn(dev, "uuar allocation failed\n");
+ return uuarn;
+ }
}
}
}
@@ -638,32 +703,20 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
- err = set_user_buf_size(dev, qp, &ucmd);
+ err = set_user_buf_size(dev, qp, &ucmd, base, attr);
if (err)
goto err_uuar;
- if (ucmd.buf_addr && qp->buf_size) {
- qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
- qp->buf_size, 0, 0);
- if (IS_ERR(qp->umem)) {
- mlx5_ib_dbg(dev, "umem_get failed\n");
- err = PTR_ERR(qp->umem);
+ if (ucmd.buf_addr && ubuffer->buf_size) {
+ ubuffer->buf_addr = ucmd.buf_addr;
+ err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr,
+ ubuffer->buf_size,
+ &ubuffer->umem, &npages, &page_shift,
+ &ncont, &offset);
+ if (err)
goto err_uuar;
- }
} else {
- qp->umem = NULL;
- }
-
- if (qp->umem) {
- mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
- &ncont, NULL);
- err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
- if (err) {
- mlx5_ib_warn(dev, "bad offset\n");
- goto err_umem;
- }
- mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
- ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
+ ubuffer->umem = NULL;
}
*inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
@@ -672,8 +725,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
err = -ENOMEM;
goto err_umem;
}
- if (qp->umem)
- mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
+ if (ubuffer->umem)
+ mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift,
+ (*in)->pas, 0);
(*in)->ctx.log_pg_sz_remote_qpn =
cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
(*in)->ctx.params2 = cpu_to_be32(offset << 6);
@@ -704,29 +758,31 @@ err_free:
kvfree(*in);
err_umem:
- if (qp->umem)
- ib_umem_release(qp->umem);
+ if (ubuffer->umem)
+ ib_umem_release(ubuffer->umem);
err_uuar:
free_uuar(&context->uuari, uuarn);
return err;
}
-static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp)
+static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp,
+ struct mlx5_ib_qp_base *base)
{
struct mlx5_ib_ucontext *context;
context = to_mucontext(pd->uobject->context);
mlx5_ib_db_unmap_user(context, &qp->db);
- if (qp->umem)
- ib_umem_release(qp->umem);
+ if (base->ubuffer.umem)
+ ib_umem_release(base->ubuffer.umem);
free_uuar(&context->uuari, qp->uuarn);
}
static int create_kernel_qp(struct mlx5_ib_dev *dev,
struct ib_qp_init_attr *init_attr,
struct mlx5_ib_qp *qp,
- struct mlx5_create_qp_mbox_in **in, int *inlen)
+ struct mlx5_create_qp_mbox_in **in, int *inlen,
+ struct mlx5_ib_qp_base *base)
{
enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
struct mlx5_uuar_info *uuari;
@@ -735,7 +791,10 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
int err;
uuari = &dev->mdev->priv.uuari;
- if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK))
+ if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN |
+ IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
+ IB_QP_CREATE_IPOIB_UD_LSO |
+ mlx5_ib_create_qp_sqpn_qp1()))
return -EINVAL;
if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
@@ -758,9 +817,9 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
qp->rq.offset = 0;
qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
- qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
+ base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
- err = mlx5_buf_alloc(dev->mdev, qp->buf_size, &qp->buf);
+ err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, &qp->buf);
if (err) {
mlx5_ib_dbg(dev, "err %d\n", err);
goto err_uuar;
@@ -780,6 +839,11 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
(*in)->ctx.params1 |= cpu_to_be32(1 << 11);
(*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
+ if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) {
+ (*in)->ctx.deth_sqpn = cpu_to_be32(1);
+ qp->flags |= MLX5_IB_QP_SQPN_QP1;
+ }
+
mlx5_fill_page_array(&qp->buf, (*in)->pas);
err = mlx5_db_alloc(dev->mdev, &qp->db);
@@ -853,19 +917,304 @@ static int is_connected(enum ib_qp_type qp_type)
return 0;
}
+static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq, u32 tdn)
+{
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)];
+ void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(tisc, tisc, transport_domain, tdn);
+
+ return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn);
+}
+
+static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ mlx5_core_destroy_tis(dev->mdev, sq->tisn);
+}
+
+static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq, void *qpin,
+ struct ib_pd *pd)
+{
+ struct mlx5_ib_ubuffer *ubuffer = &sq->ubuffer;
+ __be64 *pas;
+ void *in;
+ void *sqc;
+ void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
+ void *wq;
+ int inlen;
+ int err;
+ int page_shift = 0;
+ int npages;
+ int ncont = 0;
+ u32 offset = 0;
+
+ err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr, ubuffer->buf_size,
+ &sq->ubuffer.umem, &npages, &page_shift,
+ &ncont, &offset);
+ if (err)
+ return err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * ncont;
+ in = mlx5_vzalloc(inlen);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_umem;
+ }
+
+ sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+ MLX5_SET(sqc, sqc, flush_in_error_en, 1);
+ MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+ MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index));
+ MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd));
+ MLX5_SET(sqc, sqc, tis_lst_sz, 1);
+ MLX5_SET(sqc, sqc, tis_num_0, sq->tisn);
+
+ wq = MLX5_ADDR_OF(sqc, sqc, wq);
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
+ MLX5_SET(wq, wq, uar_page, MLX5_GET(qpc, qpc, uar_page));
+ MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
+ MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
+ MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size));
+ MLX5_SET(wq, wq, log_wq_pg_sz, page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(wq, wq, page_offset, offset);
+
+ pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
+ mlx5_ib_populate_pas(dev, sq->ubuffer.umem, page_shift, pas, 0);
+
+ err = mlx5_core_create_sq_tracked(dev->mdev, in, inlen, &sq->base.mqp);
+
+ kvfree(in);
+
+ if (err)
+ goto err_umem;
+
+ return 0;
+
+err_umem:
+ ib_umem_release(sq->ubuffer.umem);
+ sq->ubuffer.umem = NULL;
+
+ return err;
+}
+
+static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
+ ib_umem_release(sq->ubuffer.umem);
+}
+
+static int get_rq_pas_size(void *qpc)
+{
+ u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12;
+ u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride);
+ u32 log_rq_size = MLX5_GET(qpc, qpc, log_rq_size);
+ u32 page_offset = MLX5_GET(qpc, qpc, page_offset);
+ u32 po_quanta = 1 << (log_page_size - 6);
+ u32 rq_sz = 1 << (log_rq_size + 4 + log_rq_stride);
+ u32 page_size = 1 << log_page_size;
+ u32 rq_sz_po = rq_sz + (page_offset * po_quanta);
+ u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size;
+
+ return rq_num_pas * sizeof(u64);
+}
+
+static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq, void *qpin)
+{
+ __be64 *pas;
+ __be64 *qp_pas;
+ void *in;
+ void *rqc;
+ void *wq;
+ void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
+ int inlen;
+ int err;
+ u32 rq_pas_size = get_rq_pas_size(qpc);
+
+ inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size;
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
+ MLX5_SET(rqc, rqc, vsd, 1);
+ MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
+ MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
+ MLX5_SET(rqc, rqc, flush_in_error_en, 1);
+ MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index));
+ MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv));
+
+ wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ MLX5_SET(wq, wq, end_padding_mode,
+ MLX5_GET(qpc, qpc, end_padding_mode));
+ MLX5_SET(wq, wq, page_offset, MLX5_GET(qpc, qpc, page_offset));
+ MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
+ MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
+ MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4);
+ MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(qpc, qpc, log_page_size));
+ MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size));
+
+ pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
+ qp_pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, qpin, pas);
+ memcpy(pas, qp_pas, rq_pas_size);
+
+ err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rq->base.mqp);
+
+ kvfree(in);
+
+ return err;
+}
+
+static void destroy_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq)
+{
+ mlx5_core_destroy_rq_tracked(dev->mdev, &rq->base.mqp);
+}
+
+static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq, u32 tdn)
+{
+ u32 *in;
+ void *tirc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_tir_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+ MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
+ MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn);
+ MLX5_SET(tirc, tirc, transport_domain, tdn);
+
+ err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
+
+ kvfree(in);
+
+ return err;
+}
+
+static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq)
+{
+ mlx5_core_destroy_tir(dev->mdev, rq->tirn);
+}
+
+static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_mbox_in *in,
+ struct ib_pd *pd)
+{
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+ struct ib_uobject *uobj = pd->uobject;
+ struct ib_ucontext *ucontext = uobj->context;
+ struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext);
+ int err;
+ u32 tdn = mucontext->tdn;
+
+ if (qp->sq.wqe_cnt) {
+ err = create_raw_packet_qp_tis(dev, sq, tdn);
+ if (err)
+ return err;
+
+ err = create_raw_packet_qp_sq(dev, sq, in, pd);
+ if (err)
+ goto err_destroy_tis;
+
+ sq->base.container_mibqp = qp;
+ }
+
+ if (qp->rq.wqe_cnt) {
+ err = create_raw_packet_qp_rq(dev, rq, in);
+ if (err)
+ goto err_destroy_sq;
+
+ rq->base.container_mibqp = qp;
+
+ err = create_raw_packet_qp_tir(dev, rq, tdn);
+ if (err)
+ goto err_destroy_rq;
+ }
+
+ qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? sq->base.mqp.qpn :
+ rq->base.mqp.qpn;
+
+ return 0;
+
+err_destroy_rq:
+ destroy_raw_packet_qp_rq(dev, rq);
+err_destroy_sq:
+ if (!qp->sq.wqe_cnt)
+ return err;
+ destroy_raw_packet_qp_sq(dev, sq);
+err_destroy_tis:
+ destroy_raw_packet_qp_tis(dev, sq);
+
+ return err;
+}
+
+static void destroy_raw_packet_qp(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp)
+{
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+
+ if (qp->rq.wqe_cnt) {
+ destroy_raw_packet_qp_tir(dev, rq);
+ destroy_raw_packet_qp_rq(dev, rq);
+ }
+
+ if (qp->sq.wqe_cnt) {
+ destroy_raw_packet_qp_sq(dev, sq);
+ destroy_raw_packet_qp_tis(dev, sq);
+ }
+}
+
+static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp,
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp)
+{
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+
+ sq->sq = &qp->sq;
+ rq->rq = &qp->rq;
+ sq->doorbell = &qp->db;
+ rq->doorbell = &qp->db;
+}
+
static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata, struct mlx5_ib_qp *qp)
{
struct mlx5_ib_resources *devr = &dev->devr;
struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_ib_qp_base *base;
struct mlx5_ib_create_qp_resp resp;
struct mlx5_create_qp_mbox_in *in;
struct mlx5_ib_create_qp ucmd;
int inlen = sizeof(*in);
int err;
+ u32 uidx = MLX5_IB_DEFAULT_UIDX;
+ void *qpc;
+
+ base = init_attr->qp_type == IB_QPT_RAW_PACKET ?
+ &qp->raw_packet_qp.rq.base :
+ &qp->trans_qp.base;
- mlx5_ib_odp_create_qp(qp);
+ if (init_attr->qp_type != IB_QPT_RAW_PACKET)
+ mlx5_ib_odp_create_qp(qp);
mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
@@ -880,6 +1229,29 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
}
}
+ if (init_attr->create_flags &
+ (IB_QP_CREATE_CROSS_CHANNEL |
+ IB_QP_CREATE_MANAGED_SEND |
+ IB_QP_CREATE_MANAGED_RECV)) {
+ if (!MLX5_CAP_GEN(mdev, cd)) {
+ mlx5_ib_dbg(dev, "cross-channel isn't supported\n");
+ return -EINVAL;
+ }
+ if (init_attr->create_flags & IB_QP_CREATE_CROSS_CHANNEL)
+ qp->flags |= MLX5_IB_QP_CROSS_CHANNEL;
+ if (init_attr->create_flags & IB_QP_CREATE_MANAGED_SEND)
+ qp->flags |= MLX5_IB_QP_MANAGED_SEND;
+ if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV)
+ qp->flags |= MLX5_IB_QP_MANAGED_RECV;
+ }
+
+ if (init_attr->qp_type == IB_QPT_UD &&
+ (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO))
+ if (!MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
+ mlx5_ib_dbg(dev, "ipoib UD lso qp isn't supported\n");
+ return -EOPNOTSUPP;
+ }
+
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
@@ -889,6 +1261,11 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
return -EFAULT;
}
+ err = get_qp_user_index(to_mucontext(pd->uobject->context),
+ &ucmd, udata->inlen, &uidx);
+ if (err)
+ return err;
+
qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
} else {
@@ -918,11 +1295,18 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
ucmd.sq_wqe_count, max_wqes);
return -EINVAL;
}
- err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen);
+ if (init_attr->create_flags &
+ mlx5_ib_create_qp_sqpn_qp1()) {
+ mlx5_ib_dbg(dev, "user-space is not allowed to create UD QPs spoofing as QP1\n");
+ return -EINVAL;
+ }
+ err = create_user_qp(dev, pd, qp, udata, init_attr, &in,
+ &resp, &inlen, base);
if (err)
mlx5_ib_dbg(dev, "err %d\n", err);
} else {
- err = create_kernel_qp(dev, init_attr, qp, &in, &inlen);
+ err = create_kernel_qp(dev, init_attr, qp, &in, &inlen,
+ base);
if (err)
mlx5_ib_dbg(dev, "err %d\n", err);
}
@@ -954,6 +1338,13 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST);
+ if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+ in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_MASTER);
+ if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
+ in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_SEND);
+ if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
+ in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_RECV);
+
if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
int rcqe_sz;
int scqe_sz;
@@ -1018,26 +1409,42 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma);
- err = mlx5_core_create_qp(dev->mdev, &qp->mqp, in, inlen);
+ if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+ /* 0xffffff means we ask to work with cqe version 0 */
+ MLX5_SET(qpc, qpc, user_index, uidx);
+ }
+ /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicates ipoib qp */
+ if (init_attr->qp_type == IB_QPT_UD &&
+ (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) {
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+ MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1);
+ qp->flags |= MLX5_IB_QP_LSO;
+ }
+
+ if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
+ qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
+ raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
+ err = create_raw_packet_qp(dev, qp, in, pd);
+ } else {
+ err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen);
+ }
+
if (err) {
mlx5_ib_dbg(dev, "create qp failed\n");
goto err_create;
}
kvfree(in);
- /* Hardware wants QPN written in big-endian order (after
- * shifting) for send doorbell. Precompute this value to save
- * a little bit when posting sends.
- */
- qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
- qp->mqp.event = mlx5_ib_qp_event;
+ base->container_mibqp = qp;
+ base->mqp.event = mlx5_ib_qp_event;
return 0;
err_create:
if (qp->create_type == MLX5_QP_USER)
- destroy_qp_user(pd, qp);
+ destroy_qp_user(pd, qp, base);
else if (qp->create_type == MLX5_QP_KERNEL)
destroy_qp_kernel(dev, qp);
@@ -1123,17 +1530,17 @@ static void get_cqs(struct mlx5_ib_qp *qp,
break;
case IB_QPT_SMI:
- case IB_QPT_GSI:
+ case MLX5_IB_QPT_HW_GSI:
case IB_QPT_RC:
case IB_QPT_UC:
case IB_QPT_UD:
case IB_QPT_RAW_IPV6:
case IB_QPT_RAW_ETHERTYPE:
+ case IB_QPT_RAW_PACKET:
*send_cq = to_mcq(qp->ibqp.send_cq);
*recv_cq = to_mcq(qp->ibqp.recv_cq);
break;
- case IB_QPT_RAW_PACKET:
case IB_QPT_MAX:
default:
*send_cq = NULL;
@@ -1142,45 +1549,66 @@ static void get_cqs(struct mlx5_ib_qp *qp,
}
}
+static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ u16 operation);
+
static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
{
struct mlx5_ib_cq *send_cq, *recv_cq;
+ struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
struct mlx5_modify_qp_mbox_in *in;
int err;
+ base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ?
+ &qp->raw_packet_qp.rq.base :
+ &qp->trans_qp.base;
+
in = kzalloc(sizeof(*in), GFP_KERNEL);
if (!in)
return;
if (qp->state != IB_QPS_RESET) {
- mlx5_ib_qp_disable_pagefaults(qp);
- if (mlx5_core_qp_modify(dev->mdev, to_mlx5_state(qp->state),
- MLX5_QP_STATE_RST, in, 0, &qp->mqp))
- mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n",
- qp->mqp.qpn);
+ if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
+ mlx5_ib_qp_disable_pagefaults(qp);
+ err = mlx5_core_qp_modify(dev->mdev,
+ MLX5_CMD_OP_2RST_QP, in, 0,
+ &base->mqp);
+ } else {
+ err = modify_raw_packet_qp(dev, qp,
+ MLX5_CMD_OP_2RST_QP);
+ }
+ if (err)
+ mlx5_ib_warn(dev, "mlx5_ib: modify QP 0x%06x to RESET failed\n",
+ base->mqp.qpn);
}
get_cqs(qp, &send_cq, &recv_cq);
if (qp->create_type == MLX5_QP_KERNEL) {
mlx5_ib_lock_cqs(send_cq, recv_cq);
- __mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
+ __mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
if (send_cq != recv_cq)
- __mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+ __mlx5_ib_cq_clean(send_cq, base->mqp.qpn,
+ NULL);
mlx5_ib_unlock_cqs(send_cq, recv_cq);
}
- err = mlx5_core_destroy_qp(dev->mdev, &qp->mqp);
- if (err)
- mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n", qp->mqp.qpn);
- kfree(in);
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+ destroy_raw_packet_qp(dev, qp);
+ } else {
+ err = mlx5_core_destroy_qp(dev->mdev, &base->mqp);
+ if (err)
+ mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n",
+ base->mqp.qpn);
+ }
+ kfree(in);
if (qp->create_type == MLX5_QP_KERNEL)
destroy_qp_kernel(dev, qp);
else if (qp->create_type == MLX5_QP_USER)
- destroy_qp_user(&get_pd(qp)->ibpd, qp);
+ destroy_qp_user(&get_pd(qp)->ibpd, qp, base);
}
static const char *ib_qp_type_str(enum ib_qp_type type)
@@ -1225,6 +1653,16 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
if (pd) {
dev = to_mdev(pd->device);
+
+ if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
+ if (!pd->uobject) {
+ mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n");
+ return ERR_PTR(-EINVAL);
+ } else if (!to_mucontext(pd->uobject->context)->cqe_version) {
+ mlx5_ib_dbg(dev, "Raw Packet QP is only supported for CQE version > 0\n");
+ return ERR_PTR(-EINVAL);
+ }
+ }
} else {
/* being cautious here */
if (init_attr->qp_type != IB_QPT_XRC_TGT &&
@@ -1250,11 +1688,12 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
}
/* fall through */
+ case IB_QPT_RAW_PACKET:
case IB_QPT_RC:
case IB_QPT_UC:
case IB_QPT_UD:
case IB_QPT_SMI:
- case IB_QPT_GSI:
+ case MLX5_IB_QPT_HW_GSI:
case MLX5_IB_QPT_REG_UMR:
qp = kzalloc(sizeof(*qp), GFP_KERNEL);
if (!qp)
@@ -1272,19 +1711,22 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
else if (is_qp1(init_attr->qp_type))
qp->ibqp.qp_num = 1;
else
- qp->ibqp.qp_num = qp->mqp.qpn;
+ qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn;
mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
- qp->ibqp.qp_num, qp->mqp.qpn, to_mcq(init_attr->recv_cq)->mcq.cqn,
+ qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn,
+ to_mcq(init_attr->recv_cq)->mcq.cqn,
to_mcq(init_attr->send_cq)->mcq.cqn);
- qp->xrcdn = xrcdn;
+ qp->trans_qp.xrcdn = xrcdn;
break;
+ case IB_QPT_GSI:
+ return mlx5_ib_gsi_create_qp(pd, init_attr);
+
case IB_QPT_RAW_IPV6:
case IB_QPT_RAW_ETHERTYPE:
- case IB_QPT_RAW_PACKET:
case IB_QPT_MAX:
default:
mlx5_ib_dbg(dev, "unsupported qp type %d\n",
@@ -1301,6 +1743,9 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp)
struct mlx5_ib_dev *dev = to_mdev(qp->device);
struct mlx5_ib_qp *mqp = to_mqp(qp);
+ if (unlikely(qp->qp_type == IB_QPT_GSI))
+ return mlx5_ib_gsi_destroy_qp(qp);
+
destroy_qp_common(dev, mqp);
kfree(mqp);
@@ -1318,12 +1763,12 @@ static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_att
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
dest_rd_atomic = attr->max_dest_rd_atomic;
else
- dest_rd_atomic = qp->resp_depth;
+ dest_rd_atomic = qp->trans_qp.resp_depth;
if (attr_mask & IB_QP_ACCESS_FLAGS)
access_flags = attr->qp_access_flags;
else
- access_flags = qp->atomic_rd_en;
+ access_flags = qp->trans_qp.atomic_rd_en;
if (!dest_rd_atomic)
access_flags &= IB_ACCESS_REMOTE_WRITE;
@@ -1360,21 +1805,42 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
return rate + MLX5_STAT_RATE_OFFSET;
}
-static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
+static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
+ struct mlx5_ib_sq *sq, u8 sl)
+{
+ void *in;
+ void *tisc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_tis_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_tis_in, in, bitmask.prio, 1);
+
+ tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
+ MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1));
+
+ err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+
+static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ const struct ib_ah_attr *ah,
struct mlx5_qp_path *path, u8 port, int attr_mask,
u32 path_flags, const struct ib_qp_attr *attr)
{
+ enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port);
int err;
- path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
- path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0;
-
if (attr_mask & IB_QP_PKEY_INDEX)
path->pkey_index = attr->pkey_index;
- path->grh_mlid = ah->src_path_bits & 0x7f;
- path->rlid = cpu_to_be16(ah->dlid);
-
if (ah->ah_flags & IB_AH_GRH) {
if (ah->grh.sgid_index >=
dev->mdev->port_caps[port - 1].gid_table_len) {
@@ -1383,7 +1849,27 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
dev->mdev->port_caps[port - 1].gid_table_len);
return -EINVAL;
}
- path->grh_mlid |= 1 << 7;
+ }
+
+ if (ll == IB_LINK_LAYER_ETHERNET) {
+ if (!(ah->ah_flags & IB_AH_GRH))
+ return -EINVAL;
+ memcpy(path->rmac, ah->dmac, sizeof(ah->dmac));
+ path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
+ ah->grh.sgid_index);
+ path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4;
+ } else {
+ path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
+ path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 :
+ 0;
+ path->rlid = cpu_to_be16(ah->dlid);
+ path->grh_mlid = ah->src_path_bits & 0x7f;
+ if (ah->ah_flags & IB_AH_GRH)
+ path->grh_mlid |= 1 << 7;
+ path->dci_cfi_prio_sl = ah->sl & 0xf;
+ }
+
+ if (ah->ah_flags & IB_AH_GRH) {
path->mgid_index = ah->grh.sgid_index;
path->hop_limit = ah->grh.hop_limit;
path->tclass_flowlabel =
@@ -1401,7 +1887,10 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
if (attr_mask & IB_QP_TIMEOUT)
path->ackto_lt = attr->timeout << 3;
- path->sl = ah->sl & 0xf;
+ if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
+ return modify_raw_packet_eth_prio(dev->mdev,
+ &qp->raw_packet_qp.sq,
+ ah->sl & 0xf);
return 0;
}
@@ -1549,12 +2038,154 @@ static int ib_mask_to_mlx5_opt(int ib_mask)
return result;
}
+static int modify_raw_packet_qp_rq(struct mlx5_core_dev *dev,
+ struct mlx5_ib_rq *rq, int new_state)
+{
+ void *in;
+ void *rqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_rq_in, in, rq_state, rq->state);
+
+ rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+ MLX5_SET(rqc, rqc, state, new_state);
+
+ err = mlx5_core_modify_rq(dev, rq->base.mqp.qpn, in, inlen);
+ if (err)
+ goto out;
+
+ rq->state = new_state;
+
+out:
+ kvfree(in);
+ return err;
+}
+
+static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
+ struct mlx5_ib_sq *sq, int new_state)
+{
+ void *in;
+ void *sqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_sq_in, in, sq_state, sq->state);
+
+ sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+ MLX5_SET(sqc, sqc, state, new_state);
+
+ err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen);
+ if (err)
+ goto out;
+
+ sq->state = new_state;
+
+out:
+ kvfree(in);
+ return err;
+}
+
+static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ u16 operation)
+{
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ int rq_state;
+ int sq_state;
+ int err;
+
+ switch (operation) {
+ case MLX5_CMD_OP_RST2INIT_QP:
+ rq_state = MLX5_RQC_STATE_RDY;
+ sq_state = MLX5_SQC_STATE_RDY;
+ break;
+ case MLX5_CMD_OP_2ERR_QP:
+ rq_state = MLX5_RQC_STATE_ERR;
+ sq_state = MLX5_SQC_STATE_ERR;
+ break;
+ case MLX5_CMD_OP_2RST_QP:
+ rq_state = MLX5_RQC_STATE_RST;
+ sq_state = MLX5_SQC_STATE_RST;
+ break;
+ case MLX5_CMD_OP_INIT2INIT_QP:
+ case MLX5_CMD_OP_INIT2RTR_QP:
+ case MLX5_CMD_OP_RTR2RTS_QP:
+ case MLX5_CMD_OP_RTS2RTS_QP:
+ /* Nothing to do here... */
+ return 0;
+ default:
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ if (qp->rq.wqe_cnt) {
+ err = modify_raw_packet_qp_rq(dev->mdev, rq, rq_state);
+ if (err)
+ return err;
+ }
+
+ if (qp->sq.wqe_cnt)
+ return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state);
+
+ return 0;
+}
+
static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state)
{
+ static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
+ [MLX5_QP_STATE_RST] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_RST2INIT_QP,
+ },
+ [MLX5_QP_STATE_INIT] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_INIT2INIT_QP,
+ [MLX5_QP_STATE_RTR] = MLX5_CMD_OP_INIT2RTR_QP,
+ },
+ [MLX5_QP_STATE_RTR] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTR2RTS_QP,
+ },
+ [MLX5_QP_STATE_RTS] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTS2RTS_QP,
+ },
+ [MLX5_QP_STATE_SQD] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ },
+ [MLX5_QP_STATE_SQER] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQERR2RTS_QP,
+ },
+ [MLX5_QP_STATE_ERR] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ }
+ };
+
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
struct mlx5_ib_cq *send_cq, *recv_cq;
struct mlx5_qp_context *context;
struct mlx5_modify_qp_mbox_in *in;
@@ -1564,6 +2195,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
int sqd_event;
int mlx5_st;
int err;
+ u16 op;
in = kzalloc(sizeof(*in), GFP_KERNEL);
if (!in)
@@ -1571,8 +2203,10 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
context = &in->ctx;
err = to_mlx5_st(ibqp->qp_type);
- if (err < 0)
+ if (err < 0) {
+ mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type);
goto out;
+ }
context->flags = cpu_to_be32(err << 16);
@@ -1592,7 +2226,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
}
}
- if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) {
+ if (is_sqp(ibqp->qp_type)) {
context->mtu_msgmax = (IB_MTU_256 << 5) | 8;
} else if (ibqp->qp_type == IB_QPT_UD ||
ibqp->qp_type == MLX5_IB_QPT_REG_UMR) {
@@ -1623,7 +2257,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
context->pri_path.port = attr->port_num;
if (attr_mask & IB_QP_AV) {
- err = mlx5_set_path(dev, &attr->ah_attr, &context->pri_path,
+ err = mlx5_set_path(dev, qp, &attr->ah_attr, &context->pri_path,
attr_mask & IB_QP_PORT ? attr->port_num : qp->port,
attr_mask, 0, attr);
if (err)
@@ -1634,7 +2268,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
context->pri_path.ackto_lt |= attr->timeout << 3;
if (attr_mask & IB_QP_ALT_PATH) {
- err = mlx5_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
+ err = mlx5_set_path(dev, qp, &attr->alt_ah_attr,
+ &context->alt_path,
attr->alt_port_num, attr_mask, 0, attr);
if (err)
goto out;
@@ -1693,6 +2328,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
context->sq_crq_size |= cpu_to_be16(1 << 4);
+ if (qp->flags & MLX5_IB_QP_SQPN_QP1)
+ context->deth_sqpn = cpu_to_be32(1);
mlx5_cur = to_mlx5_state(cur_state);
mlx5_new = to_mlx5_state(new_state);
@@ -1706,41 +2343,51 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
* again to RTS, and may cause the driver and the device to get out of
* sync. */
if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
- (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
+ (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR) &&
+ (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
mlx5_ib_qp_disable_pagefaults(qp);
+ if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
+ !optab[mlx5_cur][mlx5_new])
+ goto out;
+
+ op = optab[mlx5_cur][mlx5_new];
optpar = ib_mask_to_mlx5_opt(attr_mask);
optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
in->optparam = cpu_to_be32(optpar);
- err = mlx5_core_qp_modify(dev->mdev, to_mlx5_state(cur_state),
- to_mlx5_state(new_state), in, sqd_event,
- &qp->mqp);
+
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)
+ err = modify_raw_packet_qp(dev, qp, op);
+ else
+ err = mlx5_core_qp_modify(dev->mdev, op, in, sqd_event,
+ &base->mqp);
if (err)
goto out;
- if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT &&
+ (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
mlx5_ib_qp_enable_pagefaults(qp);
qp->state = new_state;
if (attr_mask & IB_QP_ACCESS_FLAGS)
- qp->atomic_rd_en = attr->qp_access_flags;
+ qp->trans_qp.atomic_rd_en = attr->qp_access_flags;
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
- qp->resp_depth = attr->max_dest_rd_atomic;
+ qp->trans_qp.resp_depth = attr->max_dest_rd_atomic;
if (attr_mask & IB_QP_PORT)
qp->port = attr->port_num;
if (attr_mask & IB_QP_ALT_PATH)
- qp->alt_port = attr->alt_port_num;
+ qp->trans_qp.alt_port = attr->alt_port_num;
/*
* If we moved a kernel QP to RESET, clean up all old CQ
* entries and reinitialize the QP.
*/
if (new_state == IB_QPS_RESET && !ibqp->uobject) {
- mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
+ mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
ibqp->srq ? to_msrq(ibqp->srq) : NULL);
if (send_cq != recv_cq)
- mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+ mlx5_ib_cq_clean(send_cq, base->mqp.qpn, NULL);
qp->rq.head = 0;
qp->rq.tail = 0;
@@ -1762,41 +2409,68 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ enum ib_qp_type qp_type;
enum ib_qp_state cur_state, new_state;
int err = -EINVAL;
int port;
+ enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
+
+ if (unlikely(ibqp->qp_type == IB_QPT_GSI))
+ return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask);
+
+ qp_type = (unlikely(ibqp->qp_type == MLX5_IB_QPT_HW_GSI)) ?
+ IB_QPT_GSI : ibqp->qp_type;
mutex_lock(&qp->mutex);
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
- if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR &&
- !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
- IB_LINK_LAYER_UNSPECIFIED))
+ if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) {
+ port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+ ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port);
+ }
+
+ if (qp_type != MLX5_IB_QPT_REG_UMR &&
+ !ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) {
+ mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
+ cur_state, new_state, ibqp->qp_type, attr_mask);
goto out;
+ }
if ((attr_mask & IB_QP_PORT) &&
(attr->port_num == 0 ||
- attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports)))
+ attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports))) {
+ mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n",
+ attr->port_num, dev->num_ports);
goto out;
+ }
if (attr_mask & IB_QP_PKEY_INDEX) {
port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
if (attr->pkey_index >=
- dev->mdev->port_caps[port - 1].pkey_table_len)
+ dev->mdev->port_caps[port - 1].pkey_table_len) {
+ mlx5_ib_dbg(dev, "invalid pkey index %d\n",
+ attr->pkey_index);
goto out;
+ }
}
if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
attr->max_rd_atomic >
- (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp)))
+ (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) {
+ mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n",
+ attr->max_rd_atomic);
goto out;
+ }
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
attr->max_dest_rd_atomic >
- (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp)))
+ (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) {
+ mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n",
+ attr->max_dest_rd_atomic);
goto out;
+ }
if (cur_state == new_state && cur_state == IB_QPS_RESET) {
err = 0;
@@ -1835,6 +2509,59 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
rseg->reserved = 0;
}
+static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,
+ struct ib_send_wr *wr, void *qend,
+ struct mlx5_ib_qp *qp, int *size)
+{
+ void *seg = eseg;
+
+ memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg));
+
+ if (wr->send_flags & IB_SEND_IP_CSUM)
+ eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM |
+ MLX5_ETH_WQE_L4_CSUM;
+
+ seg += sizeof(struct mlx5_wqe_eth_seg);
+ *size += sizeof(struct mlx5_wqe_eth_seg) / 16;
+
+ if (wr->opcode == IB_WR_LSO) {
+ struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr);
+ int size_of_inl_hdr_start = sizeof(eseg->inline_hdr_start);
+ u64 left, leftlen, copysz;
+ void *pdata = ud_wr->header;
+
+ left = ud_wr->hlen;
+ eseg->mss = cpu_to_be16(ud_wr->mss);
+ eseg->inline_hdr_sz = cpu_to_be16(left);
+
+ /*
+ * check if there is space till the end of queue, if yes,
+ * copy all in one shot, otherwise copy till the end of queue,
+ * rollback and than the copy the left
+ */
+ leftlen = qend - (void *)eseg->inline_hdr_start;
+ copysz = min_t(u64, leftlen, left);
+
+ memcpy(seg - size_of_inl_hdr_start, pdata, copysz);
+
+ if (likely(copysz > size_of_inl_hdr_start)) {
+ seg += ALIGN(copysz - size_of_inl_hdr_start, 16);
+ *size += ALIGN(copysz - size_of_inl_hdr_start, 16) / 16;
+ }
+
+ if (unlikely(copysz < left)) { /* the last wqe in the queue */
+ seg = mlx5_get_send_wqe(qp, 0);
+ left -= copysz;
+ pdata += copysz;
+ memcpy(seg, pdata, left);
+ seg += ALIGN(left, 16);
+ *size += ALIGN(left, 16) / 16;
+ }
+ }
+
+ return seg;
+}
+
static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
struct ib_send_wr *wr)
{
@@ -1902,6 +2629,11 @@ static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
int ndescs = mr->ndescs;
memset(umr, 0, sizeof(*umr));
+
+ if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
+ /* KLMs take twice the size of MTTs */
+ ndescs *= 2;
+
umr->flags = MLX5_UMR_CHECK_NOT_FREE;
umr->klm_octowords = get_klm_octo(ndescs);
umr->mkey_mask = frwr_mkey_mask();
@@ -1951,6 +2683,44 @@ static __be64 get_umr_update_mtt_mask(void)
return cpu_to_be64(result);
}
+static __be64 get_umr_update_translation_mask(void)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_LEN |
+ MLX5_MKEY_MASK_PAGE_SIZE |
+ MLX5_MKEY_MASK_START_ADDR |
+ MLX5_MKEY_MASK_KEY |
+ MLX5_MKEY_MASK_FREE;
+
+ return cpu_to_be64(result);
+}
+
+static __be64 get_umr_update_access_mask(void)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_LW |
+ MLX5_MKEY_MASK_RR |
+ MLX5_MKEY_MASK_RW |
+ MLX5_MKEY_MASK_A |
+ MLX5_MKEY_MASK_KEY |
+ MLX5_MKEY_MASK_FREE;
+
+ return cpu_to_be64(result);
+}
+
+static __be64 get_umr_update_pd_mask(void)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_PD |
+ MLX5_MKEY_MASK_KEY |
+ MLX5_MKEY_MASK_FREE;
+
+ return cpu_to_be64(result);
+}
+
static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
struct ib_send_wr *wr)
{
@@ -1969,9 +2739,15 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
umr->mkey_mask = get_umr_update_mtt_mask();
umr->bsf_octowords = get_klm_octo(umrwr->target.offset);
umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
- } else {
- umr->mkey_mask = get_umr_reg_mr_mask();
}
+ if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
+ umr->mkey_mask |= get_umr_update_translation_mask();
+ if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_ACCESS)
+ umr->mkey_mask |= get_umr_update_access_mask();
+ if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD)
+ umr->mkey_mask |= get_umr_update_pd_mask();
+ if (!umr->mkey_mask)
+ umr->mkey_mask = get_umr_reg_mr_mask();
} else {
umr->mkey_mask = get_umr_unreg_mr_mask();
}
@@ -1996,13 +2772,19 @@ static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg,
int ndescs = ALIGN(mr->ndescs, 8) >> 1;
memset(seg, 0, sizeof(*seg));
- seg->flags = get_umr_flags(access) | MLX5_ACCESS_MODE_MTT;
+
+ if (mr->access_mode == MLX5_ACCESS_MODE_MTT)
+ seg->log2_page_size = ilog2(mr->ibmr.page_size);
+ else if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
+ /* KLMs take twice the size of MTTs */
+ ndescs *= 2;
+
+ seg->flags = get_umr_flags(access) | mr->access_mode;
seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00);
seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
seg->start_addr = cpu_to_be64(mr->ibmr.iova);
seg->len = cpu_to_be64(mr->ibmr.length);
seg->xlt_oct_size = cpu_to_be32(ndescs);
- seg->log2_page_size = ilog2(mr->ibmr.page_size);
}
static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
@@ -2023,7 +2805,8 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w
seg->flags = convert_access(umrwr->access_flags);
if (!(wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT)) {
- seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn);
+ if (umrwr->pd)
+ seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn);
seg->start_addr = cpu_to_be64(umrwr->target.virt_addr);
}
seg->len = cpu_to_be64(umrwr->length);
@@ -2570,7 +3353,7 @@ static void finish_wqe(struct mlx5_ib_qp *qp,
ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
mlx5_opcode | ((u32)opmod << 24));
- ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8));
+ ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8));
ctrl->fm_ce_se |= fence;
qp->fm_cache = next_fence;
if (unlikely(qp->wq_sig))
@@ -2589,13 +3372,13 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
{
struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
- struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ struct mlx5_ib_qp *qp;
struct mlx5_ib_mr *mr;
struct mlx5_wqe_data_seg *dpseg;
struct mlx5_wqe_xrc_seg *xrc;
- struct mlx5_bf *bf = qp->bf;
+ struct mlx5_bf *bf;
int uninitialized_var(size);
- void *qend = qp->sq.qend;
+ void *qend;
unsigned long flags;
unsigned idx;
int err = 0;
@@ -2607,6 +3390,13 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
u8 next_fence = 0;
u8 fence;
+ if (unlikely(ibqp->qp_type == IB_QPT_GSI))
+ return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
+
+ qp = to_mqp(ibqp);
+ bf = qp->bf;
+ qend = qp->sq.qend;
+
spin_lock_irqsave(&qp->sq.lock, flags);
for (nreq = 0; wr; nreq++, wr = wr->next) {
@@ -2766,16 +3556,37 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
break;
- case IB_QPT_UD:
case IB_QPT_SMI:
- case IB_QPT_GSI:
+ case MLX5_IB_QPT_HW_GSI:
set_datagram_seg(seg, wr);
seg += sizeof(struct mlx5_wqe_datagram_seg);
size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
if (unlikely((seg == qend)))
seg = mlx5_get_send_wqe(qp, 0);
break;
+ case IB_QPT_UD:
+ set_datagram_seg(seg, wr);
+ seg += sizeof(struct mlx5_wqe_datagram_seg);
+ size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
+
+ if (unlikely((seg == qend)))
+ seg = mlx5_get_send_wqe(qp, 0);
+
+ /* handle qp that supports ud offload */
+ if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) {
+ struct mlx5_wqe_eth_pad *pad;
+
+ pad = seg;
+ memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad));
+ seg += sizeof(struct mlx5_wqe_eth_pad);
+ size += sizeof(struct mlx5_wqe_eth_pad) / 16;
+
+ seg = set_eth_seg(seg, wr, qend, qp, &size);
+ if (unlikely((seg == qend)))
+ seg = mlx5_get_send_wqe(qp, 0);
+ }
+ break;
case MLX5_IB_QPT_REG_UMR:
if (wr->opcode != MLX5_IB_WR_UMR) {
err = -EINVAL;
@@ -2895,6 +3706,9 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
int ind;
int i;
+ if (unlikely(ibqp->qp_type == IB_QPT_GSI))
+ return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr);
+
spin_lock_irqsave(&qp->rq.lock, flags);
ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
@@ -3003,7 +3817,7 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at
ib_ah_attr->port_num > MLX5_CAP_GEN(dev, num_ports))
return;
- ib_ah_attr->sl = path->sl & 0xf;
+ ib_ah_attr->sl = path->dci_cfi_prio_sl & 0xf;
ib_ah_attr->dlid = be16_to_cpu(path->rlid);
ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f;
@@ -3021,39 +3835,153 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at
}
}
-int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
- struct ib_qp_init_attr *qp_init_attr)
+static int query_raw_packet_qp_sq_state(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq,
+ u8 *sq_state)
+{
+ void *out;
+ void *sqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(query_sq_out);
+ out = mlx5_vzalloc(inlen);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_core_query_sq(dev->mdev, sq->base.mqp.qpn, out);
+ if (err)
+ goto out;
+
+ sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context);
+ *sq_state = MLX5_GET(sqc, sqc, state);
+ sq->state = *sq_state;
+
+out:
+ kvfree(out);
+ return err;
+}
+
+static int query_raw_packet_qp_rq_state(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq,
+ u8 *rq_state)
+{
+ void *out;
+ void *rqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(query_rq_out);
+ out = mlx5_vzalloc(inlen);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_core_query_rq(dev->mdev, rq->base.mqp.qpn, out);
+ if (err)
+ goto out;
+
+ rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context);
+ *rq_state = MLX5_GET(rqc, rqc, state);
+ rq->state = *rq_state;
+
+out:
+ kvfree(out);
+ return err;
+}
+
+static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state,
+ struct mlx5_ib_qp *qp, u8 *qp_state)
+{
+ static const u8 sqrq_trans[MLX5_RQ_NUM_STATE][MLX5_SQ_NUM_STATE] = {
+ [MLX5_RQC_STATE_RST] = {
+ [MLX5_SQC_STATE_RST] = IB_QPS_RESET,
+ [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE_BAD,
+ [MLX5_SQC_STATE_ERR] = MLX5_QP_STATE_BAD,
+ [MLX5_SQ_STATE_NA] = IB_QPS_RESET,
+ },
+ [MLX5_RQC_STATE_RDY] = {
+ [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD,
+ [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE,
+ [MLX5_SQC_STATE_ERR] = IB_QPS_SQE,
+ [MLX5_SQ_STATE_NA] = MLX5_QP_STATE,
+ },
+ [MLX5_RQC_STATE_ERR] = {
+ [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD,
+ [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE_BAD,
+ [MLX5_SQC_STATE_ERR] = IB_QPS_ERR,
+ [MLX5_SQ_STATE_NA] = IB_QPS_ERR,
+ },
+ [MLX5_RQ_STATE_NA] = {
+ [MLX5_SQC_STATE_RST] = IB_QPS_RESET,
+ [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE,
+ [MLX5_SQC_STATE_ERR] = MLX5_QP_STATE,
+ [MLX5_SQ_STATE_NA] = MLX5_QP_STATE_BAD,
+ },
+ };
+
+ *qp_state = sqrq_trans[rq_state][sq_state];
+
+ if (*qp_state == MLX5_QP_STATE_BAD) {
+ WARN(1, "Buggy Raw Packet QP state, SQ 0x%x state: 0x%x, RQ 0x%x state: 0x%x",
+ qp->raw_packet_qp.sq.base.mqp.qpn, sq_state,
+ qp->raw_packet_qp.rq.base.mqp.qpn, rq_state);
+ return -EINVAL;
+ }
+
+ if (*qp_state == MLX5_QP_STATE)
+ *qp_state = qp->state;
+
+ return 0;
+}
+
+static int query_raw_packet_qp_state(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp,
+ u8 *raw_packet_qp_state)
+{
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+ int err;
+ u8 sq_state = MLX5_SQ_STATE_NA;
+ u8 rq_state = MLX5_RQ_STATE_NA;
+
+ if (qp->sq.wqe_cnt) {
+ err = query_raw_packet_qp_sq_state(dev, sq, &sq_state);
+ if (err)
+ return err;
+ }
+
+ if (qp->rq.wqe_cnt) {
+ err = query_raw_packet_qp_rq_state(dev, rq, &rq_state);
+ if (err)
+ return err;
+ }
+
+ return sqrq_state_to_qp_state(sq_state, rq_state, qp,
+ raw_packet_qp_state);
+}
+
+static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct ib_qp_attr *qp_attr)
{
- struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
- struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_query_qp_mbox_out *outb;
struct mlx5_qp_context *context;
int mlx5_state;
int err = 0;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- /*
- * Wait for any outstanding page faults, in case the user frees memory
- * based upon this query's result.
- */
- flush_workqueue(mlx5_ib_page_fault_wq);
-#endif
-
- mutex_lock(&qp->mutex);
outb = kzalloc(sizeof(*outb), GFP_KERNEL);
- if (!outb) {
- err = -ENOMEM;
- goto out;
- }
+ if (!outb)
+ return -ENOMEM;
+
context = &outb->ctx;
- err = mlx5_core_qp_query(dev->mdev, &qp->mqp, outb, sizeof(*outb));
+ err = mlx5_core_qp_query(dev->mdev, &qp->trans_qp.base.mqp, outb,
+ sizeof(*outb));
if (err)
- goto out_free;
+ goto out;
mlx5_state = be32_to_cpu(context->flags) >> 28;
qp->state = to_ib_qp_state(mlx5_state);
- qp_attr->qp_state = qp->state;
qp_attr->path_mtu = context->mtu_msgmax >> 5;
qp_attr->path_mig_state =
to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
@@ -3087,6 +4015,47 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7;
qp_attr->rnr_retry = (be32_to_cpu(context->params1) >> 13) & 0x7;
qp_attr->alt_timeout = context->alt_path.ackto_lt >> 3;
+
+out:
+ kfree(outb);
+ return err;
+}
+
+int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ int err = 0;
+ u8 raw_packet_qp_state;
+
+ if (unlikely(ibqp->qp_type == IB_QPT_GSI))
+ return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask,
+ qp_init_attr);
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ /*
+ * Wait for any outstanding page faults, in case the user frees memory
+ * based upon this query's result.
+ */
+ flush_workqueue(mlx5_ib_page_fault_wq);
+#endif
+
+ mutex_lock(&qp->mutex);
+
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+ err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state);
+ if (err)
+ goto out;
+ qp->state = raw_packet_qp_state;
+ qp_attr->port_num = 1;
+ } else {
+ err = query_qp_attr(dev, qp, qp_attr);
+ if (err)
+ goto out;
+ }
+
+ qp_attr->qp_state = qp->state;
qp_attr->cur_qp_state = qp_attr->qp_state;
qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt;
qp_attr->cap.max_recv_sge = qp->rq.max_gs;
@@ -3110,12 +4079,18 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
+ if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+ qp_init_attr->create_flags |= IB_QP_CREATE_CROSS_CHANNEL;
+ if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
+ qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND;
+ if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
+ qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV;
+ if (qp->flags & MLX5_IB_QP_SQPN_QP1)
+ qp_init_attr->create_flags |= mlx5_ib_create_qp_sqpn_qp1();
+
qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
-out_free:
- kfree(outb);
-
out:
mutex_unlock(&qp->mutex);
return err;
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index e008505e96e9..3b2ddd64a371 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -75,31 +75,42 @@ static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type)
static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
struct mlx5_create_srq_mbox_in **in,
- struct ib_udata *udata, int buf_size, int *inlen)
+ struct ib_udata *udata, int buf_size, int *inlen,
+ int is_xrc)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct mlx5_ib_create_srq ucmd;
+ struct mlx5_ib_create_srq ucmd = {};
size_t ucmdlen;
+ void *xsrqc;
int err;
int npages;
int page_shift;
int ncont;
u32 offset;
+ u32 uidx = MLX5_IB_DEFAULT_UIDX;
- ucmdlen =
- (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
- sizeof(ucmd)) ? (sizeof(ucmd) -
- sizeof(ucmd.reserved)) : sizeof(ucmd);
+ ucmdlen = min(udata->inlen, sizeof(ucmd));
if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) {
mlx5_ib_dbg(dev, "failed copy udata\n");
return -EFAULT;
}
- if (ucmdlen == sizeof(ucmd) &&
- ucmd.reserved != 0)
+ if (ucmd.reserved0 || ucmd.reserved1)
return -EINVAL;
+ if (udata->inlen > sizeof(ucmd) &&
+ !ib_is_udata_cleared(udata, sizeof(ucmd),
+ udata->inlen - sizeof(ucmd)))
+ return -EINVAL;
+
+ if (is_xrc) {
+ err = get_srq_user_index(to_mucontext(pd->uobject->context),
+ &ucmd, udata->inlen, &uidx);
+ if (err)
+ return err;
+ }
+
srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,
@@ -138,6 +149,13 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
(*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
+ if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) &&
+ is_xrc){
+ xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
+ xrc_srq_context_entry);
+ MLX5_SET(xrc_srqc, xsrqc, user_index, uidx);
+ }
+
return 0;
err_in:
@@ -151,13 +169,14 @@ err_umem:
static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
struct mlx5_create_srq_mbox_in **in, int buf_size,
- int *inlen)
+ int *inlen, int is_xrc)
{
int err;
int i;
struct mlx5_wqe_srq_next_seg *next;
int page_shift;
int npages;
+ void *xsrqc;
err = mlx5_db_alloc(dev->mdev, &srq->db);
if (err) {
@@ -204,6 +223,14 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) &&
+ is_xrc){
+ xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
+ xrc_srq_context_entry);
+ /* 0xffffff means we ask to work with cqe version 0 */
+ MLX5_SET(xrc_srqc, xsrqc, user_index, MLX5_IB_DEFAULT_UIDX);
+ }
+
return 0;
err_in:
@@ -275,10 +302,14 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs,
srq->msrq.max_avail_gather);
+ is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
+
if (pd->uobject)
- err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen);
+ err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen,
+ is_xrc);
else
- err = create_srq_kernel(dev, srq, &in, buf_size, &inlen);
+ err = create_srq_kernel(dev, srq, &in, buf_size, &inlen,
+ is_xrc);
if (err) {
mlx5_ib_warn(dev, "create srq %s failed, err %d\n",
@@ -286,7 +317,6 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
goto err_srq;
}
- is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
in->ctx.state_log_sz = ilog2(srq->msrq.max);
flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24;
xrcdn = 0;
diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h
index 76fb7b927d37..61bc308bb802 100644
--- a/drivers/infiniband/hw/mlx5/user.h
+++ b/drivers/infiniband/hw/mlx5/user.h
@@ -35,6 +35,8 @@
#include <linux/types.h>
+#include "mlx5_ib.h"
+
enum {
MLX5_QP_FLAG_SIGNATURE = 1 << 0,
MLX5_QP_FLAG_SCATTER_CQE = 1 << 1,
@@ -66,7 +68,15 @@ struct mlx5_ib_alloc_ucontext_req_v2 {
__u32 total_num_uuars;
__u32 num_low_latency_uuars;
__u32 flags;
- __u32 reserved;
+ __u32 comp_mask;
+ __u8 max_cqe_version;
+ __u8 reserved0;
+ __u16 reserved1;
+ __u32 reserved2;
+};
+
+enum mlx5_ib_alloc_ucontext_resp_mask {
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0,
};
struct mlx5_ib_alloc_ucontext_resp {
@@ -80,7 +90,13 @@ struct mlx5_ib_alloc_ucontext_resp {
__u32 max_recv_wr;
__u32 max_srq_recv_wr;
__u16 num_ports;
- __u16 reserved;
+ __u16 reserved1;
+ __u32 comp_mask;
+ __u32 response_length;
+ __u8 cqe_version;
+ __u8 reserved2;
+ __u16 reserved3;
+ __u64 hca_core_clock_offset;
};
struct mlx5_ib_alloc_pd_resp {
@@ -110,7 +126,9 @@ struct mlx5_ib_create_srq {
__u64 buf_addr;
__u64 db_addr;
__u32 flags;
- __u32 reserved; /* explicit padding (optional on i386) */
+ __u32 reserved0; /* explicit padding (optional on i386) */
+ __u32 uidx;
+ __u32 reserved1;
};
struct mlx5_ib_create_srq_resp {
@@ -125,9 +143,55 @@ struct mlx5_ib_create_qp {
__u32 rq_wqe_count;
__u32 rq_wqe_shift;
__u32 flags;
+ __u32 uidx;
+ __u32 reserved0;
+ __u64 sq_buf_addr;
};
struct mlx5_ib_create_qp_resp {
__u32 uuar_index;
};
+
+struct mlx5_ib_alloc_mw {
+ __u32 comp_mask;
+ __u8 num_klms;
+ __u8 reserved1;
+ __u16 reserved2;
+};
+
+static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext,
+ struct mlx5_ib_create_qp *ucmd,
+ int inlen,
+ u32 *user_index)
+{
+ u8 cqe_version = ucontext->cqe_version;
+
+ if (field_avail(struct mlx5_ib_create_qp, uidx, inlen) &&
+ !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+ return 0;
+
+ if (!!(field_avail(struct mlx5_ib_create_qp, uidx, inlen) !=
+ !!cqe_version))
+ return -EINVAL;
+
+ return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
+}
+
+static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext,
+ struct mlx5_ib_create_srq *ucmd,
+ int inlen,
+ u32 *user_index)
+{
+ u8 cqe_version = ucontext->cqe_version;
+
+ if (field_avail(struct mlx5_ib_create_srq, uidx, inlen) &&
+ !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+ return 0;
+
+ if (!!(field_avail(struct mlx5_ib_create_srq, uidx, inlen) !=
+ !!cqe_version))
+ return -EINVAL;
+
+ return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
+}
#endif /* MLX5_IB_USER_H */
OpenPOWER on IntegriCloud