Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--  drivers/infiniband/hw/Makefile | 1
-rw-r--r--  drivers/infiniband/hw/bnxt_re/Kconfig | 12
-rw-r--r--  drivers/infiniband/hw/bnxt_re/bnxt_re.h | 1
-rw-r--r--  drivers/infiniband/hw/bnxt_re/hw_counters.c | 2
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.c | 44
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.h | 3
-rw-r--r--  drivers/infiniband/hw/bnxt_re/main.c | 146
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.c | 14
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 17
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 11
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_res.c | 2
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_res.h | 8
-rw-r--r--  drivers/infiniband/hw/cxgb3/Kconfig | 19
-rw-r--r--  drivers/infiniband/hw/cxgb3/Makefile | 7
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_hal.c | 1312
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_hal.h | 204
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_resource.c | 344
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_resource.h | 69
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_wr.h | 802
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch.c | 282
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch.h | 155
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cm.c | 2258
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cm.h | 233
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cq.c | 230
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_ev.c | 232
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_mem.c | 101
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_provider.c | 1326
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_provider.h | 347
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_qp.c | 1082
-rw-r--r--  drivers/infiniband/hw/cxgb3/tcb.h | 632
-rw-r--r--  drivers/infiniband/hw/cxgb4/cm.c | 38
-rw-r--r--  drivers/infiniband/hw/cxgb4/device.c | 7
-rw-r--r--  drivers/infiniband/hw/cxgb4/mem.c | 30
-rw-r--r--  drivers/infiniband/hw/cxgb4/provider.c | 31
-rw-r--r--  drivers/infiniband/hw/cxgb4/qp.c | 14
-rw-r--r--  drivers/infiniband/hw/efa/efa.h | 16
-rw-r--r--  drivers/infiniband/hw/efa/efa_admin_cmds_defs.h | 66
-rw-r--r--  drivers/infiniband/hw/efa/efa_com.c | 75
-rw-r--r--  drivers/infiniband/hw/efa/efa_com_cmd.c | 199
-rw-r--r--  drivers/infiniband/hw/efa/efa_com_cmd.h | 42
-rw-r--r--  drivers/infiniband/hw/efa/efa_main.c | 19
-rw-r--r--  drivers/infiniband/hw/efa/efa_verbs.c | 462
-rw-r--r--  drivers/infiniband/hw/hfi1/affinity.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.c | 200
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.h | 9
-rw-r--r--  drivers/infiniband/hw/hfi1/chip_registers.h | 1
-rw-r--r--  drivers/infiniband/hw/hfi1/common.h | 3
-rw-r--r--  drivers/infiniband/hw/hfi1/debugfs.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/driver.c | 237
-rw-r--r--  drivers/infiniband/hw/hfi1/fault.c | 12
-rw-r--r--  drivers/infiniband/hw/hfi1/file_ops.c | 66
-rw-r--r--  drivers/infiniband/hw/hfi1/hfi.h | 202
-rw-r--r--  drivers/infiniband/hw/hfi1/init.c | 88
-rw-r--r--  drivers/infiniband/hw/hfi1/iowait.c | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/mad.c | 62
-rw-r--r--  drivers/infiniband/hw/hfi1/msix.c | 106
-rw-r--r--  drivers/infiniband/hw/hfi1/msix.h | 1
-rw-r--r--  drivers/infiniband/hw/hfi1/pcie.c | 8
-rw-r--r--  drivers/infiniband/hw/hfi1/platform.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/rc.c | 33
-rw-r--r--  drivers/infiniband/hw/hfi1/sdma.c | 16
-rw-r--r--  drivers/infiniband/hw/hfi1/tid_rdma.c | 155
-rw-r--r--  drivers/infiniband/hw/hfi1/tid_rdma.h | 3
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_ctxts.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_rx.h | 15
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_tid.h | 46
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_tx.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/user_exp_rcv.c | 151
-rw-r--r--  drivers/infiniband/hw/hfi1/user_exp_rcv.h | 3
-rw-r--r--  drivers/infiniband/hw/hfi1/user_pages.c | 7
-rw-r--r--  drivers/infiniband/hw/hfi1/user_sdma.c | 17
-rw-r--r--  drivers/infiniband/hw/hfi1/user_sdma.h | 6
-rw-r--r--  drivers/infiniband/hw/hfi1/verbs.c | 29
-rw-r--r--  drivers/infiniband/hw/hfi1/verbs.h | 9
-rw-r--r--  drivers/infiniband/hw/hfi1/vnic_main.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/vnic_sdma.c | 4
-rw-r--r--  drivers/infiniband/hw/hns/Kconfig | 9
-rw-r--r--  drivers/infiniband/hw/hns/Makefile | 8
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_ah.c | 35
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_alloc.c | 4
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_cmd.c | 11
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_cmd.h | 14
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_cq.c | 454
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_db.c | 3
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_device.h | 190
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hem.c | 252
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hem.h | 8
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 166
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 1987
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 170
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_main.c | 134
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_mr.c | 505
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_pd.c | 2
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_qp.c | 479
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_restrack.c | 10
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_srq.c | 357
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_cm.c | 2
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_main.c | 2
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_verbs.c | 34
-rw-r--r--  drivers/infiniband/hw/mlx4/cm.c | 29
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c | 22
-rw-r--r--  drivers/infiniband/hw/mlx4/doorbell.c | 3
-rw-r--r--  drivers/infiniband/hw/mlx4/mad.c | 34
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c | 50
-rw-r--r--  drivers/infiniband/hw/mlx4/mlx4_ib.h | 8
-rw-r--r--  drivers/infiniband/hw/mlx4/mr.c | 15
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c | 246
-rw-r--r--  drivers/infiniband/hw/mlx4/srq.c | 3
-rw-r--r--  drivers/infiniband/hw/mlx5/Makefile | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/cmd.c | 140
-rw-r--r--  drivers/infiniband/hw/mlx5/cmd.h | 6
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c | 41
-rw-r--r--  drivers/infiniband/hw/mlx5/devx.c | 288
-rw-r--r--  drivers/infiniband/hw/mlx5/doorbell.c | 3
-rw-r--r--  drivers/infiniband/hw/mlx5/flow.c | 63
-rw-r--r--  drivers/infiniband/hw/mlx5/gsi.c | 5
-rw-r--r--  drivers/infiniband/hw/mlx5/ib_rep.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/ib_rep.h | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/ib_virt.c | 24
-rw-r--r--  drivers/infiniband/hw/mlx5/mad.c | 124
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c | 831
-rw-r--r--  drivers/infiniband/hw/mlx5/mem.c | 236
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h | 171
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c | 360
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c | 1185
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c | 291
-rw-r--r--  drivers/infiniband/hw/mlx5/restrack.c | 90
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/srq_cmd.c | 6
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_dev.h | 12
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mad.c | 74
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_memfree.c | 8
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.c | 4
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 33
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_ah.h | 11
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_main.c | 1
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 2
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_stats.c | 9
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_stats.h | 3
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 12
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 2
-rw-r--r--  drivers/infiniband/hw/qedr/main.c | 9
-rw-r--r--  drivers/infiniband/hw/qedr/qedr.h | 74
-rw-r--r--  drivers/infiniband/hw/qedr/qedr_iw_cm.c | 150
-rw-r--r--  drivers/infiniband/hw/qedr/verbs.c | 666
-rw-r--r--  drivers/infiniband/hw/qedr/verbs.h | 12
-rw-r--r--  drivers/infiniband/hw/qib/qib_file_ops.c | 3
-rw-r--r--  drivers/infiniband/hw/qib/qib_fs.c | 2
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba6120.c | 1
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba7322.c | 2
-rw-r--r--  drivers/infiniband/hw/qib/qib_init.c | 4
-rw-r--r--  drivers/infiniband/hw/qib/qib_mad.c | 38
-rw-r--r--  drivers/infiniband/hw/qib/qib_pcie.c | 2
-rw-r--r--  drivers/infiniband/hw/qib/qib_rc.c | 10
-rw-r--r--  drivers/infiniband/hw/qib/qib_sysfs.c | 8
-rw-r--r--  drivers/infiniband/hw/qib/qib_user_pages.c | 7
-rw-r--r--  drivers/infiniband/hw/qib/qib_user_sdma.c | 8
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.h | 5
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_main.c | 10
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 9
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_uiom.c | 7
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c | 4
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h | 15
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c | 2
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 124
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c | 4
166 files changed, 7688 insertions, 15177 deletions
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index 433fca59febd..0aeccd984889 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_INFINIBAND_MTHCA) += mthca/
obj-$(CONFIG_INFINIBAND_QIB) += qib/
-obj-$(CONFIG_INFINIBAND_CXGB3) += cxgb3/
obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/
obj-$(CONFIG_INFINIBAND_EFA) += efa/
obj-$(CONFIG_INFINIBAND_I40IW) += i40iw/
diff --git a/drivers/infiniband/hw/bnxt_re/Kconfig b/drivers/infiniband/hw/bnxt_re/Kconfig
index ab8779d23382..b83f1cc38c52 100644
--- a/drivers/infiniband/hw/bnxt_re/Kconfig
+++ b/drivers/infiniband/hw/bnxt_re/Kconfig
@@ -1,11 +1,11 @@
# SPDX-License-Identifier: GPL-2.0-only
config INFINIBAND_BNXT_RE
- tristate "Broadcom Netxtreme HCA support"
- depends on 64BIT
- depends on ETHERNET && NETDEVICES && PCI && INET && DCB
- select NET_VENDOR_BROADCOM
- select BNXT
- ---help---
+ tristate "Broadcom Netxtreme HCA support"
+ depends on 64BIT
+ depends on ETHERNET && NETDEVICES && PCI && INET && DCB
+ select NET_VENDOR_BROADCOM
+ select BNXT
+ ---help---
This driver supports Broadcom NetXtreme-E 10/25/40/50 gigabit
RoCE HCAs. To compile this driver as a module, choose M here:
the module will be called bnxt_re.
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index e55a1666c0cd..725b2350e349 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -108,6 +108,7 @@ struct bnxt_re_sqp_entries {
#define BNXT_RE_MAX_MSIX 9
#define BNXT_RE_AEQ_IDX 0
#define BNXT_RE_NQ_IDX 1
+#define BNXT_RE_GEN_P5_MAX_VF 64
struct bnxt_re_dev {
struct ib_device ibdev;
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c
index 604b71875f5f..3421a0b15983 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.c
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c
@@ -74,7 +74,7 @@ static const char * const bnxt_re_stat_name[] = {
[BNXT_RE_SEQ_ERR_NAKS_RCVD] = "seq_err_naks_rcvd",
[BNXT_RE_MAX_RETRY_EXCEEDED] = "max_retry_exceeded",
[BNXT_RE_RNR_NAKS_RCVD] = "rnr_naks_rcvd",
- [BNXT_RE_MISSING_RESP] = "missin_resp",
+ [BNXT_RE_MISSING_RESP] = "missing_resp",
[BNXT_RE_UNRECOVERABLE_ERR] = "unrecoverable_err",
[BNXT_RE_BAD_RESP_ERR] = "bad_resp_err",
[BNXT_RE_LOCAL_QP_OP_ERR] = "local_qp_op_err",
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 098ab883733e..52b6a4d85460 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -191,24 +191,6 @@ int bnxt_re_query_device(struct ib_device *ibdev,
return 0;
}
-int bnxt_re_modify_device(struct ib_device *ibdev,
- int device_modify_mask,
- struct ib_device_modify *device_modify)
-{
- switch (device_modify_mask) {
- case IB_DEVICE_MODIFY_SYS_IMAGE_GUID:
- /* Modify the GUID requires the modification of the GID table */
- /* GUID should be made as READ-ONLY */
- break;
- case IB_DEVICE_MODIFY_NODE_DESC:
- /* Node Desc should be made as READ-ONLY */
- break;
- default:
- break;
- }
- return 0;
-}
-
/* Port */
int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr *port_attr)
@@ -220,10 +202,10 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
if (netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev)) {
port_attr->state = IB_PORT_ACTIVE;
- port_attr->phys_state = 5;
+ port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
} else {
port_attr->state = IB_PORT_DOWN;
- port_attr->phys_state = 3;
+ port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
}
port_attr->max_mtu = IB_MTU_4096;
port_attr->active_mtu = iboe_get_mtu(rdev->netdev->mtu);
@@ -855,7 +837,8 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
bytes += (qplib_qp->sq.max_wqe * psn_sz);
}
bytes = PAGE_ALIGN(bytes);
- umem = ib_umem_get(udata, ureq.qpsva, bytes, IB_ACCESS_LOCAL_WRITE, 1);
+ umem = ib_umem_get(&rdev->ibdev, ureq.qpsva, bytes,
+ IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(umem))
return PTR_ERR(umem);
@@ -868,8 +851,8 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
if (!qp->qplib_qp.srq) {
bytes = (qplib_qp->rq.max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
bytes = PAGE_ALIGN(bytes);
- umem = ib_umem_get(udata, ureq.qprva, bytes,
- IB_ACCESS_LOCAL_WRITE, 1);
+ umem = ib_umem_get(&rdev->ibdev, ureq.qprva, bytes,
+ IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(umem))
goto rqfail;
qp->rumem = umem;
@@ -1322,7 +1305,8 @@ static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev,
bytes = (qplib_srq->max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
bytes = PAGE_ALIGN(bytes);
- umem = ib_umem_get(udata, ureq.srqva, bytes, IB_ACCESS_LOCAL_WRITE, 1);
+ umem = ib_umem_get(&rdev->ibdev, ureq.srqva, bytes,
+ IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(umem))
return PTR_ERR(umem);
@@ -1398,7 +1382,7 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
dev_err(rdev_to_dev(rdev), "SRQ copy to udata failed!");
bnxt_qplib_destroy_srq(&rdev->qplib_res,
&srq->qplib_srq);
- goto exit;
+ goto fail;
}
}
if (nq)
@@ -2563,9 +2547,9 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
goto fail;
}
- cq->umem = ib_umem_get(udata, req.cq_va,
+ cq->umem = ib_umem_get(&rdev->ibdev, req.cq_va,
entries * sizeof(struct cq_base),
- IB_ACCESS_LOCAL_WRITE, 1);
+ IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(cq->umem)) {
rc = PTR_ERR(cq->umem);
goto fail;
@@ -3323,8 +3307,10 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
int rc;
rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
- if (rc)
+ if (rc) {
dev_err(rdev_to_dev(rdev), "Dereg MR failed: %#x\n", rc);
+ return rc;
+ }
if (mr->pages) {
rc = bnxt_qplib_free_fast_reg_page_list(&rdev->qplib_res,
@@ -3530,7 +3516,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
/* The fixed portion of the rkey is the same as the lkey */
mr->ib_mr.rkey = mr->qplib_mr.rkey;
- umem = ib_umem_get(udata, start, length, mr_access_flags, 0);
+ umem = ib_umem_get(&rdev->ibdev, start, length, mr_access_flags);
if (IS_ERR(umem)) {
dev_err(rdev_to_dev(rdev), "Failed to get umem");
rc = -EFAULT;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index 31662b1ee35a..23d972da5652 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -145,9 +145,6 @@ struct bnxt_re_ucontext {
int bnxt_re_query_device(struct ib_device *ibdev,
struct ib_device_attr *ib_attr,
struct ib_udata *udata);
-int bnxt_re_modify_device(struct ib_device *ibdev,
- int device_modify_mask,
- struct ib_device_modify *device_modify);
int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr *port_attr);
int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num,
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 029babe713f3..793c97251588 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -119,61 +119,76 @@ static void bnxt_re_get_sriov_func_type(struct bnxt_re_dev *rdev)
* reserved for the function. The driver may choose to allocate fewer
* resources than the firmware maximum.
*/
-static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
+static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev)
{
- u32 vf_qps = 0, vf_srqs = 0, vf_cqs = 0, vf_mrws = 0, vf_gids = 0;
- u32 i;
- u32 vf_pct;
- u32 num_vfs;
- struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+ struct bnxt_qplib_dev_attr *attr;
+ struct bnxt_qplib_ctx *ctx;
+ int i;
- rdev->qplib_ctx.qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT,
- dev_attr->max_qp);
+ attr = &rdev->dev_attr;
+ ctx = &rdev->qplib_ctx;
- rdev->qplib_ctx.mrw_count = BNXT_RE_MAX_MRW_COUNT_256K;
+ ctx->qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT,
+ attr->max_qp);
+ ctx->mrw_count = BNXT_RE_MAX_MRW_COUNT_256K;
/* Use max_mr from fw since max_mrw does not get set */
- rdev->qplib_ctx.mrw_count = min_t(u32, rdev->qplib_ctx.mrw_count,
- dev_attr->max_mr);
- rdev->qplib_ctx.srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT,
- dev_attr->max_srq);
- rdev->qplib_ctx.cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT,
- dev_attr->max_cq);
-
- for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
- rdev->qplib_ctx.tqm_count[i] =
- rdev->dev_attr.tqm_alloc_reqs[i];
-
- if (rdev->num_vfs) {
- /*
- * Reserve a set of resources for the PF. Divide the remaining
- * resources among the VFs
- */
- vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF;
- num_vfs = 100 * rdev->num_vfs;
- vf_qps = (rdev->qplib_ctx.qpc_count * vf_pct) / num_vfs;
- vf_srqs = (rdev->qplib_ctx.srqc_count * vf_pct) / num_vfs;
- vf_cqs = (rdev->qplib_ctx.cq_count * vf_pct) / num_vfs;
- /*
- * The driver allows many more MRs than other resources. If the
- * firmware does also, then reserve a fixed amount for the PF
- * and divide the rest among VFs. VFs may use many MRs for NFS
- * mounts, ISER, NVME applications, etc. If the firmware
- * severely restricts the number of MRs, then let PF have
- * half and divide the rest among VFs, as for the other
- * resource types.
- */
- if (rdev->qplib_ctx.mrw_count < BNXT_RE_MAX_MRW_COUNT_64K)
- vf_mrws = rdev->qplib_ctx.mrw_count * vf_pct / num_vfs;
- else
- vf_mrws = (rdev->qplib_ctx.mrw_count -
- BNXT_RE_RESVD_MR_FOR_PF) / rdev->num_vfs;
- vf_gids = BNXT_RE_MAX_GID_PER_VF;
+ ctx->mrw_count = min_t(u32, ctx->mrw_count, attr->max_mr);
+ ctx->srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT,
+ attr->max_srq);
+ ctx->cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT, attr->max_cq);
+ if (!bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx))
+ for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
+ rdev->qplib_ctx.tqm_count[i] =
+ rdev->dev_attr.tqm_alloc_reqs[i];
+}
+
+static void bnxt_re_limit_vf_res(struct bnxt_qplib_ctx *qplib_ctx, u32 num_vf)
+{
+ struct bnxt_qplib_vf_res *vf_res;
+ u32 mrws = 0;
+ u32 vf_pct;
+ u32 nvfs;
+
+ vf_res = &qplib_ctx->vf_res;
+ /*
+ * Reserve a set of resources for the PF. Divide the remaining
+ * resources among the VFs
+ */
+ vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF;
+ nvfs = num_vf;
+ num_vf = 100 * num_vf;
+ vf_res->max_qp_per_vf = (qplib_ctx->qpc_count * vf_pct) / num_vf;
+ vf_res->max_srq_per_vf = (qplib_ctx->srqc_count * vf_pct) / num_vf;
+ vf_res->max_cq_per_vf = (qplib_ctx->cq_count * vf_pct) / num_vf;
+ /*
+ * The driver allows many more MRs than other resources. If the
+ * firmware does also, then reserve a fixed amount for the PF and
+ * divide the rest among VFs. VFs may use many MRs for NFS
+ * mounts, ISER, NVME applications, etc. If the firmware severely
+ * restricts the number of MRs, then let PF have half and divide
+ * the rest among VFs, as for the other resource types.
+ */
+ if (qplib_ctx->mrw_count < BNXT_RE_MAX_MRW_COUNT_64K) {
+ mrws = qplib_ctx->mrw_count * vf_pct;
+ nvfs = num_vf;
+ } else {
+ mrws = qplib_ctx->mrw_count - BNXT_RE_RESVD_MR_FOR_PF;
}
- rdev->qplib_ctx.vf_res.max_mrw_per_vf = vf_mrws;
- rdev->qplib_ctx.vf_res.max_gid_per_vf = vf_gids;
- rdev->qplib_ctx.vf_res.max_qp_per_vf = vf_qps;
- rdev->qplib_ctx.vf_res.max_srq_per_vf = vf_srqs;
- rdev->qplib_ctx.vf_res.max_cq_per_vf = vf_cqs;
+ vf_res->max_mrw_per_vf = (mrws / nvfs);
+ vf_res->max_gid_per_vf = BNXT_RE_MAX_GID_PER_VF;
+}
+
+static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
+{
+ u32 num_vfs;
+
+ memset(&rdev->qplib_ctx.vf_res, 0, sizeof(struct bnxt_qplib_vf_res));
+ bnxt_re_limit_pf_res(rdev);
+
+ num_vfs = bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ?
+ BNXT_RE_GEN_P5_MAX_VF : rdev->num_vfs;
+ if (num_vfs)
+ bnxt_re_limit_vf_res(&rdev->qplib_ctx, num_vfs);
}
/* for handling bnxt_en callbacks later */
@@ -193,9 +208,11 @@ static void bnxt_re_sriov_config(void *p, int num_vfs)
return;
rdev->num_vfs = num_vfs;
- bnxt_re_set_resource_limits(rdev);
- bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw,
- &rdev->qplib_ctx);
+ if (!bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx)) {
+ bnxt_re_set_resource_limits(rdev);
+ bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw,
+ &rdev->qplib_ctx);
+ }
}
static void bnxt_re_shutdown(void *p)
@@ -477,6 +494,7 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1);
req.update_period_ms = cpu_to_le32(1000);
req.stats_dma_addr = cpu_to_le64(dma_map);
+ req.stats_dma_length = cpu_to_le16(sizeof(struct ctx_hw_stats_ext));
req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE;
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
@@ -625,7 +643,6 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
.map_mr_sg = bnxt_re_map_mr_sg,
.mmap = bnxt_re_mmap,
.modify_ah = bnxt_re_modify_ah,
- .modify_device = bnxt_re_modify_device,
.modify_qp = bnxt_re_modify_qp,
.modify_srq = bnxt_re_modify_srq,
.poll_cq = bnxt_re_poll_cq,
@@ -660,7 +677,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
bnxt_qplib_get_guid(rdev->netdev->dev_addr, (u8 *)&ibdev->node_guid);
- ibdev->num_comp_vectors = 1;
+ ibdev->num_comp_vectors = rdev->num_msix - 1;
ibdev->dev.parent = &rdev->en_dev->pdev->dev;
ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY;
@@ -895,10 +912,14 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq,
return 0;
}
+#define BNXT_RE_GEN_P5_PF_NQ_DB 0x10000
+#define BNXT_RE_GEN_P5_VF_NQ_DB 0x4000
static u32 bnxt_re_get_nqdb_offset(struct bnxt_re_dev *rdev, u16 indx)
{
return bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ?
- 0x10000 : rdev->msix_entries[indx].db_offset;
+ (rdev->is_virtfn ? BNXT_RE_GEN_P5_VF_NQ_DB :
+ BNXT_RE_GEN_P5_PF_NQ_DB) :
+ rdev->msix_entries[indx].db_offset;
}
static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
@@ -1270,10 +1291,10 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
return;
}
rdev->qplib_ctx.hwrm_intf_ver =
- (u64)resp.hwrm_intf_major << 48 |
- (u64)resp.hwrm_intf_minor << 32 |
- (u64)resp.hwrm_intf_build << 16 |
- resp.hwrm_intf_patch;
+ (u64)le16_to_cpu(resp.hwrm_intf_major) << 48 |
+ (u64)le16_to_cpu(resp.hwrm_intf_minor) << 32 |
+ (u64)le16_to_cpu(resp.hwrm_intf_build) << 16 |
+ le16_to_cpu(resp.hwrm_intf_patch);
}
static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
@@ -1408,8 +1429,8 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
rdev->is_virtfn);
if (rc)
goto disable_rcfw;
- if (!rdev->is_virtfn)
- bnxt_re_set_resource_limits(rdev);
+
+ bnxt_re_set_resource_limits(rdev);
rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0,
bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx));
@@ -1473,7 +1494,6 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
&rdev->active_width);
set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE);
- bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_GID_CHANGE);
return 0;
free_sctx:
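A minimal standalone sketch of the split arithmetic that the new bnxt_re_limit_vf_res() above performs, assuming BNXT_RE_PCT_RSVD_FOR_PF is 50 and using hypothetical firmware maximums (illustration only, not part of the patch):

/*
 * Illustrative sketch of the PF/VF resource split done by
 * bnxt_re_limit_vf_res() above. The PF-reserved percentage and
 * the QP context count below are assumed example values.
 */
#include <stdio.h>

int main(void)
{
	unsigned int qpc_count = 65536;      /* hypothetical PF-wide QP context limit */
	unsigned int pct_rsvd_for_pf = 50;   /* assumed value of BNXT_RE_PCT_RSVD_FOR_PF */
	unsigned int num_vf = 4;

	unsigned int vf_pct = 100 - pct_rsvd_for_pf;
	/* same integer math as the driver: scale the VF count by 100 */
	unsigned int max_qp_per_vf = (qpc_count * vf_pct) / (100 * num_vf);

	printf("each of %u VFs gets %u QP contexts\n", num_vf, max_qp_per_vf);
	return 0;
}

With these example numbers, each of the 4 VFs gets 8192 QP contexts and the remaining half stays reserved for the PF.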
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 958c1ff9c515..020f70e6865e 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -442,7 +442,7 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
goto fail;
}
/* Unconditionally map 8 bytes to support 57500 series */
- nq->bar_reg_iomem = ioremap_nocache(nq_base + nq->bar_reg_off, 8);
+ nq->bar_reg_iomem = ioremap(nq_base + nq->bar_reg_off, 8);
if (!nq->bar_reg_iomem) {
rc = -ENOMEM;
goto fail;
@@ -2283,13 +2283,13 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
/* Add qp to flush list of the CQ */
bnxt_qplib_add_flush_qp(qp);
} else {
+ /* Before we complete, do WA 9060 */
+ if (do_wa9060(qp, cq, cq_cons, sw_sq_cons,
+ cqe_sq_cons)) {
+ *lib_qp = qp;
+ goto out;
+ }
if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
- /* Before we complete, do WA 9060 */
- if (do_wa9060(qp, cq, cq_cons, sw_sq_cons,
- cqe_sq_cons)) {
- *lib_qp = qp;
- goto out;
- }
cqe->status = CQ_REQ_STATUS_OK;
cqe++;
(*budget)--;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 48b04d2f175f..1291b12287a5 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -136,6 +136,13 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
spin_unlock_irqrestore(&cmdq->lock, flags);
return -EBUSY;
}
+
+ size = req->cmd_size;
+ /* change the cmd_size to the number of 16byte cmdq unit.
+ * req->cmd_size is modified here
+ */
+ bnxt_qplib_set_cmd_slots(req);
+
memset(resp, 0, sizeof(*resp));
crsqe->resp = (struct creq_qp_event *)resp;
crsqe->resp->cookie = req->cookie;
@@ -150,7 +157,6 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
cmdq_ptr = (struct bnxt_qplib_cmdqe **)cmdq->pbl_ptr;
preq = (u8 *)req;
- size = req->cmd_size * BNXT_QPLIB_CMDQE_UNITS;
do {
/* Locate the next cmdq slot */
sw_prod = HWQ_CMP(cmdq->prod, cmdq);
@@ -488,8 +494,10 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
* shall setup this area for VF. Skipping the
* HW programming
*/
- if (is_virtfn || bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx))
+ if (is_virtfn)
goto skip_ctx_setup;
+ if (bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx))
+ goto config_vf_res;
level = ctx->qpc_tbl.level;
req.qpc_pg_size_qpc_lvl = (level << CMDQ_INITIALIZE_FW_QPC_LVL_SFT) |
@@ -534,6 +542,7 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
req.number_of_srq = cpu_to_le32(ctx->srqc_tbl.max_elements);
req.number_of_cq = cpu_to_le32(ctx->cq_tbl.max_elements);
+config_vf_res:
req.max_qp_per_vf = cpu_to_le32(ctx->vf_res.max_qp_per_vf);
req.max_mrw_per_vf = cpu_to_le32(ctx->vf_res.max_mrw_per_vf);
req.max_srq_per_vf = cpu_to_le32(ctx->vf_res.max_srq_per_vf);
@@ -708,7 +717,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
if (!res_base)
return -ENOMEM;
- rcfw->cmdq_bar_reg_iomem = ioremap_nocache(res_base +
+ rcfw->cmdq_bar_reg_iomem = ioremap(res_base +
RCFW_COMM_BASE_OFFSET,
RCFW_COMM_SIZE);
if (!rcfw->cmdq_bar_reg_iomem) {
@@ -730,7 +739,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
"CREQ BAR region %d resc start is 0!\n",
rcfw->creq_bar_reg);
/* Unconditionally map 8 bytes to support 57500 series */
- rcfw->creq_bar_reg_iomem = ioremap_nocache(res_base + cp_bar_reg_off,
+ rcfw->creq_bar_reg_iomem = ioremap(res_base + cp_bar_reg_off,
8);
if (!rcfw->creq_bar_reg_iomem) {
dev_err(&rcfw->pdev->dev, "CREQ BAR region %d mapping failed\n",
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 2138533bb642..dfeadc192e17 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -55,9 +55,7 @@
do { \
memset(&(req), 0, sizeof((req))); \
(req).opcode = CMDQ_BASE_OPCODE_##CMD; \
- (req).cmd_size = (sizeof((req)) + \
- BNXT_QPLIB_CMDQE_UNITS - 1) / \
- BNXT_QPLIB_CMDQE_UNITS; \
+ (req).cmd_size = sizeof((req)); \
(req).flags = cpu_to_le16(cmd_flags); \
} while (0)
@@ -95,6 +93,13 @@ static inline u32 bnxt_qplib_cmdqe_cnt_per_pg(u32 depth)
BNXT_QPLIB_CMDQE_UNITS);
}
+/* Set the cmd_size to a factor of CMDQE unit */
+static inline void bnxt_qplib_set_cmd_slots(struct cmdq_base *req)
+{
+ req->cmd_size = (req->cmd_size + BNXT_QPLIB_CMDQE_UNITS - 1) /
+ BNXT_QPLIB_CMDQE_UNITS;
+}
+
#define MAX_CMDQ_IDX(depth) ((depth) - 1)
static inline u32 bnxt_qplib_max_cmdq_idx_per_pg(u32 depth)
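A minimal sketch of the slot rounding that the new bnxt_qplib_set_cmd_slots() helper performs, taking the 16-byte cmdq unit size from the comment in the qplib_rcfw.c hunk above; CMDQE_UNITS below is a stand-in for BNXT_QPLIB_CMDQE_UNITS (illustration only, not part of the patch):

/*
 * Illustrative sketch: a request of cmd_size bytes is rounded up to the
 * number of 16-byte cmdq units it occupies, mirroring the new helper.
 */
#include <stdio.h>

#define CMDQE_UNITS 16  /* stand-in for BNXT_QPLIB_CMDQE_UNITS */

static unsigned int cmd_slots(unsigned int cmd_size_bytes)
{
	return (cmd_size_bytes + CMDQE_UNITS - 1) / CMDQE_UNITS;
}

int main(void)
{
	printf("24-byte request -> %u slots\n", cmd_slots(24));  /* 2 */
	printf("32-byte request -> %u slots\n", cmd_slots(32));  /* 2 */
	return 0;
}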
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index bdbde8e22420..60ea1b924b67 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -704,7 +704,7 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
return -ENOMEM;
}
- dpit->dbr_bar_reg_iomem = ioremap_nocache(bar_reg_base + dbr_offset,
+ dpit->dbr_bar_reg_iomem = ioremap(bar_reg_base + dbr_offset,
dbr_len);
if (!dpit->dbr_bar_reg_iomem) {
dev_err(&res->pdev->dev,
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index fbda11a7ab1a..aaa76d792185 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -186,7 +186,9 @@ struct bnxt_qplib_chip_ctx {
u8 chip_metal;
};
-#define CHIP_NUM_57500 0x1750
+#define CHIP_NUM_57508 0x1750
+#define CHIP_NUM_57504 0x1751
+#define CHIP_NUM_57502 0x1752
struct bnxt_qplib_res {
struct pci_dev *pdev;
@@ -203,7 +205,9 @@ struct bnxt_qplib_res {
static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx)
{
- return (cctx->chip_num == CHIP_NUM_57500);
+ return (cctx->chip_num == CHIP_NUM_57508 ||
+ cctx->chip_num == CHIP_NUM_57504 ||
+ cctx->chip_num == CHIP_NUM_57502);
}
static inline u8 bnxt_qplib_get_hwq_type(struct bnxt_qplib_res *res)
diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig
deleted file mode 100644
index 8c1a72bff447..000000000000
--- a/drivers/infiniband/hw/cxgb3/Kconfig
+++ /dev/null
@@ -1,19 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-config INFINIBAND_CXGB3
- tristate "Chelsio RDMA Driver"
- depends on CHELSIO_T3
- select GENERIC_ALLOCATOR
- ---help---
- This is an iWARP/RDMA driver for the Chelsio T3 1GbE and
- 10GbE adapters.
-
- For general information about Chelsio and our products, visit
- our website at <http://www.chelsio.com>.
-
- For customer support, please visit our customer support page at
- <http://www.chelsio.com/support.html>.
-
- Please send feedback to <linux-bugs@chelsio.com>.
-
- To compile this driver as a module, choose M here: the module
- will be called iw_cxgb3.
diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile
deleted file mode 100644
index 34bb86a6ae3a..000000000000
--- a/drivers/infiniband/hw/cxgb3/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb3
-
-obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o
-
-iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \
- iwch_provider.o iwch.o cxio_hal.o cxio_resource.o
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
deleted file mode 100644
index 95b22a651673..000000000000
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ /dev/null
@@ -1,1312 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <asm/delay.h>
-
-#include <linux/mutex.h>
-#include <linux/netdevice.h>
-#include <linux/sched.h>
-#include <linux/spinlock.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/slab.h>
-#include <net/net_namespace.h>
-
-#include "cxio_resource.h"
-#include "cxio_hal.h"
-#include "cxgb3_offload.h"
-#include "sge_defs.h"
-
-static LIST_HEAD(rdev_list);
-static cxio_hal_ev_callback_func_t cxio_ev_cb = NULL;
-
-static struct cxio_rdev *cxio_hal_find_rdev_by_name(char *dev_name)
-{
- struct cxio_rdev *rdev;
-
- list_for_each_entry(rdev, &rdev_list, entry)
- if (!strcmp(rdev->dev_name, dev_name))
- return rdev;
- return NULL;
-}
-
-static struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev *tdev)
-{
- struct cxio_rdev *rdev;
-
- list_for_each_entry(rdev, &rdev_list, entry)
- if (rdev->t3cdev_p == tdev)
- return rdev;
- return NULL;
-}
-
-int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
- enum t3_cq_opcode op, u32 credit)
-{
- int ret;
- struct t3_cqe *cqe;
- u32 rptr;
-
- struct rdma_cq_op setup;
- setup.id = cq->cqid;
- setup.credits = (op == CQ_CREDIT_UPDATE) ? credit : 0;
- setup.op = op;
- ret = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_OP, &setup);
-
- if ((ret < 0) || (op == CQ_CREDIT_UPDATE))
- return ret;
-
- /*
- * If the rearm returned an index other than our current index,
- * then there might be CQE's in flight (being DMA'd). We must wait
- * here for them to complete or the consumer can miss a notification.
- */
- if (Q_PTR2IDX((cq->rptr), cq->size_log2) != ret) {
- int i=0;
-
- rptr = cq->rptr;
-
- /*
- * Keep the generation correct by bumping rptr until it
- * matches the index returned by the rearm - 1.
- */
- while (Q_PTR2IDX((rptr+1), cq->size_log2) != ret)
- rptr++;
-
- /*
- * Now rptr is the index for the (last) cqe that was
- * in-flight at the time the HW rearmed the CQ. We
- * spin until that CQE is valid.
- */
- cqe = cq->queue + Q_PTR2IDX(rptr, cq->size_log2);
- while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) {
- udelay(1);
- if (i++ > 1000000) {
- pr_err("%s: stalled rnic\n", rdev_p->dev_name);
- return -EIO;
- }
- }
-
- return 1;
- }
-
- return 0;
-}
-
-static int cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid)
-{
- struct rdma_cq_setup setup;
- setup.id = cqid;
- setup.base_addr = 0; /* NULL address */
- setup.size = 0; /* disaable the CQ */
- setup.credits = 0;
- setup.credit_thres = 0;
- setup.ovfl_mode = 0;
- return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
-}
-
-static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
-{
- u64 sge_cmd;
- struct t3_modify_qp_wr *wqe;
- struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
- if (!skb) {
- pr_debug("%s alloc_skb failed\n", __func__);
- return -ENOMEM;
- }
- wqe = skb_put_zero(skb, sizeof(*wqe));
- build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD,
- T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 0, qpid, 7,
- T3_SOPEOP);
- wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
- sge_cmd = qpid << 8 | 3;
- wqe->sge_cmd = cpu_to_be64(sge_cmd);
- skb->priority = CPL_PRIORITY_CONTROL;
- return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
-}
-
-int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel)
-{
- struct rdma_cq_setup setup;
- int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe);
-
- size += 1; /* one extra page for storing cq-in-err state */
- cq->cqid = cxio_hal_get_cqid(rdev_p->rscp);
- if (!cq->cqid)
- return -ENOMEM;
- if (kernel) {
- cq->sw_queue = kzalloc(size, GFP_KERNEL);
- if (!cq->sw_queue)
- return -ENOMEM;
- }
- cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), size,
- &(cq->dma_addr), GFP_KERNEL);
- if (!cq->queue) {
- kfree(cq->sw_queue);
- return -ENOMEM;
- }
- dma_unmap_addr_set(cq, mapping, cq->dma_addr);
- setup.id = cq->cqid;
- setup.base_addr = (u64) (cq->dma_addr);
- setup.size = 1UL << cq->size_log2;
- setup.credits = 65535;
- setup.credit_thres = 1;
- if (rdev_p->t3cdev_p->type != T3A)
- setup.ovfl_mode = 0;
- else
- setup.ovfl_mode = 1;
- return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
-}
-
-static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
-{
- struct cxio_qpid_list *entry;
- u32 qpid;
- int i;
-
- mutex_lock(&uctx->lock);
- if (!list_empty(&uctx->qpids)) {
- entry = list_entry(uctx->qpids.next, struct cxio_qpid_list,
- entry);
- list_del(&entry->entry);
- qpid = entry->qpid;
- kfree(entry);
- } else {
- qpid = cxio_hal_get_qpid(rdev_p->rscp);
- if (!qpid)
- goto out;
- for (i = qpid+1; i & rdev_p->qpmask; i++) {
- entry = kmalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry)
- break;
- entry->qpid = i;
- list_add_tail(&entry->entry, &uctx->qpids);
- }
- }
-out:
- mutex_unlock(&uctx->lock);
- pr_debug("%s qpid 0x%x\n", __func__, qpid);
- return qpid;
-}
-
-static void put_qpid(struct cxio_rdev *rdev_p, u32 qpid,
- struct cxio_ucontext *uctx)
-{
- struct cxio_qpid_list *entry;
-
- entry = kmalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry)
- return;
- pr_debug("%s qpid 0x%x\n", __func__, qpid);
- entry->qpid = qpid;
- mutex_lock(&uctx->lock);
- list_add_tail(&entry->entry, &uctx->qpids);
- mutex_unlock(&uctx->lock);
-}
-
-void cxio_release_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
-{
- struct list_head *pos, *nxt;
- struct cxio_qpid_list *entry;
-
- mutex_lock(&uctx->lock);
- list_for_each_safe(pos, nxt, &uctx->qpids) {
- entry = list_entry(pos, struct cxio_qpid_list, entry);
- list_del_init(&entry->entry);
- if (!(entry->qpid & rdev_p->qpmask))
- cxio_hal_put_qpid(rdev_p->rscp, entry->qpid);
- kfree(entry);
- }
- mutex_unlock(&uctx->lock);
-}
-
-void cxio_init_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
-{
- INIT_LIST_HEAD(&uctx->qpids);
- mutex_init(&uctx->lock);
-}
-
-int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
- struct t3_wq *wq, struct cxio_ucontext *uctx)
-{
- int depth = 1UL << wq->size_log2;
- int rqsize = 1UL << wq->rq_size_log2;
-
- wq->qpid = get_qpid(rdev_p, uctx);
- if (!wq->qpid)
- return -ENOMEM;
-
- wq->rq = kcalloc(depth, sizeof(struct t3_swrq), GFP_KERNEL);
- if (!wq->rq)
- goto err1;
-
- wq->rq_addr = cxio_hal_rqtpool_alloc(rdev_p, rqsize);
- if (!wq->rq_addr)
- goto err2;
-
- wq->sq = kcalloc(depth, sizeof(struct t3_swsq), GFP_KERNEL);
- if (!wq->sq)
- goto err3;
-
- wq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev),
- depth * sizeof(union t3_wr),
- &(wq->dma_addr), GFP_KERNEL);
- if (!wq->queue)
- goto err4;
-
- dma_unmap_addr_set(wq, mapping, wq->dma_addr);
- wq->doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr;
- if (!kernel_domain)
- wq->udb = (u64)rdev_p->rnic_info.udbell_physbase +
- (wq->qpid << rdev_p->qpshift);
- wq->rdev = rdev_p;
- pr_debug("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n",
- __func__, wq->qpid, wq->doorbell, (unsigned long long)wq->udb);
- return 0;
-err4:
- kfree(wq->sq);
-err3:
- cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, rqsize);
-err2:
- kfree(wq->rq);
-err1:
- put_qpid(rdev_p, wq->qpid, uctx);
- return -ENOMEM;
-}
-
-void cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
-{
- cxio_hal_clear_cq_ctx(rdev_p, cq->cqid);
- kfree(cq->sw_queue);
- dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
- (1UL << (cq->size_log2))
- * sizeof(struct t3_cqe) + 1, cq->queue,
- dma_unmap_addr(cq, mapping));
- cxio_hal_put_cqid(rdev_p->rscp, cq->cqid);
-}
-
-int cxio_destroy_qp(struct cxio_rdev *rdev_p, struct t3_wq *wq,
- struct cxio_ucontext *uctx)
-{
- dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
- (1UL << (wq->size_log2))
- * sizeof(union t3_wr), wq->queue,
- dma_unmap_addr(wq, mapping));
- kfree(wq->sq);
- cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, (1UL << wq->rq_size_log2));
- kfree(wq->rq);
- put_qpid(rdev_p, wq->qpid, uctx);
- return 0;
-}
-
-static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq)
-{
- struct t3_cqe cqe;
-
- pr_debug("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__,
- wq, cq, cq->sw_rptr, cq->sw_wptr);
- memset(&cqe, 0, sizeof(cqe));
- cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
- V_CQE_OPCODE(T3_SEND) |
- V_CQE_TYPE(0) |
- V_CQE_SWCQE(1) |
- V_CQE_QPID(wq->qpid) |
- V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr,
- cq->size_log2)));
- *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe;
- cq->sw_wptr++;
-}
-
-int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
-{
- u32 ptr;
- int flushed = 0;
-
- pr_debug("%s wq %p cq %p\n", __func__, wq, cq);
-
- /* flush RQ */
- pr_debug("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__,
- wq->rq_rptr, wq->rq_wptr, count);
- ptr = wq->rq_rptr + count;
- while (ptr++ != wq->rq_wptr) {
- insert_recv_cqe(wq, cq);
- flushed++;
- }
- return flushed;
-}
-
-static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq,
- struct t3_swsq *sqp)
-{
- struct t3_cqe cqe;
-
- pr_debug("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__,
- wq, cq, cq->sw_rptr, cq->sw_wptr);
- memset(&cqe, 0, sizeof(cqe));
- cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
- V_CQE_OPCODE(sqp->opcode) |
- V_CQE_TYPE(1) |
- V_CQE_SWCQE(1) |
- V_CQE_QPID(wq->qpid) |
- V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr,
- cq->size_log2)));
- cqe.u.scqe.wrid_hi = sqp->sq_wptr;
-
- *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe;
- cq->sw_wptr++;
-}
-
-int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
-{
- __u32 ptr = wq->sq_rptr + count;
- int flushed = 0;
- struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
-
- while (ptr != wq->sq_wptr) {
- sqp->signaled = 0;
- insert_sq_cqe(wq, cq, sqp);
- ptr++;
- sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
- flushed++;
- }
- return flushed;
-}
-
-/*
- * Move all CQEs from the HWCQ into the SWCQ.
- */
-void cxio_flush_hw_cq(struct t3_cq *cq)
-{
- struct t3_cqe *cqe, *swcqe;
-
- pr_debug("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid);
- cqe = cxio_next_hw_cqe(cq);
- while (cqe) {
- pr_debug("%s flushing hwcq rptr 0x%x to swcq wptr 0x%x\n",
- __func__, cq->rptr, cq->sw_wptr);
- swcqe = cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2);
- *swcqe = *cqe;
- swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
- cq->sw_wptr++;
- cq->rptr++;
- cqe = cxio_next_hw_cqe(cq);
- }
-}
-
-static int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq)
-{
- if (CQE_OPCODE(*cqe) == T3_TERMINATE)
- return 0;
-
- if ((CQE_OPCODE(*cqe) == T3_RDMA_WRITE) && RQ_TYPE(*cqe))
- return 0;
-
- if ((CQE_OPCODE(*cqe) == T3_READ_RESP) && SQ_TYPE(*cqe))
- return 0;
-
- if (CQE_SEND_OPCODE(*cqe) && RQ_TYPE(*cqe) &&
- Q_EMPTY(wq->rq_rptr, wq->rq_wptr))
- return 0;
-
- return 1;
-}
-
-void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
-{
- struct t3_cqe *cqe;
- u32 ptr;
-
- *count = 0;
- ptr = cq->sw_rptr;
- while (!Q_EMPTY(ptr, cq->sw_wptr)) {
- cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
- if ((SQ_TYPE(*cqe) ||
- ((CQE_OPCODE(*cqe) == T3_READ_RESP) && wq->oldest_read)) &&
- (CQE_QPID(*cqe) == wq->qpid))
- (*count)++;
- ptr++;
- }
- pr_debug("%s cq %p count %d\n", __func__, cq, *count);
-}
-
-void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
-{
- struct t3_cqe *cqe;
- u32 ptr;
-
- *count = 0;
- pr_debug("%s count zero %d\n", __func__, *count);
- ptr = cq->sw_rptr;
- while (!Q_EMPTY(ptr, cq->sw_wptr)) {
- cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
- if (RQ_TYPE(*cqe) && (CQE_OPCODE(*cqe) != T3_READ_RESP) &&
- (CQE_QPID(*cqe) == wq->qpid) && cqe_completes_wr(cqe, wq))
- (*count)++;
- ptr++;
- }
- pr_debug("%s cq %p count %d\n", __func__, cq, *count);
-}
-
-static int cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p)
-{
- struct rdma_cq_setup setup;
- setup.id = 0;
- setup.base_addr = 0; /* NULL address */
- setup.size = 1; /* enable the CQ */
- setup.credits = 0;
-
- /* force SGE to redirect to RspQ and interrupt */
- setup.credit_thres = 0;
- setup.ovfl_mode = 1;
- return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
-}
-
-static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
-{
- int err;
- u64 sge_cmd, ctx0, ctx1;
- u64 base_addr;
- struct t3_modify_qp_wr *wqe;
- struct sk_buff *skb;
-
- skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
- if (!skb) {
- pr_debug("%s alloc_skb failed\n", __func__);
- return -ENOMEM;
- }
- err = cxio_hal_init_ctrl_cq(rdev_p);
- if (err) {
- pr_debug("%s err %d initializing ctrl_cq\n", __func__, err);
- goto err;
- }
- rdev_p->ctrl_qp.workq = dma_alloc_coherent(
- &(rdev_p->rnic_info.pdev->dev),
- (1 << T3_CTRL_QP_SIZE_LOG2) *
- sizeof(union t3_wr),
- &(rdev_p->ctrl_qp.dma_addr),
- GFP_KERNEL);
- if (!rdev_p->ctrl_qp.workq) {
- pr_debug("%s dma_alloc_coherent failed\n", __func__);
- err = -ENOMEM;
- goto err;
- }
- dma_unmap_addr_set(&rdev_p->ctrl_qp, mapping,
- rdev_p->ctrl_qp.dma_addr);
- rdev_p->ctrl_qp.doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr;
-
- mutex_init(&rdev_p->ctrl_qp.lock);
- init_waitqueue_head(&rdev_p->ctrl_qp.waitq);
-
- /* update HW Ctrl QP context */
- base_addr = rdev_p->ctrl_qp.dma_addr;
- base_addr >>= 12;
- ctx0 = (V_EC_SIZE((1 << T3_CTRL_QP_SIZE_LOG2)) |
- V_EC_BASE_LO((u32) base_addr & 0xffff));
- ctx0 <<= 32;
- ctx0 |= V_EC_CREDITS(FW_WR_NUM);
- base_addr >>= 16;
- ctx1 = (u32) base_addr;
- base_addr >>= 32;
- ctx1 |= ((u64) (V_EC_BASE_HI((u32) base_addr & 0xf) | V_EC_RESPQ(0) |
- V_EC_TYPE(0) | V_EC_GEN(1) |
- V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32;
- wqe = skb_put_zero(skb, sizeof(*wqe));
- build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0,
- T3_CTL_QP_TID, 7, T3_SOPEOP);
- wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
- sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3;
- wqe->sge_cmd = cpu_to_be64(sge_cmd);
- wqe->ctx1 = cpu_to_be64(ctx1);
- wqe->ctx0 = cpu_to_be64(ctx0);
- pr_debug("CtrlQP dma_addr %pad workq %p size %d\n",
- &rdev_p->ctrl_qp.dma_addr, rdev_p->ctrl_qp.workq,
- 1 << T3_CTRL_QP_SIZE_LOG2);
- skb->priority = CPL_PRIORITY_CONTROL;
- return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
-err:
- kfree_skb(skb);
- return err;
-}
-
-static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p)
-{
- dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
- (1UL << T3_CTRL_QP_SIZE_LOG2)
- * sizeof(union t3_wr), rdev_p->ctrl_qp.workq,
- dma_unmap_addr(&rdev_p->ctrl_qp, mapping));
- return cxio_hal_clear_qp_ctx(rdev_p, T3_CTRL_QP_ID);
-}
-
-/* write len bytes of data into addr (32B aligned address)
- * If data is NULL, clear len byte of memory to zero.
- * caller acquires the ctrl_qp lock before the call
- */
-static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
- u32 len, void *data)
-{
- u32 i, nr_wqe, copy_len;
- u8 *copy_data;
- u8 wr_len, utx_len; /* length in 8 byte flit */
- enum t3_wr_flags flag;
- __be64 *wqe;
- u64 utx_cmd;
- addr &= 0x7FFFFFF;
- nr_wqe = len % 96 ? len / 96 + 1 : len / 96; /* 96B max per WQE */
- pr_debug("%s wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x\n",
- __func__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len,
- nr_wqe, data, addr);
- utx_len = 3; /* in 32B unit */
- for (i = 0; i < nr_wqe; i++) {
- if (Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr,
- T3_CTRL_QP_SIZE_LOG2)) {
- pr_debug("%s ctrl_qp full wtpr 0x%0x rptr 0x%0x, wait for more space i %d\n",
- __func__,
- rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i);
- if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
- !Q_FULL(rdev_p->ctrl_qp.rptr,
- rdev_p->ctrl_qp.wptr,
- T3_CTRL_QP_SIZE_LOG2))) {
- pr_debug("%s ctrl_qp workq interrupted\n",
- __func__);
- return -ERESTARTSYS;
- }
- pr_debug("%s ctrl_qp wakeup, continue posting work request i %d\n",
- __func__, i);
- }
- wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
- (1 << T3_CTRL_QP_SIZE_LOG2)));
- flag = 0;
- if (i == (nr_wqe - 1)) {
- /* last WQE */
- flag = T3_COMPLETION_FLAG;
- if (len % 32)
- utx_len = len / 32 + 1;
- else
- utx_len = len / 32;
- }
-
- /*
- * Force a CQE to return the credit to the workq in case
- * we posted more than half the max QP size of WRs
- */
- if ((i != 0) &&
- (i % (((1 << T3_CTRL_QP_SIZE_LOG2)) >> 1) == 0)) {
- flag = T3_COMPLETION_FLAG;
- pr_debug("%s force completion at i %d\n", __func__, i);
- }
-
- /* build the utx mem command */
- wqe += (sizeof(struct t3_bypass_wr) >> 3);
- utx_cmd = (T3_UTX_MEM_WRITE << 28) | (addr + i * 3);
- utx_cmd <<= 32;
- utx_cmd |= (utx_len << 28) | ((utx_len << 2) + 1);
- *wqe = cpu_to_be64(utx_cmd);
- wqe++;
- copy_data = (u8 *) data + i * 96;
- copy_len = len > 96 ? 96 : len;
-
- /* clear memory content if data is NULL */
- if (data)
- memcpy(wqe, copy_data, copy_len);
- else
- memset(wqe, 0, copy_len);
- if (copy_len % 32)
- memset(((u8 *) wqe) + copy_len, 0,
- 32 - (copy_len % 32));
- wr_len = ((sizeof(struct t3_bypass_wr)) >> 3) + 1 +
- (utx_len << 2);
- wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
- (1 << T3_CTRL_QP_SIZE_LOG2)));
-
- /* wptr in the WRID[31:0] */
- ((union t3_wrid *)(wqe+1))->id0.low = rdev_p->ctrl_qp.wptr;
-
- /*
- * This must be the last write with a memory barrier
- * for the genbit
- */
- build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag,
- Q_GENBIT(rdev_p->ctrl_qp.wptr,
- T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID,
- wr_len, T3_SOPEOP);
- if (flag == T3_COMPLETION_FLAG)
- ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID);
- len -= 96;
- rdev_p->ctrl_qp.wptr++;
- }
- return 0;
-}
-
-/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl_size and pbl_addr
- * OUT: stag index
- * TBD: shared memory region support
- */
-static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
- u32 *stag, u8 stag_state, u32 pdid,
- enum tpt_mem_type type, enum tpt_mem_perm perm,
- u32 zbva, u64 to, u32 len, u8 page_size,
- u32 pbl_size, u32 pbl_addr)
-{
- int err;
- struct tpt_entry tpt;
- u32 stag_idx;
- u32 wptr;
-
- if (cxio_fatal_error(rdev_p))
- return -EIO;
-
- stag_state = stag_state > 0;
- stag_idx = (*stag) >> 8;
-
- if ((!reset_tpt_entry) && !(*stag != T3_STAG_UNSET)) {
- stag_idx = cxio_hal_get_stag(rdev_p->rscp);
- if (!stag_idx)
- return -ENOMEM;
- *stag = (stag_idx << 8) | ((*stag) & 0xFF);
- }
- pr_debug("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
- __func__, stag_state, type, pdid, stag_idx);
-
- mutex_lock(&rdev_p->ctrl_qp.lock);
-
- /* write TPT entry */
- if (reset_tpt_entry)
- memset(&tpt, 0, sizeof(tpt));
- else {
- tpt.valid_stag_pdid = cpu_to_be32(F_TPT_VALID |
- V_TPT_STAG_KEY((*stag) & M_TPT_STAG_KEY) |
- V_TPT_STAG_STATE(stag_state) |
- V_TPT_STAG_TYPE(type) | V_TPT_PDID(pdid));
- BUG_ON(page_size >= 28);
- tpt.flags_pagesize_qpid = cpu_to_be32(V_TPT_PERM(perm) |
- ((perm & TPT_MW_BIND) ? F_TPT_MW_BIND_ENABLE : 0) |
- V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
- V_TPT_PAGE_SIZE(page_size));
- tpt.rsvd_pbl_addr = cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
- tpt.len = cpu_to_be32(len);
- tpt.va_hi = cpu_to_be32((u32) (to >> 32));
- tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL));
- tpt.rsvd_bind_cnt_or_pstag = 0;
- tpt.rsvd_pbl_size = cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2));
- }
- err = cxio_hal_ctrl_qp_write_mem(rdev_p,
- stag_idx +
- (rdev_p->rnic_info.tpt_base >> 5),
- sizeof(tpt), &tpt);
-
- /* release the stag index to free pool */
- if (reset_tpt_entry)
- cxio_hal_put_stag(rdev_p->rscp, stag_idx);
-
- wptr = rdev_p->ctrl_qp.wptr;
- mutex_unlock(&rdev_p->ctrl_qp.lock);
- if (!err)
- if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
- SEQ32_GE(rdev_p->ctrl_qp.rptr,
- wptr)))
- return -ERESTARTSYS;
- return err;
-}
-
-int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
- u32 pbl_addr, u32 pbl_size)
-{
- u32 wptr;
- int err;
-
- pr_debug("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
- __func__, pbl_addr, rdev_p->rnic_info.pbl_base,
- pbl_size);
-
- mutex_lock(&rdev_p->ctrl_qp.lock);
- err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3,
- pbl);
- wptr = rdev_p->ctrl_qp.wptr;
- mutex_unlock(&rdev_p->ctrl_qp.lock);
- if (err)
- return err;
-
- if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
- SEQ32_GE(rdev_p->ctrl_qp.rptr,
- wptr)))
- return -ERESTARTSYS;
-
- return 0;
-}
-
-int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
- enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
- u8 page_size, u32 pbl_size, u32 pbl_addr)
-{
- *stag = T3_STAG_UNSET;
- return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
- zbva, to, len, page_size, pbl_size, pbl_addr);
-}
-
-int cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
- enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
- u8 page_size, u32 pbl_size, u32 pbl_addr)
-{
- return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
- zbva, to, len, page_size, pbl_size, pbl_addr);
-}
-
-int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size,
- u32 pbl_addr)
-{
- return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
- pbl_size, pbl_addr);
-}
-
-int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid)
-{
- *stag = T3_STAG_UNSET;
- return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0,
- 0, 0);
-}
-
-int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag)
-{
- return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
- 0, 0);
-}
-
-int cxio_allocate_stag(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr)
-{
- *stag = T3_STAG_UNSET;
- return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_NON_SHARED_MR,
- 0, 0, 0ULL, 0, 0, pbl_size, pbl_addr);
-}
-
-int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
-{
- struct t3_rdma_init_wr *wqe;
- struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_ATOMIC);
- if (!skb)
- return -ENOMEM;
- pr_debug("%s rdev_p %p\n", __func__, rdev_p);
- wqe = __skb_put(skb, sizeof(*wqe));
- wqe->wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_INIT));
- wqe->wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(attr->tid) |
- V_FW_RIWR_LEN(sizeof(*wqe) >> 3));
- wqe->wrid.id1 = 0;
- wqe->qpid = cpu_to_be32(attr->qpid);
- wqe->pdid = cpu_to_be32(attr->pdid);
- wqe->scqid = cpu_to_be32(attr->scqid);
- wqe->rcqid = cpu_to_be32(attr->rcqid);
- wqe->rq_addr = cpu_to_be32(attr->rq_addr - rdev_p->rnic_info.rqt_base);
- wqe->rq_size = cpu_to_be32(attr->rq_size);
- wqe->mpaattrs = attr->mpaattrs;
- wqe->qpcaps = attr->qpcaps;
- wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss);
- wqe->rqe_count = cpu_to_be16(attr->rqe_count);
- wqe->flags_rtr_type = cpu_to_be16(attr->flags |
- V_RTR_TYPE(attr->rtr_type) |
- V_CHAN(attr->chan));
- wqe->ord = cpu_to_be32(attr->ord);
- wqe->ird = cpu_to_be32(attr->ird);
- wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
- wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size);
- wqe->irs = cpu_to_be32(attr->irs);
- skb->priority = 0; /* 0=>ToeQ; 1=>CtrlQ */
- return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
-}
-
-void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
-{
- cxio_ev_cb = ev_cb;
-}
-
-void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
-{
- cxio_ev_cb = NULL;
-}
-
-static int cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct sk_buff *skb)
-{
- static int cnt;
- struct cxio_rdev *rdev_p = NULL;
- struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data;
- pr_debug("%d: %s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x se %0x notify %0x cqbranch %0x creditth %0x\n",
- cnt, __func__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg),
- RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg),
- RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg),
- RSPQ_CREDIT_THRESH(rsp_msg));
- pr_debug("CQE: QPID 0x%0x genbit %0x type 0x%0x status 0x%0x opcode %d len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
- CQE_QPID(rsp_msg->cqe), CQE_GENBIT(rsp_msg->cqe),
- CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
- CQE_OPCODE(rsp_msg->cqe), CQE_LEN(rsp_msg->cqe),
- CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
- rdev_p = (struct cxio_rdev *)t3cdev_p->ulp;
- if (!rdev_p) {
- pr_debug("%s called by t3cdev %p with null ulp\n", __func__,
- t3cdev_p);
- return 0;
- }
- if (CQE_QPID(rsp_msg->cqe) == T3_CTRL_QP_ID) {
- rdev_p->ctrl_qp.rptr = CQE_WRID_LOW(rsp_msg->cqe) + 1;
- wake_up_interruptible(&rdev_p->ctrl_qp.waitq);
- dev_kfree_skb_irq(skb);
- } else if (CQE_QPID(rsp_msg->cqe) == 0xfff8)
- dev_kfree_skb_irq(skb);
- else if (cxio_ev_cb)
- (*cxio_ev_cb) (rdev_p, skb);
- else
- dev_kfree_skb_irq(skb);
- cnt++;
- return 0;
-}
-
-/* Caller takes care of locking if needed */
-int cxio_rdev_open(struct cxio_rdev *rdev_p)
-{
- struct net_device *netdev_p = NULL;
- int err = 0;
- if (strlen(rdev_p->dev_name)) {
- if (cxio_hal_find_rdev_by_name(rdev_p->dev_name)) {
- return -EBUSY;
- }
- netdev_p = dev_get_by_name(&init_net, rdev_p->dev_name);
- if (!netdev_p) {
- return -EINVAL;
- }
- dev_put(netdev_p);
- } else if (rdev_p->t3cdev_p) {
- if (cxio_hal_find_rdev_by_t3cdev(rdev_p->t3cdev_p)) {
- return -EBUSY;
- }
- netdev_p = rdev_p->t3cdev_p->lldev;
- strncpy(rdev_p->dev_name, rdev_p->t3cdev_p->name,
- T3_MAX_DEV_NAME_LEN);
- } else {
- pr_debug("%s t3cdev_p or dev_name must be set\n", __func__);
- return -EINVAL;
- }
-
- list_add_tail(&rdev_p->entry, &rdev_list);
-
- pr_debug("%s opening rnic dev %s\n", __func__, rdev_p->dev_name);
- memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp));
- if (!rdev_p->t3cdev_p)
- rdev_p->t3cdev_p = dev2t3cdev(netdev_p);
- rdev_p->t3cdev_p->ulp = (void *) rdev_p;
-
- err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_EMBEDDED_INFO,
- &(rdev_p->fw_info));
- if (err) {
- pr_err("%s t3cdev_p(%p)->ctl returned error %d\n",
- __func__, rdev_p->t3cdev_p, err);
- goto err1;
- }
- if (G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers) != CXIO_FW_MAJ) {
- pr_err("fatal firmware version mismatch: need version %u but adapter has version %u\n",
- CXIO_FW_MAJ,
- G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers));
- err = -EINVAL;
- goto err1;
- }
-
- err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS,
- &(rdev_p->rnic_info));
- if (err) {
- pr_err("%s t3cdev_p(%p)->ctl returned error %d\n",
- __func__, rdev_p->t3cdev_p, err);
- goto err1;
- }
- err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_PORTS,
- &(rdev_p->port_info));
- if (err) {
- pr_err("%s t3cdev_p(%p)->ctl returned error %d\n",
- __func__, rdev_p->t3cdev_p, err);
- goto err1;
- }
-
- /*
- * qpshift is the number of bits to shift the qpid left in order
- * to get the correct address of the doorbell for that qp.
- */
- cxio_init_ucontext(rdev_p, &rdev_p->uctx);
- rdev_p->qpshift = PAGE_SHIFT -
- ilog2(65536 >>
- ilog2(rdev_p->rnic_info.udbell_len >>
- PAGE_SHIFT));
- rdev_p->qpnr = rdev_p->rnic_info.udbell_len >> PAGE_SHIFT;
- rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1;
- pr_debug("%s rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x\n",
- __func__, rdev_p->dev_name, rdev_p->rnic_info.tpt_base,
- rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p),
- rdev_p->rnic_info.pbl_base,
- rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base,
- rdev_p->rnic_info.rqt_top);
- pr_debug("udbell_len 0x%0x udbell_physbase 0x%lx kdb_addr %p qpshift %lu qpnr %d qpmask 0x%x\n",
- rdev_p->rnic_info.udbell_len,
- rdev_p->rnic_info.udbell_physbase, rdev_p->rnic_info.kdb_addr,
- rdev_p->qpshift, rdev_p->qpnr, rdev_p->qpmask);
-
- err = cxio_hal_init_ctrl_qp(rdev_p);
- if (err) {
- pr_err("%s error %d initializing ctrl_qp\n", __func__, err);
- goto err1;
- }
- err = cxio_hal_init_resource(rdev_p, cxio_num_stags(rdev_p), 0,
- 0, T3_MAX_NUM_QP, T3_MAX_NUM_CQ,
- T3_MAX_NUM_PD);
- if (err) {
- pr_err("%s error %d initializing hal resources\n",
- __func__, err);
- goto err2;
- }
- err = cxio_hal_pblpool_create(rdev_p);
- if (err) {
- pr_err("%s error %d initializing pbl mem pool\n",
- __func__, err);
- goto err3;
- }
- err = cxio_hal_rqtpool_create(rdev_p);
- if (err) {
- pr_err("%s error %d initializing rqt mem pool\n",
- __func__, err);
- goto err4;
- }
- return 0;
-err4:
- cxio_hal_pblpool_destroy(rdev_p);
-err3:
- cxio_hal_destroy_resource(rdev_p->rscp);
-err2:
- cxio_hal_destroy_ctrl_qp(rdev_p);
-err1:
- rdev_p->t3cdev_p->ulp = NULL;
- list_del(&rdev_p->entry);
- return err;
-}
-
-void cxio_rdev_close(struct cxio_rdev *rdev_p)
-{
- if (rdev_p) {
- cxio_hal_pblpool_destroy(rdev_p);
- cxio_hal_rqtpool_destroy(rdev_p);
- list_del(&rdev_p->entry);
- cxio_hal_destroy_ctrl_qp(rdev_p);
- cxio_hal_destroy_resource(rdev_p->rscp);
- rdev_p->t3cdev_p->ulp = NULL;
- }
-}
-
-int __init cxio_hal_init(void)
-{
- if (cxio_hal_init_rhdl_resource(T3_MAX_NUM_RI))
- return -ENOMEM;
- t3_register_cpl_handler(CPL_ASYNC_NOTIF, cxio_hal_ev_handler);
- return 0;
-}
-
-void __exit cxio_hal_exit(void)
-{
- struct cxio_rdev *rdev, *tmp;
-
- t3_register_cpl_handler(CPL_ASYNC_NOTIF, NULL);
- list_for_each_entry_safe(rdev, tmp, &rdev_list, entry)
- cxio_rdev_close(rdev);
- cxio_hal_destroy_rhdl_resource();
-}
-
-static void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq)
-{
- struct t3_swsq *sqp;
- __u32 ptr = wq->sq_rptr;
- int count = Q_COUNT(wq->sq_rptr, wq->sq_wptr);
-
- sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
- while (count--)
- if (!sqp->signaled) {
- ptr++;
- sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
- } else if (sqp->complete) {
-
- /*
- * Insert this completed cqe into the swcq.
- */
- pr_debug("%s moving cqe into swcq sq idx %ld cq idx %ld\n",
- __func__, Q_PTR2IDX(ptr, wq->sq_size_log2),
- Q_PTR2IDX(cq->sw_wptr, cq->size_log2));
- sqp->cqe.header |= htonl(V_CQE_SWCQE(1));
- *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2))
- = sqp->cqe;
- cq->sw_wptr++;
- sqp->signaled = 0;
- break;
- } else
- break;
-}
-
-static void create_read_req_cqe(struct t3_wq *wq, struct t3_cqe *hw_cqe,
- struct t3_cqe *read_cqe)
-{
- read_cqe->u.scqe.wrid_hi = wq->oldest_read->sq_wptr;
- read_cqe->len = wq->oldest_read->read_len;
- read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(*hw_cqe)) |
- V_CQE_SWCQE(SW_CQE(*hw_cqe)) |
- V_CQE_OPCODE(T3_READ_REQ) |
- V_CQE_TYPE(1));
-}
-
-/*
- * Return a ptr to the next read wr in the SWSQ or NULL.
- */
-static void advance_oldest_read(struct t3_wq *wq)
-{
-
- u32 rptr = wq->oldest_read - wq->sq + 1;
- u32 wptr = Q_PTR2IDX(wq->sq_wptr, wq->sq_size_log2);
-
- while (Q_PTR2IDX(rptr, wq->sq_size_log2) != wptr) {
- wq->oldest_read = wq->sq + Q_PTR2IDX(rptr, wq->sq_size_log2);
-
- if (wq->oldest_read->opcode == T3_READ_REQ)
- return;
- rptr++;
- }
- wq->oldest_read = NULL;
-}
-
-/*
- * cxio_poll_cq
- *
- * Caller must:
- * check the validity of the first CQE,
- *	supply the wq associated with the qpid.
- *
- * credit: cq credit to return to sge.
- * cqe_flushed: 1 iff the CQE is flushed.
- * cqe: copy of the polled CQE.
- *
- * return value:
- * 0 CQE returned,
- * -1 CQE skipped, try again.
- */
-int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
- u8 *cqe_flushed, u64 *cookie, u32 *credit)
-{
- int ret = 0;
- struct t3_cqe *hw_cqe, read_cqe;
-
- *cqe_flushed = 0;
- *credit = 0;
- hw_cqe = cxio_next_cqe(cq);
-
- pr_debug("%s CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
- __func__, CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe),
- CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe),
- CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe),
- CQE_WRID_LOW(*hw_cqe));
-
- /*
- * skip cqe's not affiliated with a QP.
- */
- if (wq == NULL) {
- ret = -1;
- goto skip_cqe;
- }
-
- /*
- * Gotta tweak READ completions:
- * 1) the cqe doesn't contain the sq_wptr from the wr.
- * 2) opcode not reflected from the wr.
- * 3) read_len not reflected from the wr.
- * 4) cq_type is RQ_TYPE not SQ_TYPE.
- */
- if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) {
-
- /*
- * If this is an unsolicited read response, then the read
- * was generated by the kernel driver as part of peer-2-peer
- * connection setup. So ignore the completion.
- */
- if (!wq->oldest_read) {
- if (CQE_STATUS(*hw_cqe))
- wq->error = 1;
- ret = -1;
- goto skip_cqe;
- }
-
- /*
- * Don't write to the HWCQ, so create a new read req CQE
- * in local memory.
- */
- create_read_req_cqe(wq, hw_cqe, &read_cqe);
- hw_cqe = &read_cqe;
- advance_oldest_read(wq);
- }
-
- /*
- * T3A: Discard TERMINATE CQEs.
- */
- if (CQE_OPCODE(*hw_cqe) == T3_TERMINATE) {
- ret = -1;
- wq->error = 1;
- goto skip_cqe;
- }
-
- if (CQE_STATUS(*hw_cqe) || wq->error) {
- *cqe_flushed = wq->error;
- wq->error = 1;
-
- /*
- * T3A inserts errors into the CQE. We cannot return
- * these as work completions.
- */
- /* incoming write failures */
- if ((CQE_OPCODE(*hw_cqe) == T3_RDMA_WRITE)
- && RQ_TYPE(*hw_cqe)) {
- ret = -1;
- goto skip_cqe;
- }
- /* incoming read request failures */
- if ((CQE_OPCODE(*hw_cqe) == T3_READ_RESP) && SQ_TYPE(*hw_cqe)) {
- ret = -1;
- goto skip_cqe;
- }
-
- /* incoming SEND with no receive posted failures */
- if (CQE_SEND_OPCODE(*hw_cqe) && RQ_TYPE(*hw_cqe) &&
- Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
- ret = -1;
- goto skip_cqe;
- }
- BUG_ON((*cqe_flushed == 0) && !SW_CQE(*hw_cqe));
- goto proc_cqe;
- }
-
- /*
- * RECV completion.
- */
- if (RQ_TYPE(*hw_cqe)) {
-
- /*
- * HW only validates 4 bits of MSN. So we must validate that
-		 * the MSN in the SEND is the next expected MSN.  If it's not,
- * then we complete this with TPT_ERR_MSN and mark the wq in
- * error.
- */
-
- if (Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
- wq->error = 1;
- ret = -1;
- goto skip_cqe;
- }
-
- if (unlikely((CQE_WRID_MSN(*hw_cqe) != (wq->rq_rptr + 1)))) {
- wq->error = 1;
- hw_cqe->header |= htonl(V_CQE_STATUS(TPT_ERR_MSN));
- goto proc_cqe;
- }
- goto proc_cqe;
- }
-
- /*
- * If we get here its a send completion.
- *
- * Handle out of order completion. These get stuffed
- * in the SW SQ. Then the SW SQ is walked to move any
- * now in-order completions into the SW CQ. This handles
- * 2 cases:
- * 1) reaping unsignaled WRs when the first subsequent
- * signaled WR is completed.
- * 2) out of order read completions.
- */
- if (!SW_CQE(*hw_cqe) && (CQE_WRID_SQ_WPTR(*hw_cqe) != wq->sq_rptr)) {
- struct t3_swsq *sqp;
-
- pr_debug("%s out of order completion going in swsq at idx %ld\n",
- __func__,
- Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe),
- wq->sq_size_log2));
- sqp = wq->sq +
- Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2);
- sqp->cqe = *hw_cqe;
- sqp->complete = 1;
- ret = -1;
- goto flush_wq;
- }
-
-proc_cqe:
- *cqe = *hw_cqe;
-
- /*
- * Reap the associated WR(s) that are freed up with this
- * completion.
- */
- if (SQ_TYPE(*hw_cqe)) {
- wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe);
- pr_debug("%s completing sq idx %ld\n", __func__,
- Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2));
- *cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id;
- wq->sq_rptr++;
- } else {
- pr_debug("%s completing rq idx %ld\n", __func__,
- Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
- *cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id;
- if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr)
- cxio_hal_pblpool_free(wq->rdev,
- wq->rq[Q_PTR2IDX(wq->rq_rptr,
- wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE);
- BUG_ON(Q_EMPTY(wq->rq_rptr, wq->rq_wptr));
- wq->rq_rptr++;
- }
-
-flush_wq:
- /*
- * Flush any completed cqes that are now in-order.
- */
- flush_completed_wrs(wq, cq);
-
-skip_cqe:
- if (SW_CQE(*hw_cqe)) {
- pr_debug("%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x\n",
- __func__, cq, cq->cqid, cq->sw_rptr);
- ++cq->sw_rptr;
- } else {
- pr_debug("%s cq %p cqid 0x%x skip hw cqe rptr 0x%x\n",
- __func__, cq, cq->cqid, cq->rptr);
- ++cq->rptr;
-
- /*
- * T3A: compute credits.
- */
- if (((cq->rptr - cq->wptr) > (1 << (cq->size_log2 - 1)))
- || ((cq->rptr - cq->wptr) >= 128)) {
- *credit = cq->rptr - cq->wptr;
- cq->wptr = cq->rptr;
- }
- }
- return ret;
-}
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
deleted file mode 100644
index 40c029ffa425..000000000000
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __CXIO_HAL_H__
-#define __CXIO_HAL_H__
-
-#include <linux/list.h>
-#include <linux/mutex.h>
-#include <linux/kfifo.h>
-
-#include "t3_cpl.h"
-#include "t3cdev.h"
-#include "cxgb3_ctl_defs.h"
-#include "cxio_wr.h"
-
-#define T3_CTRL_QP_ID FW_RI_SGEEC_START
-#define T3_CTL_QP_TID FW_RI_TID_START
-#define T3_CTRL_QP_SIZE_LOG2 8
-#define T3_CTRL_CQ_ID 0
-
-#define T3_MAX_NUM_RI (1<<15)
-#define T3_MAX_NUM_QP (1<<15)
-#define T3_MAX_NUM_CQ (1<<15)
-#define T3_MAX_NUM_PD (1<<15)
-#define T3_MAX_PBL_SIZE 256
-#define T3_MAX_RQ_SIZE 1024
-#define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1)
-#define T3_MAX_CQ_DEPTH 65536
-#define T3_MAX_NUM_STAG (1<<15)
-#define T3_MAX_MR_SIZE 0x100000000ULL
-#define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */
-
-#define T3_STAG_UNSET 0xffffffff
-
-#define T3_MAX_DEV_NAME_LEN 32
-
-#define CXIO_FW_MAJ 7
-
-struct cxio_hal_ctrl_qp {
- u32 wptr;
- u32 rptr;
-	struct mutex lock;	/* for the wptr, can sleep */
- wait_queue_head_t waitq;/* wait for RspQ/CQE msg */
- union t3_wr *workq; /* the work request queue */
- dma_addr_t dma_addr; /* pci bus address of the workq */
- DEFINE_DMA_UNMAP_ADDR(mapping);
- void __iomem *doorbell;
-};
-
-struct cxio_hal_resource {
- struct kfifo tpt_fifo;
- spinlock_t tpt_fifo_lock;
- struct kfifo qpid_fifo;
- spinlock_t qpid_fifo_lock;
- struct kfifo cqid_fifo;
- spinlock_t cqid_fifo_lock;
- struct kfifo pdid_fifo;
- spinlock_t pdid_fifo_lock;
-};
-
-struct cxio_qpid_list {
- struct list_head entry;
- u32 qpid;
-};
-
-struct cxio_ucontext {
- struct list_head qpids;
- struct mutex lock;
-};
-
-struct cxio_rdev {
- char dev_name[T3_MAX_DEV_NAME_LEN];
- struct t3cdev *t3cdev_p;
- struct rdma_info rnic_info;
- struct adap_ports port_info;
- struct cxio_hal_resource *rscp;
- struct cxio_hal_ctrl_qp ctrl_qp;
- void *ulp;
- unsigned long qpshift;
- u32 qpnr;
- u32 qpmask;
- struct cxio_ucontext uctx;
- struct gen_pool *pbl_pool;
- struct gen_pool *rqt_pool;
- struct list_head entry;
- struct ch_embedded_info fw_info;
- u32 flags;
-#define CXIO_ERROR_FATAL 1
-};
-
-static inline int cxio_fatal_error(struct cxio_rdev *rdev_p)
-{
- return rdev_p->flags & CXIO_ERROR_FATAL;
-}
-
-static inline int cxio_num_stags(struct cxio_rdev *rdev_p)
-{
- return min((int)T3_MAX_NUM_STAG, (int)((rdev_p->rnic_info.tpt_top - rdev_p->rnic_info.tpt_base) >> 5));
-}
-
-typedef void (*cxio_hal_ev_callback_func_t) (struct cxio_rdev * rdev_p,
- struct sk_buff * skb);
-
-#define RSPQ_CQID(rsp) (be32_to_cpu(rsp->cq_ptrid) & 0xffff)
-#define RSPQ_CQPTR(rsp) ((be32_to_cpu(rsp->cq_ptrid) >> 16) & 0xffff)
-#define RSPQ_GENBIT(rsp) ((be32_to_cpu(rsp->flags) >> 16) & 1)
-#define RSPQ_OVERFLOW(rsp) ((be32_to_cpu(rsp->flags) >> 17) & 1)
-#define RSPQ_AN(rsp) ((be32_to_cpu(rsp->flags) >> 18) & 1)
-#define RSPQ_SE(rsp) ((be32_to_cpu(rsp->flags) >> 19) & 1)
-#define RSPQ_NOTIFY(rsp) ((be32_to_cpu(rsp->flags) >> 20) & 1)
-#define RSPQ_CQBRANCH(rsp) ((be32_to_cpu(rsp->flags) >> 21) & 1)
-#define RSPQ_CREDIT_THRESH(rsp) ((be32_to_cpu(rsp->flags) >> 22) & 1)
-
-struct respQ_msg_t {
- __be32 flags; /* flit 0 */
- __be32 cq_ptrid;
- __be64 rsvd; /* flit 1 */
- struct t3_cqe cqe; /* flits 2-3 */
-};
-
-enum t3_cq_opcode {
- CQ_ARM_AN = 0x2,
- CQ_ARM_SE = 0x6,
- CQ_FORCE_AN = 0x3,
- CQ_CREDIT_UPDATE = 0x7
-};
-
-int cxio_rdev_open(struct cxio_rdev *rdev);
-void cxio_rdev_close(struct cxio_rdev *rdev);
-int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq,
- enum t3_cq_opcode op, u32 credit);
-int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq, int kernel);
-void cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
-void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
-void cxio_init_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
-int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq,
- struct cxio_ucontext *uctx);
-int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq,
- struct cxio_ucontext *uctx);
-int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode);
-int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
- u32 pbl_addr, u32 pbl_size);
-int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
- enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
- u8 page_size, u32 pbl_size, u32 pbl_addr);
-int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
- enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
- u8 page_size, u32 pbl_size, u32 pbl_addr);
-int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size,
- u32 pbl_addr);
-int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid);
-int cxio_allocate_stag(struct cxio_rdev *rdev, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr);
-int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag);
-int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr);
-void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
-void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
-u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp);
-void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid);
-int __init cxio_hal_init(void);
-void __exit cxio_hal_exit(void);
-int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count);
-int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count);
-void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
-void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
-void cxio_flush_hw_cq(struct t3_cq *cq);
-int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
- u8 *cqe_flushed, u64 *cookie, u32 *credit);
-int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb);
-
-#ifdef pr_fmt
-#undef pr_fmt
-#endif
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#endif
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
deleted file mode 100644
index c6e7bc4420b6..000000000000
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ /dev/null
@@ -1,344 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-/* Crude resource management */
-#include <linux/kernel.h>
-#include <linux/random.h>
-#include <linux/slab.h>
-#include <linux/kfifo.h>
-#include <linux/spinlock.h>
-#include <linux/errno.h>
-#include "cxio_resource.h"
-#include "cxio_hal.h"
-
-static struct kfifo rhdl_fifo;
-static spinlock_t rhdl_fifo_lock;
-
-#define RANDOM_SIZE 16
-
-static int __cxio_init_resource_fifo(struct kfifo *fifo,
- spinlock_t *fifo_lock,
- u32 nr, u32 skip_low,
- u32 skip_high,
- int random)
-{
- u32 i, j, entry = 0, idx;
- u32 random_bytes;
- u32 rarray[16];
- spin_lock_init(fifo_lock);
-
- if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL))
- return -ENOMEM;
-
- for (i = 0; i < skip_low + skip_high; i++)
- kfifo_in(fifo, (unsigned char *) &entry, sizeof(u32));
- if (random) {
- j = 0;
- random_bytes = prandom_u32();
- for (i = 0; i < RANDOM_SIZE; i++)
- rarray[i] = i + skip_low;
- for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) {
- if (j >= RANDOM_SIZE) {
- j = 0;
- random_bytes = prandom_u32();
- }
- idx = (random_bytes >> (j * 2)) & 0xF;
- kfifo_in(fifo,
- (unsigned char *) &rarray[idx],
- sizeof(u32));
- rarray[idx] = i;
- j++;
- }
- for (i = 0; i < RANDOM_SIZE; i++)
- kfifo_in(fifo,
- (unsigned char *) &rarray[i],
- sizeof(u32));
- } else
- for (i = skip_low; i < nr - skip_high; i++)
- kfifo_in(fifo, (unsigned char *) &i, sizeof(u32));
-
- for (i = 0; i < skip_low + skip_high; i++)
- if (kfifo_out_locked(fifo, (unsigned char *) &entry,
- sizeof(u32), fifo_lock) != sizeof(u32))
- break;
- return 0;
-}
-
-static int cxio_init_resource_fifo(struct kfifo *fifo, spinlock_t * fifo_lock,
- u32 nr, u32 skip_low, u32 skip_high)
-{
- return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
- skip_high, 0));
-}
-
-static int cxio_init_resource_fifo_random(struct kfifo *fifo,
- spinlock_t * fifo_lock,
- u32 nr, u32 skip_low, u32 skip_high)
-{
-
- return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
- skip_high, 1));
-}
-
-static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p)
-{
- u32 i;
-
- spin_lock_init(&rdev_p->rscp->qpid_fifo_lock);
-
- if (kfifo_alloc(&rdev_p->rscp->qpid_fifo, T3_MAX_NUM_QP * sizeof(u32),
- GFP_KERNEL))
- return -ENOMEM;
-
- for (i = 16; i < T3_MAX_NUM_QP; i++)
- if (!(i & rdev_p->qpmask))
- kfifo_in(&rdev_p->rscp->qpid_fifo,
- (unsigned char *) &i, sizeof(u32));
- return 0;
-}
-
-int cxio_hal_init_rhdl_resource(u32 nr_rhdl)
-{
- return cxio_init_resource_fifo(&rhdl_fifo, &rhdl_fifo_lock, nr_rhdl, 1,
- 0);
-}
-
-void cxio_hal_destroy_rhdl_resource(void)
-{
- kfifo_free(&rhdl_fifo);
-}
-
-/* nr_* must be power of 2 */
-int cxio_hal_init_resource(struct cxio_rdev *rdev_p,
- u32 nr_tpt, u32 nr_pbl,
- u32 nr_rqt, u32 nr_qpid, u32 nr_cqid, u32 nr_pdid)
-{
- int err = 0;
- struct cxio_hal_resource *rscp;
-
- rscp = kmalloc(sizeof(*rscp), GFP_KERNEL);
- if (!rscp)
- return -ENOMEM;
- rdev_p->rscp = rscp;
- err = cxio_init_resource_fifo_random(&rscp->tpt_fifo,
- &rscp->tpt_fifo_lock,
- nr_tpt, 1, 0);
- if (err)
- goto tpt_err;
- err = cxio_init_qpid_fifo(rdev_p);
- if (err)
- goto qpid_err;
- err = cxio_init_resource_fifo(&rscp->cqid_fifo, &rscp->cqid_fifo_lock,
- nr_cqid, 1, 0);
- if (err)
- goto cqid_err;
- err = cxio_init_resource_fifo(&rscp->pdid_fifo, &rscp->pdid_fifo_lock,
- nr_pdid, 1, 0);
- if (err)
- goto pdid_err;
- return 0;
-pdid_err:
- kfifo_free(&rscp->cqid_fifo);
-cqid_err:
- kfifo_free(&rscp->qpid_fifo);
-qpid_err:
- kfifo_free(&rscp->tpt_fifo);
-tpt_err:
- return -ENOMEM;
-}
-
-/*
- * returns 0 if no resource available
- */
-static u32 cxio_hal_get_resource(struct kfifo *fifo, spinlock_t * lock)
-{
- u32 entry;
- if (kfifo_out_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock))
- return entry;
- else
-		return 0;	/* fifo empty */
-}
-
-static void cxio_hal_put_resource(struct kfifo *fifo, spinlock_t * lock,
- u32 entry)
-{
- BUG_ON(
- kfifo_in_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)
- == 0);
-}
-
-u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp)
-{
- return cxio_hal_get_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock);
-}
-
-void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag)
-{
- cxio_hal_put_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock, stag);
-}
-
-u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp)
-{
- u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo,
- &rscp->qpid_fifo_lock);
- pr_debug("%s qpid 0x%x\n", __func__, qpid);
- return qpid;
-}
-
-void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid)
-{
- pr_debug("%s qpid 0x%x\n", __func__, qpid);
- cxio_hal_put_resource(&rscp->qpid_fifo, &rscp->qpid_fifo_lock, qpid);
-}
-
-u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp)
-{
- return cxio_hal_get_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock);
-}
-
-void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid)
-{
- cxio_hal_put_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock, cqid);
-}
-
-u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp)
-{
- return cxio_hal_get_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock);
-}
-
-void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid)
-{
- cxio_hal_put_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock, pdid);
-}
-
-void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp)
-{
- kfifo_free(&rscp->tpt_fifo);
- kfifo_free(&rscp->cqid_fifo);
- kfifo_free(&rscp->qpid_fifo);
- kfifo_free(&rscp->pdid_fifo);
- kfree(rscp);
-}
-
-/*
- * PBL Memory Manager. Uses Linux generic allocator.
- */
-
-#define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */
-
-u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size)
-{
- unsigned long addr = gen_pool_alloc(rdev_p->pbl_pool, size);
- pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
- return (u32)addr;
-}
-
-void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
-{
- pr_debug("%s addr 0x%x size %d\n", __func__, addr, size);
- gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size);
-}
-
-int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p)
-{
- unsigned pbl_start, pbl_chunk;
-
- rdev_p->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1);
- if (!rdev_p->pbl_pool)
- return -ENOMEM;
-
- pbl_start = rdev_p->rnic_info.pbl_base;
- pbl_chunk = rdev_p->rnic_info.pbl_top - pbl_start + 1;
-
- while (pbl_start < rdev_p->rnic_info.pbl_top) {
- pbl_chunk = min(rdev_p->rnic_info.pbl_top - pbl_start + 1,
- pbl_chunk);
- if (gen_pool_add(rdev_p->pbl_pool, pbl_start, pbl_chunk, -1)) {
- pr_debug("%s failed to add PBL chunk (%x/%x)\n",
- __func__, pbl_start, pbl_chunk);
- if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) {
- pr_warn("%s: Failed to add all PBL chunks (%x/%x)\n",
- __func__, pbl_start,
- rdev_p->rnic_info.pbl_top - pbl_start);
- return 0;
- }
- pbl_chunk >>= 1;
- } else {
- pr_debug("%s added PBL chunk (%x/%x)\n",
- __func__, pbl_start, pbl_chunk);
- pbl_start += pbl_chunk;
- }
- }
-
- return 0;
-}
-
-void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p)
-{
- gen_pool_destroy(rdev_p->pbl_pool);
-}
-
-/*
- * RQT Memory Manager. Uses Linux generic allocator.
- */
-
-#define MIN_RQT_SHIFT 10	/* 1KB == min RQT size (16 entries) */
-#define RQT_CHUNK 2*1024*1024
-
-u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size)
-{
- unsigned long addr = gen_pool_alloc(rdev_p->rqt_pool, size << 6);
- pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6);
- return (u32)addr;
-}
-
-void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
-{
- pr_debug("%s addr 0x%x size %d\n", __func__, addr, size << 6);
- gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6);
-}
-
-int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p)
-{
- unsigned long i;
- rdev_p->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1);
- if (rdev_p->rqt_pool)
- for (i = rdev_p->rnic_info.rqt_base;
- i <= rdev_p->rnic_info.rqt_top - RQT_CHUNK + 1;
- i += RQT_CHUNK)
- gen_pool_add(rdev_p->rqt_pool, i, RQT_CHUNK, -1);
- return rdev_p->rqt_pool ? 0 : -ENOMEM;
-}
-
-void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p)
-{
- gen_pool_destroy(rdev_p->rqt_pool);
-}
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.h b/drivers/infiniband/hw/cxgb3/cxio_resource.h
deleted file mode 100644
index a2703a3d882d..000000000000
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __CXIO_RESOURCE_H__
-#define __CXIO_RESOURCE_H__
-
-#include <linux/kernel.h>
-#include <linux/random.h>
-#include <linux/slab.h>
-#include <linux/kfifo.h>
-#include <linux/spinlock.h>
-#include <linux/errno.h>
-#include <linux/genalloc.h>
-#include "cxio_hal.h"
-
-extern int cxio_hal_init_rhdl_resource(u32 nr_rhdl);
-extern void cxio_hal_destroy_rhdl_resource(void);
-extern int cxio_hal_init_resource(struct cxio_rdev *rdev_p,
- u32 nr_tpt, u32 nr_pbl,
- u32 nr_rqt, u32 nr_qpid, u32 nr_cqid,
- u32 nr_pdid);
-extern u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp);
-extern void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag);
-extern u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp);
-extern void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid);
-extern u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp);
-extern void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid);
-extern void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp);
-
-#define PBL_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.pbl_base )
-extern int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p);
-extern void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p);
-extern u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size);
-extern void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size);
-
-#define RQT_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.rqt_base )
-extern int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p);
-extern void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p);
-extern u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size);
-extern void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size);
-#endif
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
deleted file mode 100644
index 53aa5c36247a..000000000000
--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
+++ /dev/null
@@ -1,802 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __CXIO_WR_H__
-#define __CXIO_WR_H__
-
-#include <asm/io.h>
-#include <linux/pci.h>
-#include <linux/timer.h>
-#include "firmware_exports.h"
-
-#define T3_MAX_SGE 4
-#define T3_MAX_INLINE 64
-#define T3_STAG0_PBL_SIZE (2 * T3_MAX_SGE << 3)
-#define T3_STAG0_MAX_PBE_LEN (128 * 1024 * 1024)
-#define T3_STAG0_PAGE_SHIFT 15
-
-#define Q_EMPTY(rptr,wptr) ((rptr)==(wptr))
-#define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \
- ((rptr)!=(wptr)) )
-#define Q_GENBIT(ptr,size_log2) (!(((ptr)>>size_log2)&0x1))
-#define Q_FREECNT(rptr,wptr,size_log2) ((1UL<<size_log2)-((wptr)-(rptr)))
-#define Q_COUNT(rptr,wptr) ((wptr)-(rptr))
-#define Q_PTR2IDX(ptr,size_log2) (ptr & ((1UL<<size_log2)-1))
-
-static inline void ring_doorbell(void __iomem *doorbell, u32 qpid)
-{
- writel(((1<<31) | qpid), doorbell);
-}
-
-#define SEQ32_GE(x,y) (!( (((u32) (x)) - ((u32) (y))) & 0x80000000 ))
-
-enum t3_wr_flags {
- T3_COMPLETION_FLAG = 0x01,
- T3_NOTIFY_FLAG = 0x02,
- T3_SOLICITED_EVENT_FLAG = 0x04,
- T3_READ_FENCE_FLAG = 0x08,
- T3_LOCAL_FENCE_FLAG = 0x10
-} __packed;
-
-enum t3_wr_opcode {
- T3_WR_BP = FW_WROPCODE_RI_BYPASS,
- T3_WR_SEND = FW_WROPCODE_RI_SEND,
- T3_WR_WRITE = FW_WROPCODE_RI_RDMA_WRITE,
- T3_WR_READ = FW_WROPCODE_RI_RDMA_READ,
- T3_WR_INV_STAG = FW_WROPCODE_RI_LOCAL_INV,
- T3_WR_BIND = FW_WROPCODE_RI_BIND_MW,
- T3_WR_RCV = FW_WROPCODE_RI_RECEIVE,
- T3_WR_INIT = FW_WROPCODE_RI_RDMA_INIT,
- T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP,
- T3_WR_FASTREG = FW_WROPCODE_RI_FASTREGISTER_MR
-} __packed;
-
-enum t3_rdma_opcode {
- T3_RDMA_WRITE, /* IETF RDMAP v1.0 ... */
- T3_READ_REQ,
- T3_READ_RESP,
- T3_SEND,
- T3_SEND_WITH_INV,
- T3_SEND_WITH_SE,
- T3_SEND_WITH_SE_INV,
- T3_TERMINATE,
- T3_RDMA_INIT, /* CHELSIO RI specific ... */
- T3_BIND_MW,
- T3_FAST_REGISTER,
- T3_LOCAL_INV,
- T3_QP_MOD,
- T3_BYPASS,
- T3_RDMA_READ_REQ_WITH_INV,
-} __packed;
-
-static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop)
-{
- switch (wrop) {
- case T3_WR_BP: return T3_BYPASS;
- case T3_WR_SEND: return T3_SEND;
- case T3_WR_WRITE: return T3_RDMA_WRITE;
- case T3_WR_READ: return T3_READ_REQ;
- case T3_WR_INV_STAG: return T3_LOCAL_INV;
- case T3_WR_BIND: return T3_BIND_MW;
- case T3_WR_INIT: return T3_RDMA_INIT;
- case T3_WR_QP_MOD: return T3_QP_MOD;
- case T3_WR_FASTREG: return T3_FAST_REGISTER;
- default: break;
- }
- return -1;
-}
-
-
-/* Work request id */
-union t3_wrid {
- struct {
- u32 hi;
- u32 low;
- } id0;
- u64 id1;
-};
-
-#define WRID(wrid) (wrid.id1)
-#define WRID_GEN(wrid) (wrid.id0.wr_gen)
-#define WRID_IDX(wrid) (wrid.id0.wr_idx)
-#define WRID_LO(wrid) (wrid.id0.wr_lo)
-
-struct fw_riwrh {
- __be32 op_seop_flags;
- __be32 gen_tid_len;
-};
-
-#define S_FW_RIWR_OP 24
-#define M_FW_RIWR_OP 0xff
-#define V_FW_RIWR_OP(x) ((x) << S_FW_RIWR_OP)
-#define G_FW_RIWR_OP(x) ((((x) >> S_FW_RIWR_OP)) & M_FW_RIWR_OP)
-
-#define S_FW_RIWR_SOPEOP 22
-#define M_FW_RIWR_SOPEOP 0x3
-#define V_FW_RIWR_SOPEOP(x) ((x) << S_FW_RIWR_SOPEOP)
-
-#define S_FW_RIWR_FLAGS 8
-#define M_FW_RIWR_FLAGS 0x3fffff
-#define V_FW_RIWR_FLAGS(x) ((x) << S_FW_RIWR_FLAGS)
-#define G_FW_RIWR_FLAGS(x) ((((x) >> S_FW_RIWR_FLAGS)) & M_FW_RIWR_FLAGS)
-
-#define S_FW_RIWR_TID 8
-#define V_FW_RIWR_TID(x) ((x) << S_FW_RIWR_TID)
-
-#define S_FW_RIWR_LEN 0
-#define V_FW_RIWR_LEN(x) ((x) << S_FW_RIWR_LEN)
-
-#define S_FW_RIWR_GEN 31
-#define V_FW_RIWR_GEN(x) ((x) << S_FW_RIWR_GEN)
-
-struct t3_sge {
- __be32 stag;
- __be32 len;
- __be64 to;
-};
-
-/* If num_sgle is zero, flit 5+ contains immediate data.*/
-struct t3_send_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
-
- u8 rdmaop; /* 2 */
- u8 reserved[3];
- __be32 rem_stag;
- __be32 plen; /* 3 */
- __be32 num_sgle;
- struct t3_sge sgl[T3_MAX_SGE]; /* 4+ */
-};
-
-#define T3_MAX_FASTREG_DEPTH 10
-#define T3_MAX_FASTREG_FRAG 10
-
-struct t3_fastreg_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
- __be32 stag; /* 2 */
- __be32 len;
- __be32 va_base_hi; /* 3 */
- __be32 va_base_lo_fbo;
- __be32 page_type_perms; /* 4 */
- __be32 reserved1;
- __be64 pbl_addrs[0]; /* 5+ */
-};
-
-/*
- * If a fastreg wr spans multiple wqes, then the 2nd fragment looks like this.
- */
-struct t3_pbl_frag {
- struct fw_riwrh wrh; /* 0 */
- __be64 pbl_addrs[14]; /* 1..14 */
-};
-
-#define S_FR_PAGE_COUNT 24
-#define M_FR_PAGE_COUNT 0xff
-#define V_FR_PAGE_COUNT(x) ((x) << S_FR_PAGE_COUNT)
-#define G_FR_PAGE_COUNT(x) ((((x) >> S_FR_PAGE_COUNT)) & M_FR_PAGE_COUNT)
-
-#define S_FR_PAGE_SIZE 16
-#define M_FR_PAGE_SIZE 0x1f
-#define V_FR_PAGE_SIZE(x) ((x) << S_FR_PAGE_SIZE)
-#define G_FR_PAGE_SIZE(x) ((((x) >> S_FR_PAGE_SIZE)) & M_FR_PAGE_SIZE)
-
-#define S_FR_TYPE 8
-#define M_FR_TYPE 0x1
-#define V_FR_TYPE(x) ((x) << S_FR_TYPE)
-#define G_FR_TYPE(x) ((((x) >> S_FR_TYPE)) & M_FR_TYPE)
-
-#define S_FR_PERMS 0
-#define M_FR_PERMS 0xff
-#define V_FR_PERMS(x) ((x) << S_FR_PERMS)
-#define G_FR_PERMS(x) ((((x) >> S_FR_PERMS)) & M_FR_PERMS)
-
-struct t3_local_inv_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
- __be32 stag; /* 2 */
- __be32 reserved;
-};
-
-struct t3_rdma_write_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
- u8 rdmaop; /* 2 */
- u8 reserved[3];
- __be32 stag_sink;
- __be64 to_sink; /* 3 */
- __be32 plen; /* 4 */
- __be32 num_sgle;
- struct t3_sge sgl[T3_MAX_SGE]; /* 5+ */
-};
-
-struct t3_rdma_read_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
- u8 rdmaop; /* 2 */
- u8 local_inv;
- u8 reserved[2];
- __be32 rem_stag;
- __be64 rem_to; /* 3 */
- __be32 local_stag; /* 4 */
- __be32 local_len;
- __be64 local_to; /* 5 */
-};
-
-struct t3_bind_mw_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
- u16 reserved; /* 2 */
- u8 type;
- u8 perms;
- __be32 mr_stag;
- __be32 mw_stag; /* 3 */
- __be32 mw_len;
- __be64 mw_va; /* 4 */
- __be32 mr_pbl_addr; /* 5 */
- u8 reserved2[3];
- u8 mr_pagesz;
-};
-
-struct t3_receive_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
- u8 pagesz[T3_MAX_SGE];
- __be32 num_sgle; /* 2 */
- struct t3_sge sgl[T3_MAX_SGE]; /* 3+ */
- __be32 pbl_addr[T3_MAX_SGE];
-};
-
-struct t3_bypass_wr {
- struct fw_riwrh wrh;
- union t3_wrid wrid; /* 1 */
-};
-
-struct t3_modify_qp_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
- __be32 flags; /* 2 */
- __be32 quiesce; /* 2 */
- __be32 max_ird; /* 3 */
- __be32 max_ord; /* 3 */
- __be64 sge_cmd; /* 4 */
- __be64 ctx1; /* 5 */
- __be64 ctx0; /* 6 */
-};
-
-enum t3_modify_qp_flags {
- MODQP_QUIESCE = 0x01,
- MODQP_MAX_IRD = 0x02,
- MODQP_MAX_ORD = 0x04,
- MODQP_WRITE_EC = 0x08,
- MODQP_READ_EC = 0x10,
-};
-
-
-enum t3_mpa_attrs {
- uP_RI_MPA_RX_MARKER_ENABLE = 0x1,
- uP_RI_MPA_TX_MARKER_ENABLE = 0x2,
- uP_RI_MPA_CRC_ENABLE = 0x4,
- uP_RI_MPA_IETF_ENABLE = 0x8
-} __packed;
-
-enum t3_qp_caps {
- uP_RI_QP_RDMA_READ_ENABLE = 0x01,
- uP_RI_QP_RDMA_WRITE_ENABLE = 0x02,
- uP_RI_QP_BIND_ENABLE = 0x04,
- uP_RI_QP_FAST_REGISTER_ENABLE = 0x08,
- uP_RI_QP_STAG0_ENABLE = 0x10
-} __packed;
-
-enum rdma_init_rtr_types {
- RTR_READ = 1,
- RTR_WRITE = 2,
- RTR_SEND = 3,
-};
-
-#define S_RTR_TYPE 2
-#define M_RTR_TYPE 0x3
-#define V_RTR_TYPE(x) ((x) << S_RTR_TYPE)
-#define G_RTR_TYPE(x) ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE)
-
-#define S_CHAN 4
-#define M_CHAN 0x3
-#define V_CHAN(x) ((x) << S_CHAN)
-#define G_CHAN(x) ((((x) >> S_CHAN)) & M_CHAN)
-
-struct t3_rdma_init_attr {
- u32 tid;
- u32 qpid;
- u32 pdid;
- u32 scqid;
- u32 rcqid;
- u32 rq_addr;
- u32 rq_size;
- enum t3_mpa_attrs mpaattrs;
- enum t3_qp_caps qpcaps;
- u16 tcp_emss;
- u32 ord;
- u32 ird;
- u64 qp_dma_addr;
- u32 qp_dma_size;
- enum rdma_init_rtr_types rtr_type;
- u16 flags;
- u16 rqe_count;
- u32 irs;
- u32 chan;
-};
-
-struct t3_rdma_init_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
- __be32 qpid; /* 2 */
- __be32 pdid;
- __be32 scqid; /* 3 */
- __be32 rcqid;
- __be32 rq_addr; /* 4 */
- __be32 rq_size;
- u8 mpaattrs; /* 5 */
- u8 qpcaps;
- __be16 ulpdu_size;
- __be16 flags_rtr_type;
- __be16 rqe_count;
- __be32 ord; /* 6 */
- __be32 ird;
- __be64 qp_dma_addr; /* 7 */
- __be32 qp_dma_size; /* 8 */
- __be32 irs;
-};
-
-struct t3_genbit {
- u64 flit[15];
- __be64 genbit;
-};
-
-struct t3_wq_in_err {
- u64 flit[13];
- u64 err;
-};
-
-enum rdma_init_wr_flags {
- MPA_INITIATOR = (1<<0),
- PRIV_QP = (1<<1),
-};
-
-union t3_wr {
- struct t3_send_wr send;
- struct t3_rdma_write_wr write;
- struct t3_rdma_read_wr read;
- struct t3_receive_wr recv;
- struct t3_fastreg_wr fastreg;
- struct t3_pbl_frag pbl_frag;
- struct t3_local_inv_wr local_inv;
- struct t3_bind_mw_wr bind;
- struct t3_bypass_wr bypass;
- struct t3_rdma_init_wr init;
- struct t3_modify_qp_wr qp_mod;
- struct t3_genbit genbit;
- struct t3_wq_in_err wq_in_err;
- __be64 flit[16];
-};
-
-#define T3_SQ_CQE_FLIT 13
-#define T3_SQ_COOKIE_FLIT 14
-
-#define T3_RQ_COOKIE_FLIT 13
-#define T3_RQ_CQE_FLIT 14
-
-static inline enum t3_wr_opcode fw_riwrh_opcode(struct fw_riwrh *wqe)
-{
- return G_FW_RIWR_OP(be32_to_cpu(wqe->op_seop_flags));
-}
-
-enum t3_wr_hdr_bits {
- T3_EOP = 1,
- T3_SOP = 2,
- T3_SOPEOP = T3_EOP|T3_SOP,
-};
-
-static inline void build_fw_riwrh(struct fw_riwrh *wqe, enum t3_wr_opcode op,
- enum t3_wr_flags flags, u8 genbit, u32 tid,
- u8 len, u8 sopeop)
-{
- wqe->op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(op) |
- V_FW_RIWR_SOPEOP(sopeop) |
- V_FW_RIWR_FLAGS(flags));
- wmb();
- wqe->gen_tid_len = cpu_to_be32(V_FW_RIWR_GEN(genbit) |
- V_FW_RIWR_TID(tid) |
- V_FW_RIWR_LEN(len));
- /* 2nd gen bit... */
- ((union t3_wr *)wqe)->genbit.genbit = cpu_to_be64(genbit);
-}
-
-/*
- * T3 ULP2_TX commands
- */
-enum t3_utx_mem_op {
- T3_UTX_MEM_READ = 2,
- T3_UTX_MEM_WRITE = 3
-};
-
-/* T3 MC7 RDMA TPT entry format */
-
-enum tpt_mem_type {
- TPT_NON_SHARED_MR = 0x0,
- TPT_SHARED_MR = 0x1,
- TPT_MW = 0x2,
- TPT_MW_RELAXED_PROTECTION = 0x3
-};
-
-enum tpt_addr_type {
- TPT_ZBTO = 0,
- TPT_VATO = 1
-};
-
-enum tpt_mem_perm {
- TPT_MW_BIND = 0x10,
- TPT_LOCAL_READ = 0x8,
- TPT_LOCAL_WRITE = 0x4,
- TPT_REMOTE_READ = 0x2,
- TPT_REMOTE_WRITE = 0x1
-};
-
-struct tpt_entry {
- __be32 valid_stag_pdid;
- __be32 flags_pagesize_qpid;
-
- __be32 rsvd_pbl_addr;
- __be32 len;
- __be32 va_hi;
- __be32 va_low_or_fbo;
-
- __be32 rsvd_bind_cnt_or_pstag;
- __be32 rsvd_pbl_size;
-};
-
-#define S_TPT_VALID 31
-#define V_TPT_VALID(x) ((x) << S_TPT_VALID)
-#define F_TPT_VALID V_TPT_VALID(1U)
-
-#define S_TPT_STAG_KEY 23
-#define M_TPT_STAG_KEY 0xFF
-#define V_TPT_STAG_KEY(x) ((x) << S_TPT_STAG_KEY)
-#define G_TPT_STAG_KEY(x) (((x) >> S_TPT_STAG_KEY) & M_TPT_STAG_KEY)
-
-#define S_TPT_STAG_STATE 22
-#define V_TPT_STAG_STATE(x) ((x) << S_TPT_STAG_STATE)
-#define F_TPT_STAG_STATE V_TPT_STAG_STATE(1U)
-
-#define S_TPT_STAG_TYPE 20
-#define M_TPT_STAG_TYPE 0x3
-#define V_TPT_STAG_TYPE(x) ((x) << S_TPT_STAG_TYPE)
-#define G_TPT_STAG_TYPE(x) (((x) >> S_TPT_STAG_TYPE) & M_TPT_STAG_TYPE)
-
-#define S_TPT_PDID 0
-#define M_TPT_PDID 0xFFFFF
-#define V_TPT_PDID(x) ((x) << S_TPT_PDID)
-#define G_TPT_PDID(x) (((x) >> S_TPT_PDID) & M_TPT_PDID)
-
-#define S_TPT_PERM 28
-#define M_TPT_PERM 0xF
-#define V_TPT_PERM(x) ((x) << S_TPT_PERM)
-#define G_TPT_PERM(x) (((x) >> S_TPT_PERM) & M_TPT_PERM)
-
-#define S_TPT_REM_INV_DIS 27
-#define V_TPT_REM_INV_DIS(x) ((x) << S_TPT_REM_INV_DIS)
-#define F_TPT_REM_INV_DIS V_TPT_REM_INV_DIS(1U)
-
-#define S_TPT_ADDR_TYPE 26
-#define V_TPT_ADDR_TYPE(x) ((x) << S_TPT_ADDR_TYPE)
-#define F_TPT_ADDR_TYPE V_TPT_ADDR_TYPE(1U)
-
-#define S_TPT_MW_BIND_ENABLE 25
-#define V_TPT_MW_BIND_ENABLE(x) ((x) << S_TPT_MW_BIND_ENABLE)
-#define F_TPT_MW_BIND_ENABLE V_TPT_MW_BIND_ENABLE(1U)
-
-#define S_TPT_PAGE_SIZE 20
-#define M_TPT_PAGE_SIZE 0x1F
-#define V_TPT_PAGE_SIZE(x) ((x) << S_TPT_PAGE_SIZE)
-#define G_TPT_PAGE_SIZE(x) (((x) >> S_TPT_PAGE_SIZE) & M_TPT_PAGE_SIZE)
-
-#define S_TPT_PBL_ADDR 0
-#define M_TPT_PBL_ADDR 0x1FFFFFFF
-#define V_TPT_PBL_ADDR(x) ((x) << S_TPT_PBL_ADDR)
-#define G_TPT_PBL_ADDR(x) (((x) >> S_TPT_PBL_ADDR) & M_TPT_PBL_ADDR)
-
-#define S_TPT_QPID 0
-#define M_TPT_QPID 0xFFFFF
-#define V_TPT_QPID(x) ((x) << S_TPT_QPID)
-#define G_TPT_QPID(x) (((x) >> S_TPT_QPID) & M_TPT_QPID)
-
-#define S_TPT_PSTAG 0
-#define M_TPT_PSTAG 0xFFFFFF
-#define V_TPT_PSTAG(x) ((x) << S_TPT_PSTAG)
-#define G_TPT_PSTAG(x) (((x) >> S_TPT_PSTAG) & M_TPT_PSTAG)
-
-#define S_TPT_PBL_SIZE 0
-#define M_TPT_PBL_SIZE 0xFFFFF
-#define V_TPT_PBL_SIZE(x) ((x) << S_TPT_PBL_SIZE)
-#define G_TPT_PBL_SIZE(x) (((x) >> S_TPT_PBL_SIZE) & M_TPT_PBL_SIZE)
-
-/*
- * CQE defs
- */
-struct t3_cqe {
- __be32 header;
- __be32 len;
- union {
- struct {
- __be32 stag;
- __be32 msn;
- } rcqe;
- struct {
- u32 wrid_hi;
- u32 wrid_low;
- } scqe;
- } u;
-};
-
-#define S_CQE_OOO 31
-#define M_CQE_OOO 0x1
-#define G_CQE_OOO(x) ((((x) >> S_CQE_OOO)) & M_CQE_OOO)
-#define V_CEQ_OOO(x) ((x)<<S_CQE_OOO)
-
-#define S_CQE_QPID 12
-#define M_CQE_QPID 0x7FFFF
-#define G_CQE_QPID(x) ((((x) >> S_CQE_QPID)) & M_CQE_QPID)
-#define V_CQE_QPID(x) ((x)<<S_CQE_QPID)
-
-#define S_CQE_SWCQE 11
-#define M_CQE_SWCQE 0x1
-#define G_CQE_SWCQE(x) ((((x) >> S_CQE_SWCQE)) & M_CQE_SWCQE)
-#define V_CQE_SWCQE(x) ((x)<<S_CQE_SWCQE)
-
-#define S_CQE_GENBIT 10
-#define M_CQE_GENBIT 0x1
-#define G_CQE_GENBIT(x) (((x) >> S_CQE_GENBIT) & M_CQE_GENBIT)
-#define V_CQE_GENBIT(x) ((x)<<S_CQE_GENBIT)
-
-#define S_CQE_STATUS 5
-#define M_CQE_STATUS 0x1F
-#define G_CQE_STATUS(x) ((((x) >> S_CQE_STATUS)) & M_CQE_STATUS)
-#define V_CQE_STATUS(x) ((x)<<S_CQE_STATUS)
-
-#define S_CQE_TYPE 4
-#define M_CQE_TYPE 0x1
-#define G_CQE_TYPE(x) ((((x) >> S_CQE_TYPE)) & M_CQE_TYPE)
-#define V_CQE_TYPE(x) ((x)<<S_CQE_TYPE)
-
-#define S_CQE_OPCODE 0
-#define M_CQE_OPCODE 0xF
-#define G_CQE_OPCODE(x) ((((x) >> S_CQE_OPCODE)) & M_CQE_OPCODE)
-#define V_CQE_OPCODE(x) ((x)<<S_CQE_OPCODE)
-
-#define SW_CQE(x) (G_CQE_SWCQE(be32_to_cpu((x).header)))
-#define CQE_OOO(x) (G_CQE_OOO(be32_to_cpu((x).header)))
-#define CQE_QPID(x) (G_CQE_QPID(be32_to_cpu((x).header)))
-#define CQE_GENBIT(x) (G_CQE_GENBIT(be32_to_cpu((x).header)))
-#define CQE_TYPE(x) (G_CQE_TYPE(be32_to_cpu((x).header)))
-#define SQ_TYPE(x) (CQE_TYPE((x)))
-#define RQ_TYPE(x) (!CQE_TYPE((x)))
-#define CQE_STATUS(x) (G_CQE_STATUS(be32_to_cpu((x).header)))
-#define CQE_OPCODE(x) (G_CQE_OPCODE(be32_to_cpu((x).header)))
-
-#define CQE_SEND_OPCODE(x)( \
- (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND) || \
- (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE) || \
- (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_INV) || \
- (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE_INV))
-
-#define CQE_LEN(x) (be32_to_cpu((x).len))
-
-/* used for RQ completion processing */
-#define CQE_WRID_STAG(x) (be32_to_cpu((x).u.rcqe.stag))
-#define CQE_WRID_MSN(x) (be32_to_cpu((x).u.rcqe.msn))
-
-/* used for SQ completion processing */
-#define CQE_WRID_SQ_WPTR(x) ((x).u.scqe.wrid_hi)
-#define CQE_WRID_WPTR(x) ((x).u.scqe.wrid_low)
-
-/* generic accessor macros */
-#define CQE_WRID_HI(x) ((x).u.scqe.wrid_hi)
-#define CQE_WRID_LOW(x) ((x).u.scqe.wrid_low)
-
-#define TPT_ERR_SUCCESS 0x0
-#define TPT_ERR_STAG 0x1 /* STAG invalid: either the */
-						/* STAG is off limits, being 0, */
- /* or STAG_key mismatch */
-#define TPT_ERR_PDID 0x2 /* PDID mismatch */
-#define TPT_ERR_QPID 0x3 /* QPID mismatch */
-#define TPT_ERR_ACCESS 0x4 /* Invalid access right */
-#define TPT_ERR_WRAP 0x5 /* Wrap error */
-#define TPT_ERR_BOUND	0x6	/* base and bounds violation */
-#define TPT_ERR_INVALIDATE_SHARED_MR 0x7 /* attempt to invalidate a */
- /* shared memory region */
-#define TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND 0x8 /* attempt to invalidate a */
-						/* MR with a bound MW */
-#define TPT_ERR_ECC 0x9 /* ECC error detected */
-#define TPT_ERR_ECC_PSTAG 0xA /* ECC error detected when */
- /* reading PSTAG for a MW */
- /* Invalidate */
-#define TPT_ERR_PBL_ADDR_BOUND 0xB /* pbl addr out of bounds: */
- /* software error */
-#define TPT_ERR_SWFLUSH 0xC /* SW FLUSHED */
-#define TPT_ERR_CRC 0x10 /* CRC error */
-#define TPT_ERR_MARKER 0x11 /* Marker error */
-#define TPT_ERR_PDU_LEN_ERR 0x12 /* invalid PDU length */
-#define TPT_ERR_OUT_OF_RQE 0x13 /* out of RQE */
-#define TPT_ERR_DDP_VERSION 0x14 /* wrong DDP version */
-#define TPT_ERR_RDMA_VERSION 0x15 /* wrong RDMA version */
-#define TPT_ERR_OPCODE 0x16 /* invalid rdma opcode */
-#define TPT_ERR_DDP_QUEUE_NUM 0x17 /* invalid ddp queue number */
-#define TPT_ERR_MSN 0x18 /* MSN error */
-#define TPT_ERR_TBIT 0x19 /* tag bit not set correctly */
-#define TPT_ERR_MO 0x1A /* MO not 0 for TERMINATE */
- /* or READ_REQ */
-#define TPT_ERR_MSN_GAP 0x1B
-#define TPT_ERR_MSN_RANGE 0x1C
-#define TPT_ERR_IRD_OVERFLOW 0x1D
-#define TPT_ERR_RQE_ADDR_BOUND 0x1E /* RQE addr out of bounds: */
- /* software error */
-#define TPT_ERR_INTERNAL_ERR 0x1F /* internal error (opcode */
- /* mismatch) */
-
-struct t3_swsq {
- __u64 wr_id;
- struct t3_cqe cqe;
- __u32 sq_wptr;
- __be32 read_len;
- int opcode;
- int complete;
- int signaled;
-};
-
-struct t3_swrq {
- __u64 wr_id;
- __u32 pbl_addr;
-};
-
-/*
- * A T3 WQ implements both the SQ and RQ.
- */
-struct t3_wq {
- union t3_wr *queue; /* DMA accessible memory */
- dma_addr_t dma_addr; /* DMA address for HW */
-	DEFINE_DMA_UNMAP_ADDR(mapping);	/* unmap cruft */
- u32 error; /* 1 once we go to ERROR */
- u32 qpid;
- u32 wptr; /* idx to next available WR slot */
- u32 size_log2; /* total wq size */
- struct t3_swsq *sq; /* SW SQ */
- struct t3_swsq *oldest_read; /* tracks oldest pending read */
- u32 sq_wptr; /* sq_wptr - sq_rptr == count of */
- u32 sq_rptr; /* pending wrs */
- u32 sq_size_log2; /* sq size */
-	struct t3_swrq *rq;		/* SW RQ (holds consumer wr_ids) */
- u32 rq_wptr; /* rq_wptr - rq_rptr == count of */
- u32 rq_rptr; /* pending wrs */
- struct t3_swrq *rq_oldest_wr; /* oldest wr on the SW RQ */
- u32 rq_size_log2; /* rq size */
- u32 rq_addr; /* rq adapter address */
- void __iomem *doorbell; /* kernel db */
- u64 udb; /* user db if any */
- struct cxio_rdev *rdev;
-};
-
-struct t3_cq {
- u32 cqid;
- u32 rptr;
- u32 wptr;
- u32 size_log2;
- dma_addr_t dma_addr;
- DEFINE_DMA_UNMAP_ADDR(mapping);
- struct t3_cqe *queue;
- struct t3_cqe *sw_queue;
- u32 sw_rptr;
- u32 sw_wptr;
-};
-
-#define CQ_VLD_ENTRY(ptr,size_log2,cqe) (Q_GENBIT(ptr,size_log2) == \
- CQE_GENBIT(*cqe))
-
-struct t3_cq_status_page {
- u32 cq_err;
-};
-
-static inline int cxio_cq_in_error(struct t3_cq *cq)
-{
- return ((struct t3_cq_status_page *)
- &cq->queue[1 << cq->size_log2])->cq_err;
-}
-
-static inline void cxio_set_cq_in_error(struct t3_cq *cq)
-{
- ((struct t3_cq_status_page *)
- &cq->queue[1 << cq->size_log2])->cq_err = 1;
-}
-
-static inline void cxio_set_wq_in_error(struct t3_wq *wq)
-{
- wq->queue->wq_in_err.err |= 1;
-}
-
-static inline void cxio_disable_wq_db(struct t3_wq *wq)
-{
- wq->queue->wq_in_err.err |= 2;
-}
-
-static inline void cxio_enable_wq_db(struct t3_wq *wq)
-{
- wq->queue->wq_in_err.err &= ~2;
-}
-
-static inline int cxio_wq_db_enabled(struct t3_wq *wq)
-{
- return !(wq->queue->wq_in_err.err & 2);
-}
-
-static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq)
-{
- struct t3_cqe *cqe;
-
- cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2));
- if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe))
- return cqe;
- return NULL;
-}
-
-static inline struct t3_cqe *cxio_next_sw_cqe(struct t3_cq *cq)
-{
- struct t3_cqe *cqe;
-
- if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) {
- cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2));
- return cqe;
- }
- return NULL;
-}
-
-static inline struct t3_cqe *cxio_next_cqe(struct t3_cq *cq)
-{
- struct t3_cqe *cqe;
-
- if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) {
- cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2));
- return cqe;
- }
- cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2));
- if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe))
- return cqe;
- return NULL;
-}
-
-#endif
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
deleted file mode 100644
index 56a8ab6210cf..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ /dev/null
@@ -1,282 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-
-#include <rdma/ib_verbs.h>
-
-#include "cxgb3_offload.h"
-#include "iwch_provider.h"
-#include <rdma/cxgb3-abi.h>
-#include "iwch.h"
-#include "iwch_cm.h"
-
-#define DRV_VERSION "1.1"
-
-MODULE_AUTHOR("Boyd Faulkner, Steve Wise");
-MODULE_DESCRIPTION("Chelsio T3 RDMA Driver");
-MODULE_LICENSE("Dual BSD/GPL");
-
-static void open_rnic_dev(struct t3cdev *);
-static void close_rnic_dev(struct t3cdev *);
-static void iwch_event_handler(struct t3cdev *, u32, u32);
-
-struct cxgb3_client t3c_client = {
- .name = "iw_cxgb3",
- .add = open_rnic_dev,
- .remove = close_rnic_dev,
- .handlers = t3c_handlers,
- .redirect = iwch_ep_redirect,
- .event_handler = iwch_event_handler
-};
-
-static LIST_HEAD(dev_list);
-static DEFINE_MUTEX(dev_mutex);
-
-static void disable_dbs(struct iwch_dev *rnicp)
-{
- unsigned long index;
- struct iwch_qp *qhp;
-
- xa_lock_irq(&rnicp->qps);
- xa_for_each(&rnicp->qps, index, qhp)
- cxio_disable_wq_db(&qhp->wq);
- xa_unlock_irq(&rnicp->qps);
-}
-
-static void enable_dbs(struct iwch_dev *rnicp, int ring_db)
-{
- unsigned long index;
- struct iwch_qp *qhp;
-
- xa_lock_irq(&rnicp->qps);
- xa_for_each(&rnicp->qps, index, qhp) {
- if (ring_db)
- ring_doorbell(qhp->rhp->rdev.ctrl_qp.doorbell,
- qhp->wq.qpid);
- cxio_enable_wq_db(&qhp->wq);
- }
- xa_unlock_irq(&rnicp->qps);
-}
-
-static void iwch_db_drop_task(struct work_struct *work)
-{
- struct iwch_dev *rnicp = container_of(work, struct iwch_dev,
- db_drop_task.work);
- enable_dbs(rnicp, 1);
-}
-
-static void rnic_init(struct iwch_dev *rnicp)
-{
- pr_debug("%s iwch_dev %p\n", __func__, rnicp);
- xa_init_flags(&rnicp->cqs, XA_FLAGS_LOCK_IRQ);
- xa_init_flags(&rnicp->qps, XA_FLAGS_LOCK_IRQ);
- xa_init_flags(&rnicp->mrs, XA_FLAGS_LOCK_IRQ);
- INIT_DELAYED_WORK(&rnicp->db_drop_task, iwch_db_drop_task);
-
- rnicp->attr.max_qps = T3_MAX_NUM_QP - 32;
- rnicp->attr.max_wrs = T3_MAX_QP_DEPTH;
- rnicp->attr.max_sge_per_wr = T3_MAX_SGE;
- rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE;
- rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1;
- rnicp->attr.max_cqes_per_cq = T3_MAX_CQ_DEPTH;
- rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev);
- rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE;
- rnicp->attr.max_pds = T3_MAX_NUM_PD - 1;
- rnicp->attr.mem_pgsizes_bitmask = T3_PAGESIZE_MASK;
- rnicp->attr.max_mr_size = T3_MAX_MR_SIZE;
- rnicp->attr.can_resize_wq = 0;
- rnicp->attr.max_rdma_reads_per_qp = 8;
- rnicp->attr.max_rdma_read_resources =
- rnicp->attr.max_rdma_reads_per_qp * rnicp->attr.max_qps;
- rnicp->attr.max_rdma_read_qp_depth = 8; /* IRD */
- rnicp->attr.max_rdma_read_depth =
- rnicp->attr.max_rdma_read_qp_depth * rnicp->attr.max_qps;
- rnicp->attr.rq_overflow_handled = 0;
- rnicp->attr.can_modify_ird = 0;
- rnicp->attr.can_modify_ord = 0;
- rnicp->attr.max_mem_windows = rnicp->attr.max_mem_regs - 1;
- rnicp->attr.stag0_value = 1;
- rnicp->attr.zbva_support = 1;
- rnicp->attr.local_invalidate_fence = 1;
- rnicp->attr.cq_overflow_detection = 1;
- return;
-}
-
-static void open_rnic_dev(struct t3cdev *tdev)
-{
- struct iwch_dev *rnicp;
-
- pr_debug("%s t3cdev %p\n", __func__, tdev);
- pr_info_once("Chelsio T3 RDMA Driver - version %s\n", DRV_VERSION);
- rnicp = ib_alloc_device(iwch_dev, ibdev);
- if (!rnicp) {
- pr_err("Cannot allocate ib device\n");
- return;
- }
- rnicp->rdev.ulp = rnicp;
- rnicp->rdev.t3cdev_p = tdev;
-
- mutex_lock(&dev_mutex);
-
- if (cxio_rdev_open(&rnicp->rdev)) {
- mutex_unlock(&dev_mutex);
- pr_err("Unable to open CXIO rdev\n");
- ib_dealloc_device(&rnicp->ibdev);
- return;
- }
-
- rnic_init(rnicp);
-
- list_add_tail(&rnicp->entry, &dev_list);
- mutex_unlock(&dev_mutex);
-
-	if (iwch_register_device(rnicp)) {
-		pr_err("Unable to register device\n");
-		close_rnic_dev(tdev);
-		return;
-	}
- pr_info("Initialized device %s\n",
- pci_name(rnicp->rdev.rnic_info.pdev));
- return;
-}
-
-static void close_rnic_dev(struct t3cdev *tdev)
-{
- struct iwch_dev *dev, *tmp;
- pr_debug("%s t3cdev %p\n", __func__, tdev);
- mutex_lock(&dev_mutex);
- list_for_each_entry_safe(dev, tmp, &dev_list, entry) {
- if (dev->rdev.t3cdev_p == tdev) {
- dev->rdev.flags = CXIO_ERROR_FATAL;
- synchronize_net();
- cancel_delayed_work_sync(&dev->db_drop_task);
- list_del(&dev->entry);
- iwch_unregister_device(dev);
- cxio_rdev_close(&dev->rdev);
- WARN_ON(!xa_empty(&dev->cqs));
- WARN_ON(!xa_empty(&dev->qps));
- WARN_ON(!xa_empty(&dev->mrs));
- ib_dealloc_device(&dev->ibdev);
- break;
- }
- }
- mutex_unlock(&dev_mutex);
-}
-
-static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id)
-{
- struct cxio_rdev *rdev = tdev->ulp;
- struct iwch_dev *rnicp;
- struct ib_event event;
- u32 portnum = port_id + 1;
- int dispatch = 0;
-
- if (!rdev)
- return;
- rnicp = rdev_to_iwch_dev(rdev);
- switch (evt) {
- case OFFLOAD_STATUS_DOWN: {
- rdev->flags = CXIO_ERROR_FATAL;
- synchronize_net();
- event.event = IB_EVENT_DEVICE_FATAL;
- dispatch = 1;
- break;
- }
- case OFFLOAD_PORT_DOWN: {
- event.event = IB_EVENT_PORT_ERR;
- dispatch = 1;
- break;
- }
- case OFFLOAD_PORT_UP: {
- event.event = IB_EVENT_PORT_ACTIVE;
- dispatch = 1;
- break;
- }
- case OFFLOAD_DB_FULL: {
- disable_dbs(rnicp);
- break;
- }
- case OFFLOAD_DB_EMPTY: {
- enable_dbs(rnicp, 1);
- break;
- }
- case OFFLOAD_DB_DROP: {
- unsigned long delay = 1000;
- unsigned short r;
-
- disable_dbs(rnicp);
- get_random_bytes(&r, 2);
- delay += r & 1023;
-
- /*
- * delay is between 1000-2023 usecs.
- */
- schedule_delayed_work(&rnicp->db_drop_task,
- usecs_to_jiffies(delay));
- break;
- }
- }
-
- if (dispatch) {
- event.device = &rnicp->ibdev;
- event.element.port_num = portnum;
- ib_dispatch_event(&event);
- }
-
- return;
-}
-
-static int __init iwch_init_module(void)
-{
- int err;
-
- err = cxio_hal_init();
- if (err)
- return err;
- err = iwch_cm_init();
- if (err)
- return err;
- cxio_register_ev_cb(iwch_ev_dispatch);
- cxgb3_register_client(&t3c_client);
- return 0;
-}
-
-static void __exit iwch_exit_module(void)
-{
- cxgb3_unregister_client(&t3c_client);
- cxio_unregister_ev_cb(iwch_ev_dispatch);
- iwch_cm_term();
- cxio_hal_exit();
-}
-
-module_init(iwch_init_module);
-module_exit(iwch_exit_module);
diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h
deleted file mode 100644
index 310a937bffcf..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __IWCH_H__
-#define __IWCH_H__
-
-#include <linux/mutex.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/xarray.h>
-#include <linux/workqueue.h>
-
-#include <rdma/ib_verbs.h>
-
-#include "cxio_hal.h"
-#include "cxgb3_offload.h"
-
-struct iwch_pd;
-struct iwch_cq;
-struct iwch_qp;
-struct iwch_mr;
-
-struct iwch_rnic_attributes {
- u32 max_qps;
- u32 max_wrs; /* Max for any SQ/RQ */
- u32 max_sge_per_wr;
- u32 max_sge_per_rdma_write_wr; /* for RDMA Write WR */
- u32 max_cqs;
- u32 max_cqes_per_cq;
- u32 max_mem_regs;
- u32 max_phys_buf_entries; /* for phys buf list */
- u32 max_pds;
-
- /*
- * The memory page sizes supported by this RNIC.
- * Bit position i in bitmap indicates page of
- * size (4k)^i. Phys block list mode unsupported.
- */
- u32 mem_pgsizes_bitmask;
- u64 max_mr_size;
- u8 can_resize_wq;
-
- /*
- * The maximum number of RDMA Reads that can be outstanding
- * per QP with this RNIC as the target.
- */
- u32 max_rdma_reads_per_qp;
-
- /*
- * The maximum number of resources used for RDMA Reads
- * by this RNIC with this RNIC as the target.
- */
- u32 max_rdma_read_resources;
-
- /*
- * The max depth per QP for initiation of RDMA Read
- * by this RNIC.
- */
- u32 max_rdma_read_qp_depth;
-
- /*
- * The maximum depth for initiation of RDMA Read
- * operations by this RNIC on all QPs
- */
- u32 max_rdma_read_depth;
- u8 rq_overflow_handled;
- u32 can_modify_ird;
- u32 can_modify_ord;
- u32 max_mem_windows;
- u32 stag0_value;
- u8 zbva_support;
- u8 local_invalidate_fence;
- u32 cq_overflow_detection;
-};
-
-struct iwch_dev {
- struct ib_device ibdev;
- struct cxio_rdev rdev;
- u32 device_cap_flags;
- struct iwch_rnic_attributes attr;
- struct xarray cqs;
- struct xarray qps;
- struct xarray mrs;
- struct list_head entry;
- struct delayed_work db_drop_task;
-};
-
-static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev)
-{
- return container_of(ibdev, struct iwch_dev, ibdev);
-}
-
-static inline struct iwch_dev *rdev_to_iwch_dev(struct cxio_rdev *rdev)
-{
- return container_of(rdev, struct iwch_dev, rdev);
-}
-
-static inline int t3b_device(const struct iwch_dev *rhp)
-{
- return rhp->rdev.t3cdev_p->type == T3B;
-}
-
-static inline int t3a_device(const struct iwch_dev *rhp)
-{
- return rhp->rdev.t3cdev_p->type == T3A;
-}
-
-static inline struct iwch_cq *get_chp(struct iwch_dev *rhp, u32 cqid)
-{
- return xa_load(&rhp->cqs, cqid);
-}
-
-static inline struct iwch_qp *get_qhp(struct iwch_dev *rhp, u32 qpid)
-{
- return xa_load(&rhp->qps, qpid);
-}
-
-static inline struct iwch_mr *get_mhp(struct iwch_dev *rhp, u32 mmid)
-{
- return xa_load(&rhp->mrs, mmid);
-}
-
-extern struct cxgb3_client t3c_client;
-extern cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS];
-extern void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb);
-
-#endif
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
deleted file mode 100644
index 0bca72cb4d9a..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ /dev/null
@@ -1,2258 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/slab.h>
-#include <linux/workqueue.h>
-#include <linux/skbuff.h>
-#include <linux/timer.h>
-#include <linux/notifier.h>
-#include <linux/inetdevice.h>
-
-#include <net/neighbour.h>
-#include <net/netevent.h>
-#include <net/route.h>
-
-#include "tcb.h"
-#include "cxgb3_offload.h"
-#include "iwch.h"
-#include "iwch_provider.h"
-#include "iwch_cm.h"
-
-static char *states[] = {
- "idle",
- "listen",
- "connecting",
- "mpa_wait_req",
- "mpa_req_sent",
- "mpa_req_rcvd",
- "mpa_rep_sent",
- "fpdu_mode",
- "aborting",
- "closing",
- "moribund",
- "dead",
- NULL,
-};
-
-int peer2peer = 0;
-module_param(peer2peer, int, 0644);
-MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)");
-
-static int ep_timeout_secs = 60;
-module_param(ep_timeout_secs, int, 0644);
-MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
- "in seconds (default=60)");
-
-static int mpa_rev = 1;
-module_param(mpa_rev, int, 0644);
-MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
- "1 is spec compliant. (default=1)");
-
-static int markers_enabled = 0;
-module_param(markers_enabled, int, 0644);
-MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");
-
-static int crc_enabled = 1;
-module_param(crc_enabled, int, 0644);
-MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");
-
-static int rcv_win = 256 * 1024;
-module_param(rcv_win, int, 0644);
-MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)");
-
-static int snd_win = 32 * 1024;
-module_param(snd_win, int, 0644);
-MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)");
-
-static unsigned int nocong = 0;
-module_param(nocong, uint, 0644);
-MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)");
-
-static unsigned int cong_flavor = 1;
-module_param(cong_flavor, uint, 0644);
-MODULE_PARM_DESC(cong_flavor, "TCP Congestion control flavor (default=1)");
-
-static struct workqueue_struct *workq;
-
-static struct sk_buff_head rxq;
-
-static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
-static void ep_timeout(struct timer_list *t);
-static void connect_reply_upcall(struct iwch_ep *ep, int status);
-
-static void start_ep_timer(struct iwch_ep *ep)
-{
- pr_debug("%s ep %p\n", __func__, ep);
- if (timer_pending(&ep->timer)) {
- pr_debug("%s stopped / restarted timer ep %p\n", __func__, ep);
- del_timer_sync(&ep->timer);
- } else
- get_ep(&ep->com);
- ep->timer.expires = jiffies + ep_timeout_secs * HZ;
- add_timer(&ep->timer);
-}
-
-static void stop_ep_timer(struct iwch_ep *ep)
-{
- pr_debug("%s ep %p\n", __func__, ep);
- if (!timer_pending(&ep->timer)) {
-		WARN(1, "%s timer stopped when it's not running! ep %p state %u\n",
- __func__, ep, ep->com.state);
- return;
- }
- del_timer_sync(&ep->timer);
- put_ep(&ep->com);
-}
-
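-/*
- * Transmit helpers: both free the skb and fail fast with -EIO when the
- * rdev has hit a fatal error, so callers never queue onto a dead device.
- */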
-static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_entry *l2e)
-{
- int error = 0;
- struct cxio_rdev *rdev;
-
- rdev = (struct cxio_rdev *)tdev->ulp;
- if (cxio_fatal_error(rdev)) {
- kfree_skb(skb);
- return -EIO;
- }
- error = l2t_send(tdev, skb, l2e);
- if (error < 0)
- kfree_skb(skb);
- return error < 0 ? error : 0;
-}
-
-int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb)
-{
- int error = 0;
- struct cxio_rdev *rdev;
-
- rdev = (struct cxio_rdev *)tdev->ulp;
- if (cxio_fatal_error(rdev)) {
- kfree_skb(skb);
- return -EIO;
- }
- error = cxgb3_ofld_send(tdev, skb);
- if (error < 0)
- kfree_skb(skb);
- return error < 0 ? error : 0;
-}
-
-static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
-{
- struct cpl_tid_release *req;
-
- skb = get_skb(skb, sizeof(*req), GFP_KERNEL);
- if (!skb)
- return;
- req = skb_put(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
- skb->priority = CPL_PRIORITY_SETUP;
- iwch_cxgb3_ofld_send(tdev, skb);
- return;
-}
-
-int iwch_quiesce_tid(struct iwch_ep *ep)
-{
- struct cpl_set_tcb_field *req;
- struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
-
- if (!skb)
- return -ENOMEM;
- req = skb_put(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
- req->reply = 0;
- req->cpu_idx = 0;
- req->word = htons(W_TCB_RX_QUIESCE);
- req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
- req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE);
-
- skb->priority = CPL_PRIORITY_DATA;
- return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
-}
-
-int iwch_resume_tid(struct iwch_ep *ep)
-{
- struct cpl_set_tcb_field *req;
- struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
-
- if (!skb)
- return -ENOMEM;
- req = skb_put(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
- req->reply = 0;
- req->cpu_idx = 0;
- req->word = htons(W_TCB_RX_QUIESCE);
- req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
- req->val = 0;
-
- skb->priority = CPL_PRIORITY_DATA;
- return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
-}
-
-static void set_emss(struct iwch_ep *ep, u16 opt)
-{
- pr_debug("%s ep %p opt %u\n", __func__, ep, opt);
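-	/*
-	 * Effective MSS: MTU table entry minus 40 bytes of IPv4 + TCP
-	 * headers, and 12 more bytes if TCP timestamps are negotiated.
-	 */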
- ep->emss = T3C_DATA(ep->com.tdev)->mtus[G_TCPOPT_MSS(opt)] - 40;
- if (G_TCPOPT_TSTAMP(opt))
- ep->emss -= 12;
- if (ep->emss < 128)
- ep->emss = 128;
- pr_debug("emss=%d\n", ep->emss);
-}
-
-static enum iwch_ep_state state_read(struct iwch_ep_common *epc)
-{
- unsigned long flags;
- enum iwch_ep_state state;
-
- spin_lock_irqsave(&epc->lock, flags);
- state = epc->state;
- spin_unlock_irqrestore(&epc->lock, flags);
- return state;
-}
-
-static void __state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
-{
- epc->state = new;
-}
-
-static void state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&epc->lock, flags);
- pr_debug("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
- __state_set(epc, new);
- spin_unlock_irqrestore(&epc->lock, flags);
- return;
-}
-
-static void *alloc_ep(int size, gfp_t gfp)
-{
- struct iwch_ep_common *epc;
-
- epc = kzalloc(size, gfp);
- if (epc) {
- kref_init(&epc->kref);
- spin_lock_init(&epc->lock);
- init_waitqueue_head(&epc->waitq);
- }
- pr_debug("%s alloc ep %p\n", __func__, epc);
- return epc;
-}
-
-void __free_ep(struct kref *kref)
-{
- struct iwch_ep *ep;
- ep = container_of(container_of(kref, struct iwch_ep_common, kref),
- struct iwch_ep, com);
- pr_debug("%s ep %p state %s\n",
- __func__, ep, states[state_read(&ep->com)]);
- if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
- cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
- dst_release(ep->dst);
- l2t_release(ep->com.tdev, ep->l2t);
- }
- kfree(ep);
-}
-
-static void release_ep_resources(struct iwch_ep *ep)
-{
- pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
- set_bit(RELEASE_RESOURCES, &ep->com.flags);
- put_ep(&ep->com);
-}
-
-static int status2errno(int status)
-{
- switch (status) {
- case CPL_ERR_NONE:
- return 0;
- case CPL_ERR_CONN_RESET:
- return -ECONNRESET;
- case CPL_ERR_ARP_MISS:
- return -EHOSTUNREACH;
- case CPL_ERR_CONN_TIMEDOUT:
- return -ETIMEDOUT;
- case CPL_ERR_TCAM_FULL:
- return -ENOMEM;
- case CPL_ERR_CONN_EXIST:
- return -EADDRINUSE;
- default:
- return -EIO;
- }
-}
-
-/*
- * Try and reuse skbs already allocated...
- */
-static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
-{
- if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) {
- skb_trim(skb, 0);
- skb_get(skb);
- } else {
- skb = alloc_skb(len, gfp);
- }
- return skb;
-}
-
-static struct rtable *find_route(struct t3cdev *dev, __be32 local_ip,
- __be32 peer_ip, __be16 local_port,
- __be16 peer_port, u8 tos)
-{
- struct rtable *rt;
- struct flowi4 fl4;
-
- rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
- peer_port, local_port, IPPROTO_TCP,
- tos, 0);
- if (IS_ERR(rt))
- return NULL;
- return rt;
-}
-
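-/*
- * Return the index of the largest entry in the adapter's MTU table
- * that does not exceed the given path MTU.
- */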
-static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu)
-{
- int i = 0;
-
- while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu)
- ++i;
- return i;
-}
-
-static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb)
-{
- pr_debug("%s t3cdev %p\n", __func__, dev);
- kfree_skb(skb);
-}
-
-/*
- * Handle an ARP failure for an active open.
- */
-static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
-{
- pr_err("ARP failure during connect\n");
- kfree_skb(skb);
-}
-
-/*
- * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant
- * and send it along.
- */
-static void abort_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
-{
- struct cpl_abort_req *req = cplhdr(skb);
-
- pr_debug("%s t3cdev %p\n", __func__, dev);
- req->cmd = CPL_ABORT_NO_RST;
- iwch_cxgb3_ofld_send(dev, skb);
-}
-
-static int send_halfclose(struct iwch_ep *ep, gfp_t gfp)
-{
- struct cpl_close_con_req *req;
- struct sk_buff *skb;
-
- pr_debug("%s ep %p\n", __func__, ep);
- skb = get_skb(NULL, sizeof(*req), gfp);
- if (!skb) {
- pr_err("%s - failed to alloc skb\n", __func__);
- return -ENOMEM;
- }
- skb->priority = CPL_PRIORITY_DATA;
- set_arp_failure_handler(skb, arp_failure_discard);
- req = skb_put(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
- req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, ep->hwtid));
- return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
-}
-
-static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
-{
- struct cpl_abort_req *req;
-
- pr_debug("%s ep %p\n", __func__, ep);
- skb = get_skb(skb, sizeof(*req), gfp);
- if (!skb) {
- pr_err("%s - failed to alloc skb\n", __func__);
- return -ENOMEM;
- }
- skb->priority = CPL_PRIORITY_DATA;
- set_arp_failure_handler(skb, abort_arp_failure);
- req = skb_put_zero(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
- req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
- req->cmd = CPL_ABORT_SEND_RST;
- return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
-}
-
-static int send_connect(struct iwch_ep *ep)
-{
- struct cpl_act_open_req *req;
- struct sk_buff *skb;
- u32 opt0h, opt0l, opt2;
- unsigned int mtu_idx;
- int wscale;
-
- pr_debug("%s ep %p\n", __func__, ep);
-
- skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
- if (!skb) {
- pr_err("%s - failed to alloc skb\n", __func__);
- return -ENOMEM;
- }
- mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
- wscale = compute_wscale(rcv_win);
- opt0h = V_NAGLE(0) |
- V_NO_CONG(nocong) |
- V_KEEP_ALIVE(1) |
- F_TCAM_BYPASS |
- V_WND_SCALE(wscale) |
- V_MSS_IDX(mtu_idx) |
- V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
- opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
- opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) |
- V_CONG_CONTROL_FLAVOR(cong_flavor);
- skb->priority = CPL_PRIORITY_SETUP;
- set_arp_failure_handler(skb, act_open_req_arp_failure);
-
- req = skb_put(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ep->atid));
- req->local_port = ep->com.local_addr.sin_port;
- req->peer_port = ep->com.remote_addr.sin_port;
- req->local_ip = ep->com.local_addr.sin_addr.s_addr;
- req->peer_ip = ep->com.remote_addr.sin_addr.s_addr;
- req->opt0h = htonl(opt0h);
- req->opt0l = htonl(opt0l);
- req->params = 0;
- req->opt2 = htonl(opt2);
- return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
-}
-
-static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
-{
- int mpalen;
- struct tx_data_wr *req;
- struct mpa_message *mpa;
- int len;
-
- pr_debug("%s ep %p pd_len %d\n", __func__, ep, ep->plen);
-
- BUG_ON(skb_cloned(skb));
-
- mpalen = sizeof(*mpa) + ep->plen;
- if (skb->data + mpalen + sizeof(*req) > skb_end_pointer(skb)) {
- kfree_skb(skb);
-		skb = alloc_skb(mpalen + sizeof(*req), GFP_KERNEL);
- if (!skb) {
- connect_reply_upcall(ep, -ENOMEM);
- return;
- }
- }
- skb_trim(skb, 0);
- skb_reserve(skb, sizeof(*req));
- skb_put(skb, mpalen);
- skb->priority = CPL_PRIORITY_DATA;
- mpa = (struct mpa_message *) skb->data;
- memset(mpa, 0, sizeof(*mpa));
- memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
- mpa->flags = (crc_enabled ? MPA_CRC : 0) |
- (markers_enabled ? MPA_MARKERS : 0);
- mpa->private_data_size = htons(ep->plen);
- mpa->revision = mpa_rev;
-
- if (ep->plen)
- memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);
-
- /*
- * Reference the mpa skb. This ensures the data area
- * will remain in memory until the hw acks the tx.
- * Function tx_ack() will deref it.
- */
- skb_get(skb);
- set_arp_failure_handler(skb, arp_failure_discard);
- skb_reset_transport_header(skb);
- len = skb->len;
- req = skb_push(skb, sizeof(*req));
- req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
- req->wr_lo = htonl(V_WR_TID(ep->hwtid));
- req->len = htonl(len);
- req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
- V_TX_SNDBUF(snd_win>>15));
- req->flags = htonl(F_TX_INIT);
- req->sndseq = htonl(ep->snd_seq);
- BUG_ON(ep->mpa_skb);
- ep->mpa_skb = skb;
- iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
- start_ep_timer(ep);
- state_set(&ep->com, MPA_REQ_SENT);
- return;
-}
-
-static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
-{
- int mpalen;
- struct tx_data_wr *req;
- struct mpa_message *mpa;
- struct sk_buff *skb;
-
- pr_debug("%s ep %p plen %d\n", __func__, ep, plen);
-
- mpalen = sizeof(*mpa) + plen;
-
- skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
- if (!skb) {
- pr_err("%s - cannot alloc skb!\n", __func__);
- return -ENOMEM;
- }
- skb_reserve(skb, sizeof(*req));
- mpa = skb_put(skb, mpalen);
- memset(mpa, 0, sizeof(*mpa));
- memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
- mpa->flags = MPA_REJECT;
- mpa->revision = mpa_rev;
- mpa->private_data_size = htons(plen);
- if (plen)
- memcpy(mpa->private_data, pdata, plen);
-
- /*
- * Reference the mpa skb again. This ensures the data area
- * will remain in memory until the hw acks the tx.
- * Function tx_ack() will deref it.
- */
- skb_get(skb);
- skb->priority = CPL_PRIORITY_DATA;
- set_arp_failure_handler(skb, arp_failure_discard);
- skb_reset_transport_header(skb);
- req = skb_push(skb, sizeof(*req));
- req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
- req->wr_lo = htonl(V_WR_TID(ep->hwtid));
- req->len = htonl(mpalen);
- req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
- V_TX_SNDBUF(snd_win>>15));
- req->flags = htonl(F_TX_INIT);
- req->sndseq = htonl(ep->snd_seq);
- BUG_ON(ep->mpa_skb);
- ep->mpa_skb = skb;
- return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
-}
-
-static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
-{
- int mpalen;
- struct tx_data_wr *req;
- struct mpa_message *mpa;
- int len;
- struct sk_buff *skb;
-
- pr_debug("%s ep %p plen %d\n", __func__, ep, plen);
-
- mpalen = sizeof(*mpa) + plen;
-
- skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
- if (!skb) {
- pr_err("%s - cannot alloc skb!\n", __func__);
- return -ENOMEM;
- }
- skb->priority = CPL_PRIORITY_DATA;
- skb_reserve(skb, sizeof(*req));
- mpa = skb_put(skb, mpalen);
- memset(mpa, 0, sizeof(*mpa));
- memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
- mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
- (markers_enabled ? MPA_MARKERS : 0);
- mpa->revision = mpa_rev;
- mpa->private_data_size = htons(plen);
- if (plen)
- memcpy(mpa->private_data, pdata, plen);
-
- /*
- * Reference the mpa skb. This ensures the data area
- * will remain in memory until the hw acks the tx.
- * Function tx_ack() will deref it.
- */
- skb_get(skb);
- set_arp_failure_handler(skb, arp_failure_discard);
- skb_reset_transport_header(skb);
- len = skb->len;
- req = skb_push(skb, sizeof(*req));
- req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
- req->wr_lo = htonl(V_WR_TID(ep->hwtid));
- req->len = htonl(len);
- req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
- V_TX_SNDBUF(snd_win>>15));
- req->flags = htonl(F_TX_INIT);
- req->sndseq = htonl(ep->snd_seq);
- ep->mpa_skb = skb;
- state_set(&ep->com, MPA_REP_SENT);
- return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
-}
-
-static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
- struct cpl_act_establish *req = cplhdr(skb);
- unsigned int tid = GET_TID(req);
-
- pr_debug("%s ep %p tid %d\n", __func__, ep, tid);
-
- dst_confirm(ep->dst);
-
- /* setup the hwtid for this connection */
- ep->hwtid = tid;
- cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid);
-
- ep->snd_seq = ntohl(req->snd_isn);
- ep->rcv_seq = ntohl(req->rcv_isn);
-
- set_emss(ep, ntohs(req->tcp_opt));
-
- /* dealloc the atid */
- cxgb3_free_atid(ep->com.tdev, ep->atid);
-
- /* start MPA negotiation */
- send_mpa_req(ep, skb);
-
- return 0;
-}
-
-static void abort_connection(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
-{
-	pr_debug("%s ep %p\n", __func__, ep);
- state_set(&ep->com, ABORTING);
- send_abort(ep, skb, gfp);
-}
-
-static void close_complete_upcall(struct iwch_ep *ep)
-{
- struct iw_cm_event event;
-
- pr_debug("%s ep %p\n", __func__, ep);
- memset(&event, 0, sizeof(event));
- event.event = IW_CM_EVENT_CLOSE;
- if (ep->com.cm_id) {
- pr_debug("close complete delivered ep %p cm_id %p tid %d\n",
- ep, ep->com.cm_id, ep->hwtid);
- ep->com.cm_id->event_handler(ep->com.cm_id, &event);
- ep->com.cm_id->rem_ref(ep->com.cm_id);
- ep->com.cm_id = NULL;
- ep->com.qp = NULL;
- }
-}
-
-static void peer_close_upcall(struct iwch_ep *ep)
-{
- struct iw_cm_event event;
-
- pr_debug("%s ep %p\n", __func__, ep);
- memset(&event, 0, sizeof(event));
- event.event = IW_CM_EVENT_DISCONNECT;
- if (ep->com.cm_id) {
- pr_debug("peer close delivered ep %p cm_id %p tid %d\n",
- ep, ep->com.cm_id, ep->hwtid);
- ep->com.cm_id->event_handler(ep->com.cm_id, &event);
- }
-}
-
-static void peer_abort_upcall(struct iwch_ep *ep)
-{
- struct iw_cm_event event;
-
- pr_debug("%s ep %p\n", __func__, ep);
- memset(&event, 0, sizeof(event));
- event.event = IW_CM_EVENT_CLOSE;
- event.status = -ECONNRESET;
- if (ep->com.cm_id) {
- pr_debug("abort delivered ep %p cm_id %p tid %d\n", ep,
- ep->com.cm_id, ep->hwtid);
- ep->com.cm_id->event_handler(ep->com.cm_id, &event);
- ep->com.cm_id->rem_ref(ep->com.cm_id);
- ep->com.cm_id = NULL;
- ep->com.qp = NULL;
- }
-}
-
-static void connect_reply_upcall(struct iwch_ep *ep, int status)
-{
- struct iw_cm_event event;
-
- pr_debug("%s ep %p status %d\n", __func__, ep, status);
- memset(&event, 0, sizeof(event));
- event.event = IW_CM_EVENT_CONNECT_REPLY;
- event.status = status;
- memcpy(&event.local_addr, &ep->com.local_addr,
- sizeof(ep->com.local_addr));
- memcpy(&event.remote_addr, &ep->com.remote_addr,
- sizeof(ep->com.remote_addr));
-
- if ((status == 0) || (status == -ECONNREFUSED)) {
- event.private_data_len = ep->plen;
- event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
- }
- if (ep->com.cm_id) {
- pr_debug("%s ep %p tid %d status %d\n", __func__, ep,
- ep->hwtid, status);
- ep->com.cm_id->event_handler(ep->com.cm_id, &event);
- }
- if (status < 0) {
- ep->com.cm_id->rem_ref(ep->com.cm_id);
- ep->com.cm_id = NULL;
- ep->com.qp = NULL;
- }
-}
-
-static void connect_request_upcall(struct iwch_ep *ep)
-{
- struct iw_cm_event event;
-
- pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
- memset(&event, 0, sizeof(event));
- event.event = IW_CM_EVENT_CONNECT_REQUEST;
- memcpy(&event.local_addr, &ep->com.local_addr,
- sizeof(ep->com.local_addr));
- memcpy(&event.remote_addr, &ep->com.remote_addr,
-	       sizeof(ep->com.remote_addr));
- event.private_data_len = ep->plen;
- event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
- event.provider_data = ep;
- /*
- * Until ird/ord negotiation via MPAv2 support is added, send max
- * supported values
- */
- event.ird = event.ord = 8;
- if (state_read(&ep->parent_ep->com) != DEAD) {
- get_ep(&ep->com);
- ep->parent_ep->com.cm_id->event_handler(
- ep->parent_ep->com.cm_id,
- &event);
- }
- put_ep(&ep->parent_ep->com);
- ep->parent_ep = NULL;
-}
-
-static void established_upcall(struct iwch_ep *ep)
-{
- struct iw_cm_event event;
-
- pr_debug("%s ep %p\n", __func__, ep);
- memset(&event, 0, sizeof(event));
- event.event = IW_CM_EVENT_ESTABLISHED;
- /*
- * Until ird/ord negotiation via MPAv2 support is added, send max
- * supported values
- */
- event.ird = event.ord = 8;
- if (ep->com.cm_id) {
- pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
- ep->com.cm_id->event_handler(ep->com.cm_id, &event);
- }
-}
-
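-/*
- * Return RX credits to the hardware and force an ACK so the peer sees
- * the reopened TCP receive window.
- */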
-static int update_rx_credits(struct iwch_ep *ep, u32 credits)
-{
- struct cpl_rx_data_ack *req;
- struct sk_buff *skb;
-
- pr_debug("%s ep %p credits %u\n", __func__, ep, credits);
- skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
- if (!skb) {
- pr_err("update_rx_credits - cannot alloc skb!\n");
- return 0;
- }
-
- req = skb_put(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid));
- req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1));
- skb->priority = CPL_PRIORITY_ACK;
- iwch_cxgb3_ofld_send(ep->com.tdev, skb);
- return credits;
-}
-
-static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
-{
- struct mpa_message *mpa;
- u16 plen;
- struct iwch_qp_attributes attrs;
- enum iwch_qp_attr_mask mask;
- int err;
-
- pr_debug("%s ep %p\n", __func__, ep);
-
- /*
- * Stop mpa timer. If it expired, then the state has
- * changed and we bail since ep_timeout already aborted
- * the connection.
- */
- stop_ep_timer(ep);
- if (state_read(&ep->com) != MPA_REQ_SENT)
- return;
-
- /*
- * If we get more than the supported amount of private data
- * then we must fail this connection.
- */
- if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
- err = -EINVAL;
- goto err;
- }
-
- /*
- * copy the new data into our accumulation buffer.
- */
- skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
- skb->len);
- ep->mpa_pkt_len += skb->len;
-
- /*
- * if we don't even have the mpa message, then bail.
- */
- if (ep->mpa_pkt_len < sizeof(*mpa))
- return;
- mpa = (struct mpa_message *) ep->mpa_pkt;
-
- /* Validate MPA header. */
- if (mpa->revision != mpa_rev) {
- err = -EPROTO;
- goto err;
- }
- if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
- err = -EPROTO;
- goto err;
- }
-
- plen = ntohs(mpa->private_data_size);
-
- /*
- * Fail if there's too much private data.
- */
- if (plen > MPA_MAX_PRIVATE_DATA) {
- err = -EPROTO;
- goto err;
- }
-
- /*
- * If plen does not account for pkt size
- */
- if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
- err = -EPROTO;
- goto err;
- }
-
- ep->plen = (u8) plen;
-
- /*
- * If we don't have all the pdata yet, then bail.
-	 * We'll continue processing when more data arrives.
- */
- if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
- return;
-
- if (mpa->flags & MPA_REJECT) {
- err = -ECONNREFUSED;
- goto err;
- }
-
- /*
- * If we get here we have accumulated the entire mpa
- * start reply message including private data. And
- * the MPA header is valid.
- */
- state_set(&ep->com, FPDU_MODE);
- ep->mpa_attr.initiator = 1;
- ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
- ep->mpa_attr.recv_marker_enabled = markers_enabled;
- ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
- ep->mpa_attr.version = mpa_rev;
- pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n",
- __func__,
- ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
- ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
-
- attrs.mpa_attr = ep->mpa_attr;
- attrs.max_ird = ep->ird;
- attrs.max_ord = ep->ord;
- attrs.llp_stream_handle = ep;
- attrs.next_state = IWCH_QP_STATE_RTS;
-
- mask = IWCH_QP_ATTR_NEXT_STATE |
- IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR |
- IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD;
-
- /* bind QP and TID with INIT_WR */
- err = iwch_modify_qp(ep->com.qp->rhp,
- ep->com.qp, mask, &attrs, 1);
- if (err)
- goto err;
-
- if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0) {
- iwch_post_zb_read(ep);
- }
-
- goto out;
-err:
- abort_connection(ep, skb, GFP_KERNEL);
-out:
- connect_reply_upcall(ep, err);
- return;
-}
-
-static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
-{
- struct mpa_message *mpa;
- u16 plen;
-
- pr_debug("%s ep %p\n", __func__, ep);
-
- /*
- * Stop mpa timer. If it expired, then the state has
- * changed and we bail since ep_timeout already aborted
- * the connection.
- */
- stop_ep_timer(ep);
- if (state_read(&ep->com) != MPA_REQ_WAIT)
- return;
-
- /*
- * If we get more than the supported amount of private data
- * then we must fail this connection.
- */
- if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
- abort_connection(ep, skb, GFP_KERNEL);
- return;
- }
-
- pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
-
- /*
- * Copy the new data into our accumulation buffer.
- */
- skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
- skb->len);
- ep->mpa_pkt_len += skb->len;
-
- /*
- * If we don't even have the mpa message, then bail.
-	 * We'll continue processing when more data arrives.
- */
- if (ep->mpa_pkt_len < sizeof(*mpa))
- return;
- pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
- mpa = (struct mpa_message *) ep->mpa_pkt;
-
- /*
- * Validate MPA Header.
- */
- if (mpa->revision != mpa_rev) {
- abort_connection(ep, skb, GFP_KERNEL);
- return;
- }
-
- if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
- abort_connection(ep, skb, GFP_KERNEL);
- return;
- }
-
- plen = ntohs(mpa->private_data_size);
-
- /*
- * Fail if there's too much private data.
- */
- if (plen > MPA_MAX_PRIVATE_DATA) {
- abort_connection(ep, skb, GFP_KERNEL);
- return;
- }
-
- /*
- * If plen does not account for pkt size
- */
- if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
- abort_connection(ep, skb, GFP_KERNEL);
- return;
- }
- ep->plen = (u8) plen;
-
- /*
- * If we don't have all the pdata yet, then bail.
- */
- if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
- return;
-
- /*
- * If we get here we have accumulated the entire mpa
-	 * start request message including private data.
- */
- ep->mpa_attr.initiator = 0;
- ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
- ep->mpa_attr.recv_marker_enabled = markers_enabled;
- ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
- ep->mpa_attr.version = mpa_rev;
- pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n",
- __func__,
- ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
- ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
-
- state_set(&ep->com, MPA_REQ_RCVD);
-
- /* drive upcall */
- connect_request_upcall(ep);
- return;
-}
-
-static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
- struct cpl_rx_data *hdr = cplhdr(skb);
- unsigned int dlen = ntohs(hdr->len);
-
- pr_debug("%s ep %p dlen %u\n", __func__, ep, dlen);
-
- skb_pull(skb, sizeof(*hdr));
- skb_trim(skb, dlen);
-
- ep->rcv_seq += dlen;
- BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen));
-
- switch (state_read(&ep->com)) {
- case MPA_REQ_SENT:
- process_mpa_reply(ep, skb);
- break;
- case MPA_REQ_WAIT:
- process_mpa_request(ep, skb);
- break;
- case MPA_REP_SENT:
- break;
- default:
- pr_err("%s Unexpected streaming data. ep %p state %d tid %d\n",
- __func__, ep, state_read(&ep->com), ep->hwtid);
-
- /*
- * The ep will timeout and inform the ULP of the failure.
- * See ep_timeout().
- */
- break;
- }
-
- /* update RX credits */
- update_rx_credits(ep, dlen);
-
- return CPL_RET_BUF_DONE;
-}
-
-/*
- * Upcall from the adapter indicating data has been transmitted.
- * For us it's just the single MPA request or reply. We can now free
- * the skb holding the mpa message.
- */
-static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
- struct cpl_wr_ack *hdr = cplhdr(skb);
- unsigned int credits = ntohs(hdr->credits);
- unsigned long flags;
- int post_zb = 0;
-
- pr_debug("%s ep %p credits %u\n", __func__, ep, credits);
-
- if (credits == 0) {
- pr_debug("%s 0 credit ack ep %p state %u\n",
- __func__, ep, state_read(&ep->com));
- return CPL_RET_BUF_DONE;
- }
-
- spin_lock_irqsave(&ep->com.lock, flags);
- BUG_ON(credits != 1);
- dst_confirm(ep->dst);
- if (!ep->mpa_skb) {
- pr_debug("%s rdma_init wr_ack ep %p state %u\n",
- __func__, ep, ep->com.state);
- if (ep->mpa_attr.initiator) {
- pr_debug("%s initiator ep %p state %u\n",
- __func__, ep, ep->com.state);
- if (peer2peer && ep->com.state == FPDU_MODE)
- post_zb = 1;
- } else {
- pr_debug("%s responder ep %p state %u\n",
- __func__, ep, ep->com.state);
- if (ep->com.state == MPA_REQ_RCVD) {
- ep->com.rpl_done = 1;
- wake_up(&ep->com.waitq);
- }
- }
- } else {
- pr_debug("%s lsm ack ep %p state %u freeing skb\n",
- __func__, ep, ep->com.state);
- kfree_skb(ep->mpa_skb);
- ep->mpa_skb = NULL;
- }
- spin_unlock_irqrestore(&ep->com.lock, flags);
- if (post_zb)
- iwch_post_zb_read(ep);
- return CPL_RET_BUF_DONE;
-}
-
-static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
- unsigned long flags;
- int release = 0;
-
- pr_debug("%s ep %p\n", __func__, ep);
- BUG_ON(!ep);
-
- /*
- * We get 2 abort replies from the HW. The first one must
- * be ignored except for scribbling that we need one more.
- */
- if (!test_and_set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) {
- return CPL_RET_BUF_DONE;
- }
-
- spin_lock_irqsave(&ep->com.lock, flags);
- switch (ep->com.state) {
- case ABORTING:
- close_complete_upcall(ep);
- __state_set(&ep->com, DEAD);
- release = 1;
- break;
- default:
- pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state);
- break;
- }
- spin_unlock_irqrestore(&ep->com.lock, flags);
-
- if (release)
- release_ep_resources(ep);
- return CPL_RET_BUF_DONE;
-}
-
-/*
- * Return whether a failed active open has allocated a TID
- */
-static inline int act_open_has_tid(int status)
-{
- return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
- status != CPL_ERR_ARP_MISS;
-}
-
-static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
- struct cpl_act_open_rpl *rpl = cplhdr(skb);
-
- pr_debug("%s ep %p status %u errno %d\n", __func__, ep, rpl->status,
- status2errno(rpl->status));
- connect_reply_upcall(ep, status2errno(rpl->status));
- state_set(&ep->com, DEAD);
- if (ep->com.tdev->type != T3A && act_open_has_tid(rpl->status))
- release_tid(ep->com.tdev, GET_TID(rpl), NULL);
- cxgb3_free_atid(ep->com.tdev, ep->atid);
- dst_release(ep->dst);
- l2t_release(ep->com.tdev, ep->l2t);
- put_ep(&ep->com);
- return CPL_RET_BUF_DONE;
-}
-
-static int listen_start(struct iwch_listen_ep *ep)
-{
- struct sk_buff *skb;
- struct cpl_pass_open_req *req;
-
- pr_debug("%s ep %p\n", __func__, ep);
- skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
- if (!skb) {
- pr_err("t3c_listen_start failed to alloc skb!\n");
- return -ENOMEM;
- }
-
- req = skb_put(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, ep->stid));
- req->local_port = ep->com.local_addr.sin_port;
- req->local_ip = ep->com.local_addr.sin_addr.s_addr;
- req->peer_port = 0;
- req->peer_ip = 0;
- req->peer_netmask = 0;
- req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS);
- req->opt0l = htonl(V_RCV_BUFSIZ(rcv_win>>10));
- req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK));
-
- skb->priority = 1;
- return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
-}
-
-static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_listen_ep *ep = ctx;
- struct cpl_pass_open_rpl *rpl = cplhdr(skb);
-
- pr_debug("%s ep %p status %d error %d\n", __func__, ep,
- rpl->status, status2errno(rpl->status));
- ep->com.rpl_err = status2errno(rpl->status);
- ep->com.rpl_done = 1;
- wake_up(&ep->com.waitq);
-
- return CPL_RET_BUF_DONE;
-}
-
-static int listen_stop(struct iwch_listen_ep *ep)
-{
- struct sk_buff *skb;
- struct cpl_close_listserv_req *req;
-
- pr_debug("%s ep %p\n", __func__, ep);
- skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
- if (!skb) {
- pr_err("%s - failed to alloc skb\n", __func__);
- return -ENOMEM;
- }
- req = skb_put(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- req->cpu_idx = 0;
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid));
- skb->priority = 1;
- return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
-}
-
-static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb,
- void *ctx)
-{
- struct iwch_listen_ep *ep = ctx;
- struct cpl_close_listserv_rpl *rpl = cplhdr(skb);
-
- pr_debug("%s ep %p\n", __func__, ep);
- ep->com.rpl_err = status2errno(rpl->status);
- ep->com.rpl_done = 1;
- wake_up(&ep->com.waitq);
- return CPL_RET_BUF_DONE;
-}
-
-static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb)
-{
- struct cpl_pass_accept_rpl *rpl;
- unsigned int mtu_idx;
- u32 opt0h, opt0l, opt2;
- int wscale;
-
- pr_debug("%s ep %p\n", __func__, ep);
- BUG_ON(skb_cloned(skb));
- skb_trim(skb, sizeof(*rpl));
- skb_get(skb);
- mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
- wscale = compute_wscale(rcv_win);
- opt0h = V_NAGLE(0) |
- V_NO_CONG(nocong) |
- V_KEEP_ALIVE(1) |
- F_TCAM_BYPASS |
- V_WND_SCALE(wscale) |
- V_MSS_IDX(mtu_idx) |
- V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
- opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
- opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) |
- V_CONG_CONTROL_FLAVOR(cong_flavor);
-
- rpl = cplhdr(skb);
- rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, ep->hwtid));
- rpl->peer_ip = peer_ip;
- rpl->opt0h = htonl(opt0h);
- rpl->opt0l_status = htonl(opt0l | CPL_PASS_OPEN_ACCEPT);
- rpl->opt2 = htonl(opt2);
- rpl->rsvd = rpl->opt2; /* workaround for HW bug */
- skb->priority = CPL_PRIORITY_SETUP;
- iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
-
- return;
-}
-
-static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip,
- struct sk_buff *skb)
-{
- pr_debug("%s t3cdev %p tid %u peer_ip %x\n", __func__, tdev, hwtid,
- peer_ip);
- BUG_ON(skb_cloned(skb));
- skb_trim(skb, sizeof(struct cpl_tid_release));
- skb_get(skb);
-
- if (tdev->type != T3A)
- release_tid(tdev, hwtid, skb);
- else {
- struct cpl_pass_accept_rpl *rpl;
-
- rpl = cplhdr(skb);
- skb->priority = CPL_PRIORITY_SETUP;
- rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
- hwtid));
- rpl->peer_ip = peer_ip;
- rpl->opt0h = htonl(F_TCAM_BYPASS);
- rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
- rpl->opt2 = 0;
- rpl->rsvd = rpl->opt2;
- iwch_cxgb3_ofld_send(tdev, skb);
- }
-}
-
-static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *child_ep, *parent_ep = ctx;
- struct cpl_pass_accept_req *req = cplhdr(skb);
- unsigned int hwtid = GET_TID(req);
- struct dst_entry *dst;
- struct l2t_entry *l2t;
- struct rtable *rt;
- struct iff_mac tim;
-
- pr_debug("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid);
-
- if (state_read(&parent_ep->com) != LISTEN) {
- pr_err("%s - listening ep not in LISTEN\n", __func__);
- goto reject;
- }
-
- /*
- * Find the netdev for this connection request.
- */
- tim.mac_addr = req->dst_mac;
- tim.vlan_tag = ntohs(req->vlan_tag);
- if (tdev->ctl(tdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
- pr_err("%s bad dst mac %pM\n", __func__, req->dst_mac);
- goto reject;
- }
-
- /* Find output route */
- rt = find_route(tdev,
- req->local_ip,
- req->peer_ip,
- req->local_port,
- req->peer_port, G_PASS_OPEN_TOS(ntohl(req->tos_tid)));
- if (!rt) {
- pr_err("%s - failed to find dst entry!\n", __func__);
- goto reject;
- }
- dst = &rt->dst;
- l2t = t3_l2t_get(tdev, dst, NULL, &req->peer_ip);
- if (!l2t) {
- pr_err("%s - failed to allocate l2t entry!\n", __func__);
- dst_release(dst);
- goto reject;
- }
- child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
- if (!child_ep) {
- pr_err("%s - failed to allocate ep entry!\n", __func__);
- l2t_release(tdev, l2t);
- dst_release(dst);
- goto reject;
- }
- state_set(&child_ep->com, CONNECTING);
- child_ep->com.tdev = tdev;
- child_ep->com.cm_id = NULL;
- child_ep->com.local_addr.sin_family = AF_INET;
- child_ep->com.local_addr.sin_port = req->local_port;
- child_ep->com.local_addr.sin_addr.s_addr = req->local_ip;
- child_ep->com.remote_addr.sin_family = AF_INET;
- child_ep->com.remote_addr.sin_port = req->peer_port;
- child_ep->com.remote_addr.sin_addr.s_addr = req->peer_ip;
- get_ep(&parent_ep->com);
- child_ep->parent_ep = parent_ep;
- child_ep->tos = G_PASS_OPEN_TOS(ntohl(req->tos_tid));
- child_ep->l2t = l2t;
- child_ep->dst = dst;
- child_ep->hwtid = hwtid;
- timer_setup(&child_ep->timer, ep_timeout, 0);
- cxgb3_insert_tid(tdev, &t3c_client, child_ep, hwtid);
- accept_cr(child_ep, req->peer_ip, skb);
- goto out;
-reject:
- reject_cr(tdev, hwtid, req->peer_ip, skb);
-out:
- return CPL_RET_BUF_DONE;
-}
-
-static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
- struct cpl_pass_establish *req = cplhdr(skb);
-
- pr_debug("%s ep %p\n", __func__, ep);
- ep->snd_seq = ntohl(req->snd_isn);
- ep->rcv_seq = ntohl(req->rcv_isn);
-
- set_emss(ep, ntohs(req->tcp_opt));
-
- dst_confirm(ep->dst);
- state_set(&ep->com, MPA_REQ_WAIT);
- start_ep_timer(ep);
-
- return CPL_RET_BUF_DONE;
-}
-
-static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
- struct iwch_qp_attributes attrs;
- unsigned long flags;
- int disconnect = 1;
- int release = 0;
-
- pr_debug("%s ep %p\n", __func__, ep);
- dst_confirm(ep->dst);
-
- spin_lock_irqsave(&ep->com.lock, flags);
- switch (ep->com.state) {
- case MPA_REQ_WAIT:
- __state_set(&ep->com, CLOSING);
- break;
- case MPA_REQ_SENT:
- __state_set(&ep->com, CLOSING);
- connect_reply_upcall(ep, -ECONNRESET);
- break;
- case MPA_REQ_RCVD:
-
- /*
- * We're gonna mark this puppy DEAD, but keep
- * the reference on it until the ULP accepts or
- * rejects the CR. Also wake up anyone waiting
- * in rdma connection migration (see iwch_accept_cr()).
- */
- __state_set(&ep->com, CLOSING);
- ep->com.rpl_done = 1;
- ep->com.rpl_err = -ECONNRESET;
- pr_debug("waking up ep %p\n", ep);
- wake_up(&ep->com.waitq);
- break;
- case MPA_REP_SENT:
- __state_set(&ep->com, CLOSING);
- ep->com.rpl_done = 1;
- ep->com.rpl_err = -ECONNRESET;
- pr_debug("waking up ep %p\n", ep);
- wake_up(&ep->com.waitq);
- break;
- case FPDU_MODE:
- start_ep_timer(ep);
- __state_set(&ep->com, CLOSING);
- attrs.next_state = IWCH_QP_STATE_CLOSING;
- iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
- IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
- peer_close_upcall(ep);
- break;
- case ABORTING:
- disconnect = 0;
- break;
- case CLOSING:
- __state_set(&ep->com, MORIBUND);
- disconnect = 0;
- break;
- case MORIBUND:
- stop_ep_timer(ep);
- if (ep->com.cm_id && ep->com.qp) {
- attrs.next_state = IWCH_QP_STATE_IDLE;
- iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
- IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
- }
- close_complete_upcall(ep);
- __state_set(&ep->com, DEAD);
- release = 1;
- disconnect = 0;
- break;
- case DEAD:
- disconnect = 0;
- break;
- default:
- BUG_ON(1);
- }
- spin_unlock_irqrestore(&ep->com.lock, flags);
- if (disconnect)
- iwch_ep_disconnect(ep, 0, GFP_KERNEL);
- if (release)
- release_ep_resources(ep);
- return CPL_RET_BUF_DONE;
-}
-
-/*
- * Returns whether an ABORT_REQ_RSS message is a negative advice.
- */
-static int is_neg_adv_abort(unsigned int status)
-{
- return status == CPL_ERR_RTX_NEG_ADVICE ||
- status == CPL_ERR_PERSIST_NEG_ADVICE;
-}
-
-static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct cpl_abort_req_rss *req = cplhdr(skb);
- struct iwch_ep *ep = ctx;
- struct cpl_abort_rpl *rpl;
- struct sk_buff *rpl_skb;
- struct iwch_qp_attributes attrs;
- int ret;
- int release = 0;
- unsigned long flags;
-
- if (is_neg_adv_abort(req->status)) {
- pr_debug("%s neg_adv_abort ep %p tid %d\n", __func__, ep,
- ep->hwtid);
- t3_l2t_send_event(ep->com.tdev, ep->l2t);
- return CPL_RET_BUF_DONE;
- }
-
- /*
- * We get 2 peer aborts from the HW. The first one must
- * be ignored except for scribbling that we need one more.
- */
- if (!test_and_set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) {
- return CPL_RET_BUF_DONE;
- }
-
- spin_lock_irqsave(&ep->com.lock, flags);
- pr_debug("%s ep %p state %u\n", __func__, ep, ep->com.state);
- switch (ep->com.state) {
- case CONNECTING:
- break;
- case MPA_REQ_WAIT:
- stop_ep_timer(ep);
- break;
- case MPA_REQ_SENT:
- stop_ep_timer(ep);
- connect_reply_upcall(ep, -ECONNRESET);
- break;
- case MPA_REP_SENT:
- ep->com.rpl_done = 1;
- ep->com.rpl_err = -ECONNRESET;
- pr_debug("waking up ep %p\n", ep);
- wake_up(&ep->com.waitq);
- break;
- case MPA_REQ_RCVD:
-
- /*
- * We're gonna mark this puppy DEAD, but keep
- * the reference on it until the ULP accepts or
- * rejects the CR. Also wake up anyone waiting
- * in rdma connection migration (see iwch_accept_cr()).
- */
- ep->com.rpl_done = 1;
- ep->com.rpl_err = -ECONNRESET;
- pr_debug("waking up ep %p\n", ep);
- wake_up(&ep->com.waitq);
- break;
- case MORIBUND:
- case CLOSING:
- stop_ep_timer(ep);
- /*FALLTHROUGH*/
- case FPDU_MODE:
- if (ep->com.cm_id && ep->com.qp) {
- attrs.next_state = IWCH_QP_STATE_ERROR;
- ret = iwch_modify_qp(ep->com.qp->rhp,
- ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
- &attrs, 1);
- if (ret)
- pr_err("%s - qp <- error failed!\n", __func__);
- }
- peer_abort_upcall(ep);
- break;
- case ABORTING:
- break;
- case DEAD:
- pr_debug("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
- spin_unlock_irqrestore(&ep->com.lock, flags);
- return CPL_RET_BUF_DONE;
- default:
- BUG_ON(1);
- break;
- }
- dst_confirm(ep->dst);
- if (ep->com.state != ABORTING) {
- __state_set(&ep->com, DEAD);
- release = 1;
- }
- spin_unlock_irqrestore(&ep->com.lock, flags);
-
- rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
- if (!rpl_skb) {
- pr_err("%s - cannot allocate skb!\n", __func__);
- release = 1;
- goto out;
- }
- rpl_skb->priority = CPL_PRIORITY_DATA;
- rpl = skb_put(rpl_skb, sizeof(*rpl));
- rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
- rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
- OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
- rpl->cmd = CPL_ABORT_NO_RST;
- iwch_cxgb3_ofld_send(ep->com.tdev, rpl_skb);
-out:
- if (release)
- release_ep_resources(ep);
- return CPL_RET_BUF_DONE;
-}
-
-static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
- struct iwch_qp_attributes attrs;
- unsigned long flags;
- int release = 0;
-
- pr_debug("%s ep %p\n", __func__, ep);
- BUG_ON(!ep);
-
- /* The cm_id may be null if we failed to connect */
- spin_lock_irqsave(&ep->com.lock, flags);
- switch (ep->com.state) {
- case CLOSING:
- __state_set(&ep->com, MORIBUND);
- break;
- case MORIBUND:
- stop_ep_timer(ep);
- if ((ep->com.cm_id) && (ep->com.qp)) {
- attrs.next_state = IWCH_QP_STATE_IDLE;
- iwch_modify_qp(ep->com.qp->rhp,
- ep->com.qp,
- IWCH_QP_ATTR_NEXT_STATE,
- &attrs, 1);
- }
- close_complete_upcall(ep);
- __state_set(&ep->com, DEAD);
- release = 1;
- break;
- case ABORTING:
- case DEAD:
- break;
- default:
- BUG_ON(1);
- break;
- }
- spin_unlock_irqrestore(&ep->com.lock, flags);
- if (release)
- release_ep_resources(ep);
- return CPL_RET_BUF_DONE;
-}
-
-/*
- * T3A does 3 things when a TERM is received:
- * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet
- * 2) generate an async event on the QP with the TERMINATE opcode
- * 3) post a TERMINATE opcode cqe into the associated CQ.
- *
- * For (1), we save the message in the qp for later consumption by the consumer.
- * For (2), we move the QP into TERMINATE, post a QP event and disconnect.
- * For (3), we toss the CQE in cxio_poll_cq().
- *
- * terminate() handles case (1)...
- */
-static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
-
- if (state_read(&ep->com) != FPDU_MODE)
- return CPL_RET_BUF_DONE;
-
- pr_debug("%s ep %p\n", __func__, ep);
- skb_pull(skb, sizeof(struct cpl_rdma_terminate));
- pr_debug("%s saving %d bytes of term msg\n", __func__, skb->len);
- skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer,
- skb->len);
- ep->com.qp->attr.terminate_msg_len = skb->len;
- ep->com.qp->attr.is_terminate_local = 0;
- return CPL_RET_BUF_DONE;
-}
-
-static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct cpl_rdma_ec_status *rep = cplhdr(skb);
- struct iwch_ep *ep = ctx;
-
- pr_debug("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid,
- rep->status);
- if (rep->status) {
- struct iwch_qp_attributes attrs;
-
- pr_err("%s BAD CLOSE - Aborting tid %u\n",
- __func__, ep->hwtid);
- stop_ep_timer(ep);
- attrs.next_state = IWCH_QP_STATE_ERROR;
- iwch_modify_qp(ep->com.qp->rhp,
- ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
- &attrs, 1);
- abort_connection(ep, NULL, GFP_KERNEL);
- }
- return CPL_RET_BUF_DONE;
-}
-
-static void ep_timeout(struct timer_list *t)
-{
- struct iwch_ep *ep = from_timer(ep, t, timer);
- struct iwch_qp_attributes attrs;
- unsigned long flags;
- int abort = 1;
-
- spin_lock_irqsave(&ep->com.lock, flags);
- pr_debug("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
- ep->com.state);
- switch (ep->com.state) {
- case MPA_REQ_SENT:
- __state_set(&ep->com, ABORTING);
- connect_reply_upcall(ep, -ETIMEDOUT);
- break;
- case MPA_REQ_WAIT:
- __state_set(&ep->com, ABORTING);
- break;
- case CLOSING:
- case MORIBUND:
- if (ep->com.cm_id && ep->com.qp) {
- attrs.next_state = IWCH_QP_STATE_ERROR;
- iwch_modify_qp(ep->com.qp->rhp,
- ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
- &attrs, 1);
- }
- __state_set(&ep->com, ABORTING);
- break;
- default:
- WARN(1, "%s unexpected state ep %p state %u\n",
- __func__, ep, ep->com.state);
- abort = 0;
- }
- spin_unlock_irqrestore(&ep->com.lock, flags);
- if (abort)
- abort_connection(ep, NULL, GFP_ATOMIC);
- put_ep(&ep->com);
-}
-
-int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
-{
- struct iwch_ep *ep = to_ep(cm_id);
-
- pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
-
- if (state_read(&ep->com) == DEAD) {
- put_ep(&ep->com);
- return -ECONNRESET;
- }
- BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
- if (mpa_rev == 0)
- abort_connection(ep, NULL, GFP_KERNEL);
- else {
- send_mpa_reject(ep, pdata, pdata_len);
- iwch_ep_disconnect(ep, 0, GFP_KERNEL);
- }
- put_ep(&ep->com);
- return 0;
-}
-
-int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
-{
- int err;
- struct iwch_qp_attributes attrs;
- enum iwch_qp_attr_mask mask;
- struct iwch_ep *ep = to_ep(cm_id);
- struct iwch_dev *h = to_iwch_dev(cm_id->device);
- struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
-
- pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
- if (state_read(&ep->com) == DEAD) {
- err = -ECONNRESET;
- goto err;
- }
-
- BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
- BUG_ON(!qp);
-
- if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
- (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
- abort_connection(ep, NULL, GFP_KERNEL);
- err = -EINVAL;
- goto err;
- }
-
- cm_id->add_ref(cm_id);
- ep->com.cm_id = cm_id;
- ep->com.qp = qp;
-
- ep->ird = conn_param->ird;
- ep->ord = conn_param->ord;
-
- if (peer2peer && ep->ird == 0)
- ep->ird = 1;
-
- pr_debug("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
-
- /* bind QP to EP and move to RTS */
- attrs.mpa_attr = ep->mpa_attr;
- attrs.max_ird = ep->ird;
- attrs.max_ord = ep->ord;
- attrs.llp_stream_handle = ep;
- attrs.next_state = IWCH_QP_STATE_RTS;
-
- /* bind QP and TID with INIT_WR */
- mask = IWCH_QP_ATTR_NEXT_STATE |
- IWCH_QP_ATTR_LLP_STREAM_HANDLE |
- IWCH_QP_ATTR_MPA_ATTR |
- IWCH_QP_ATTR_MAX_IRD |
- IWCH_QP_ATTR_MAX_ORD;
-
- err = iwch_modify_qp(ep->com.qp->rhp,
- ep->com.qp, mask, &attrs, 1);
- if (err)
- goto err1;
-
- /* if needed, wait for wr_ack */
- if (iwch_rqes_posted(qp)) {
- wait_event(ep->com.waitq, ep->com.rpl_done);
- err = ep->com.rpl_err;
- if (err)
- goto err1;
- }
-
- err = send_mpa_reply(ep, conn_param->private_data,
- conn_param->private_data_len);
- if (err)
- goto err1;
-
-
- state_set(&ep->com, FPDU_MODE);
- established_upcall(ep);
- put_ep(&ep->com);
- return 0;
-err1:
- ep->com.cm_id = NULL;
- ep->com.qp = NULL;
- cm_id->rem_ref(cm_id);
-err:
- put_ep(&ep->com);
- return err;
-}
-
-static int is_loopback_dst(struct iw_cm_id *cm_id)
-{
- struct net_device *dev;
- struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
-
- dev = ip_dev_find(&init_net, raddr->sin_addr.s_addr);
- if (!dev)
- return 0;
- dev_put(dev);
- return 1;
-}
-
-int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
-{
- struct iwch_dev *h = to_iwch_dev(cm_id->device);
- struct iwch_ep *ep;
- struct rtable *rt;
- int err = 0;
- struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
- struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
-
- if (cm_id->m_remote_addr.ss_family != PF_INET) {
- err = -ENOSYS;
- goto out;
- }
-
- if (is_loopback_dst(cm_id)) {
- err = -ENOSYS;
- goto out;
- }
-
- ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
- if (!ep) {
- pr_err("%s - cannot alloc ep\n", __func__);
- err = -ENOMEM;
- goto out;
- }
- timer_setup(&ep->timer, ep_timeout, 0);
- ep->plen = conn_param->private_data_len;
- if (ep->plen)
- memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
- conn_param->private_data, ep->plen);
- ep->ird = conn_param->ird;
- ep->ord = conn_param->ord;
-
- if (peer2peer && ep->ord == 0)
- ep->ord = 1;
-
- ep->com.tdev = h->rdev.t3cdev_p;
-
- cm_id->add_ref(cm_id);
- ep->com.cm_id = cm_id;
- ep->com.qp = get_qhp(h, conn_param->qpn);
- BUG_ON(!ep->com.qp);
- pr_debug("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
- ep->com.qp, cm_id);
-
- /*
- * Allocate an active TID to initiate a TCP connection.
- */
- ep->atid = cxgb3_alloc_atid(h->rdev.t3cdev_p, &t3c_client, ep);
- if (ep->atid == -1) {
- pr_err("%s - cannot alloc atid\n", __func__);
- err = -ENOMEM;
- goto fail2;
- }
-
- /* find a route */
- rt = find_route(h->rdev.t3cdev_p, laddr->sin_addr.s_addr,
- raddr->sin_addr.s_addr, laddr->sin_port,
- raddr->sin_port, IPTOS_LOWDELAY);
- if (!rt) {
- pr_err("%s - cannot find route\n", __func__);
- err = -EHOSTUNREACH;
- goto fail3;
- }
- ep->dst = &rt->dst;
- ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL,
- &raddr->sin_addr.s_addr);
- if (!ep->l2t) {
- pr_err("%s - cannot alloc l2e\n", __func__);
- err = -ENOMEM;
- goto fail4;
- }
-
- state_set(&ep->com, CONNECTING);
- ep->tos = IPTOS_LOWDELAY;
- memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
- sizeof(ep->com.local_addr));
- memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr,
- sizeof(ep->com.remote_addr));
-
- /* send connect request to rnic */
- err = send_connect(ep);
- if (!err)
- goto out;
-
- l2t_release(h->rdev.t3cdev_p, ep->l2t);
-fail4:
- dst_release(ep->dst);
-fail3:
- cxgb3_free_atid(ep->com.tdev, ep->atid);
-fail2:
- cm_id->rem_ref(cm_id);
- put_ep(&ep->com);
-out:
- return err;
-}
-
-int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
-{
- int err = 0;
- struct iwch_dev *h = to_iwch_dev(cm_id->device);
- struct iwch_listen_ep *ep;
-
-
- might_sleep();
-
- if (cm_id->m_local_addr.ss_family != PF_INET) {
- err = -ENOSYS;
- goto fail1;
- }
-
- ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
- if (!ep) {
- pr_err("%s - cannot alloc ep\n", __func__);
- err = -ENOMEM;
- goto fail1;
- }
- pr_debug("%s ep %p\n", __func__, ep);
- ep->com.tdev = h->rdev.t3cdev_p;
- cm_id->add_ref(cm_id);
- ep->com.cm_id = cm_id;
- ep->backlog = backlog;
- memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
- sizeof(ep->com.local_addr));
-
- /*
- * Allocate a server TID.
- */
- ep->stid = cxgb3_alloc_stid(h->rdev.t3cdev_p, &t3c_client, ep);
- if (ep->stid == -1) {
- pr_err("%s - cannot alloc atid\n", __func__);
- err = -ENOMEM;
- goto fail2;
- }
-
- state_set(&ep->com, LISTEN);
- err = listen_start(ep);
- if (err)
- goto fail3;
-
- /* wait for pass_open_rpl */
- wait_event(ep->com.waitq, ep->com.rpl_done);
- err = ep->com.rpl_err;
- if (!err) {
- cm_id->provider_data = ep;
- goto out;
- }
-fail3:
- cxgb3_free_stid(ep->com.tdev, ep->stid);
-fail2:
- cm_id->rem_ref(cm_id);
- put_ep(&ep->com);
-fail1:
-out:
- return err;
-}
-
-int iwch_destroy_listen(struct iw_cm_id *cm_id)
-{
- int err;
- struct iwch_listen_ep *ep = to_listen_ep(cm_id);
-
- pr_debug("%s ep %p\n", __func__, ep);
-
- might_sleep();
- state_set(&ep->com, DEAD);
- ep->com.rpl_done = 0;
- ep->com.rpl_err = 0;
- err = listen_stop(ep);
- if (err)
- goto done;
- wait_event(ep->com.waitq, ep->com.rpl_done);
- cxgb3_free_stid(ep->com.tdev, ep->stid);
-done:
- err = ep->com.rpl_err;
- cm_id->rem_ref(cm_id);
- put_ep(&ep->com);
- return err;
-}
-
-int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
-{
- int ret = 0;
- unsigned long flags;
- int close = 0;
- int fatal = 0;
- struct t3cdev *tdev;
- struct cxio_rdev *rdev;
-
- spin_lock_irqsave(&ep->com.lock, flags);
-
- pr_debug("%s ep %p state %s, abrupt %d\n", __func__, ep,
- states[ep->com.state], abrupt);
-
- tdev = (struct t3cdev *)ep->com.tdev;
- rdev = (struct cxio_rdev *)tdev->ulp;
- if (cxio_fatal_error(rdev)) {
- fatal = 1;
- close_complete_upcall(ep);
- ep->com.state = DEAD;
- }
- switch (ep->com.state) {
- case MPA_REQ_WAIT:
- case MPA_REQ_SENT:
- case MPA_REQ_RCVD:
- case MPA_REP_SENT:
- case FPDU_MODE:
- close = 1;
- if (abrupt)
- ep->com.state = ABORTING;
- else {
- ep->com.state = CLOSING;
- start_ep_timer(ep);
- }
- set_bit(CLOSE_SENT, &ep->com.flags);
- break;
- case CLOSING:
- if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
- close = 1;
- if (abrupt) {
- stop_ep_timer(ep);
- ep->com.state = ABORTING;
- } else
- ep->com.state = MORIBUND;
- }
- break;
- case MORIBUND:
- case ABORTING:
- case DEAD:
- pr_debug("%s ignoring disconnect ep %p state %u\n",
- __func__, ep, ep->com.state);
- break;
- default:
- BUG();
- break;
- }
-
- spin_unlock_irqrestore(&ep->com.lock, flags);
- if (close) {
- if (abrupt)
- ret = send_abort(ep, NULL, gfp);
- else
- ret = send_halfclose(ep, gfp);
- if (ret)
- fatal = 1;
- }
- if (fatal)
- release_ep_resources(ep);
- return ret;
-}
-
-int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
- struct l2t_entry *l2t)
-{
- struct iwch_ep *ep = ctx;
-
- if (ep->dst != old)
- return 0;
-
- pr_debug("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
- l2t);
- dst_hold(new);
- l2t_release(ep->com.tdev, ep->l2t);
- ep->l2t = l2t;
- dst_release(old);
- ep->dst = new;
- return 1;
-}
-
-/*
- * All the CM events are handled on a work queue so that they run in a safe, sleepable (process) context.
- * These are the real handlers that are called from the work queue.
- */
-static const cxgb3_cpl_handler_func work_handlers[NUM_CPL_CMDS] = {
- [CPL_ACT_ESTABLISH] = act_establish,
- [CPL_ACT_OPEN_RPL] = act_open_rpl,
- [CPL_RX_DATA] = rx_data,
- [CPL_TX_DMA_ACK] = tx_ack,
- [CPL_ABORT_RPL_RSS] = abort_rpl,
- [CPL_ABORT_RPL] = abort_rpl,
- [CPL_PASS_OPEN_RPL] = pass_open_rpl,
- [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl,
- [CPL_PASS_ACCEPT_REQ] = pass_accept_req,
- [CPL_PASS_ESTABLISH] = pass_establish,
- [CPL_PEER_CLOSE] = peer_close,
- [CPL_ABORT_REQ_RSS] = peer_abort,
- [CPL_CLOSE_CON_RPL] = close_con_rpl,
- [CPL_RDMA_TERMINATE] = terminate,
- [CPL_RDMA_EC_STATUS] = ec_status,
-};
-
-static void process_work(struct work_struct *work)
-{
- struct sk_buff *skb = NULL;
- void *ep;
- struct t3cdev *tdev;
- int ret;
-
- while ((skb = skb_dequeue(&rxq))) {
- ep = *((void **) (skb->cb));
- tdev = *((struct t3cdev **) (skb->cb + sizeof(void *)));
- ret = work_handlers[G_OPCODE(ntohl((__force __be32)skb->csum))](tdev, skb, ep);
- if (ret & CPL_RET_BUF_DONE)
- kfree_skb(skb);
-
- /*
- * ep was referenced in sched(), and is freed here.
- */
- put_ep((struct iwch_ep_common *)ep);
- }
-}
-
-static DECLARE_WORK(skb_work, process_work);
-
-static int sched(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep_common *epc = ctx;
-
- get_ep(epc);
-
- /*
- * Save ctx and tdev in the skb->cb area.
- */
- *((void **) skb->cb) = ctx;
- *((struct t3cdev **) (skb->cb + sizeof(void *))) = tdev;
-
- /*
- * Queue the skb and schedule the worker thread.
- */
- skb_queue_tail(&rxq, skb);
- queue_work(workq, &skb_work);
- return 0;
-}
-
-static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
-
- if (rpl->status != CPL_ERR_NONE) {
- pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n",
- rpl->status, GET_TID(rpl));
- }
- return CPL_RET_BUF_DONE;
-}
-
-/*
- * All upcalls from the T3 Core go to sched() to schedule the
- * processing on a work queue.
- */
-cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS] = {
- [CPL_ACT_ESTABLISH] = sched,
- [CPL_ACT_OPEN_RPL] = sched,
- [CPL_RX_DATA] = sched,
- [CPL_TX_DMA_ACK] = sched,
- [CPL_ABORT_RPL_RSS] = sched,
- [CPL_ABORT_RPL] = sched,
- [CPL_PASS_OPEN_RPL] = sched,
- [CPL_CLOSE_LISTSRV_RPL] = sched,
- [CPL_PASS_ACCEPT_REQ] = sched,
- [CPL_PASS_ESTABLISH] = sched,
- [CPL_PEER_CLOSE] = sched,
- [CPL_CLOSE_CON_RPL] = sched,
- [CPL_ABORT_REQ_RSS] = sched,
- [CPL_RDMA_TERMINATE] = sched,
- [CPL_RDMA_EC_STATUS] = sched,
- [CPL_SET_TCB_RPL] = set_tcb_rpl,
-};
-
-int __init iwch_cm_init(void)
-{
- skb_queue_head_init(&rxq);
-
- workq = alloc_ordered_workqueue("iw_cxgb3", WQ_MEM_RECLAIM);
- if (!workq)
- return -ENOMEM;
-
- return 0;
-}
-
-void __exit iwch_cm_term(void)
-{
- flush_workqueue(workq);
- destroy_workqueue(workq);
-}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
deleted file mode 100644
index cc7fe644d260..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ /dev/null
@@ -1,233 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef _IWCH_CM_H_
-#define _IWCH_CM_H_
-
-#include <linux/inet.h>
-#include <linux/wait.h>
-#include <linux/spinlock.h>
-#include <linux/kref.h>
-
-#include <rdma/ib_verbs.h>
-#include <rdma/iw_cm.h>
-
-#include "cxgb3_offload.h"
-#include "iwch_provider.h"
-
-#define MPA_KEY_REQ "MPA ID Req Frame"
-#define MPA_KEY_REP "MPA ID Rep Frame"
-
-#define MPA_MAX_PRIVATE_DATA 256
-#define MPA_REV 0 /* XXX - amso1100 uses rev 0 ! */
-#define MPA_REJECT 0x20
-#define MPA_CRC 0x40
-#define MPA_MARKERS 0x80
-#define MPA_FLAGS_MASK 0xE0
-
-#define put_ep(ep) { \
- pr_debug("put_ep (via %s:%u) ep %p refcnt %d\n", \
- __func__, __LINE__, ep, kref_read(&((ep)->kref))); \
- WARN_ON(kref_read(&((ep)->kref)) < 1); \
- kref_put(&((ep)->kref), __free_ep); \
-}
-
-#define get_ep(ep) { \
- pr_debug("get_ep (via %s:%u) ep %p, refcnt %d\n", \
- __func__, __LINE__, ep, kref_read(&((ep)->kref))); \
- kref_get(&((ep)->kref)); \
-}
-
-struct mpa_message {
- u8 key[16];
- u8 flags;
- u8 revision;
- __be16 private_data_size;
- u8 private_data[0];
-};
-
-struct terminate_message {
- u8 layer_etype;
- u8 ecode;
- __be16 hdrct_rsvd;
- u8 len_hdrs[0];
-};
-
-#define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28)
-
-enum iwch_layers_types {
- LAYER_RDMAP = 0x00,
- LAYER_DDP = 0x10,
- LAYER_MPA = 0x20,
- RDMAP_LOCAL_CATA = 0x00,
- RDMAP_REMOTE_PROT = 0x01,
- RDMAP_REMOTE_OP = 0x02,
- DDP_LOCAL_CATA = 0x00,
- DDP_TAGGED_ERR = 0x01,
- DDP_UNTAGGED_ERR = 0x02,
- DDP_LLP = 0x03
-};
-
-enum iwch_rdma_ecodes {
- RDMAP_INV_STAG = 0x00,
- RDMAP_BASE_BOUNDS = 0x01,
- RDMAP_ACC_VIOL = 0x02,
- RDMAP_STAG_NOT_ASSOC = 0x03,
- RDMAP_TO_WRAP = 0x04,
- RDMAP_INV_VERS = 0x05,
- RDMAP_INV_OPCODE = 0x06,
- RDMAP_STREAM_CATA = 0x07,
- RDMAP_GLOBAL_CATA = 0x08,
- RDMAP_CANT_INV_STAG = 0x09,
- RDMAP_UNSPECIFIED = 0xff
-};
-
-enum iwch_ddp_ecodes {
- DDPT_INV_STAG = 0x00,
- DDPT_BASE_BOUNDS = 0x01,
- DDPT_STAG_NOT_ASSOC = 0x02,
- DDPT_TO_WRAP = 0x03,
- DDPT_INV_VERS = 0x04,
- DDPU_INV_QN = 0x01,
- DDPU_INV_MSN_NOBUF = 0x02,
- DDPU_INV_MSN_RANGE = 0x03,
- DDPU_INV_MO = 0x04,
- DDPU_MSG_TOOBIG = 0x05,
- DDPU_INV_VERS = 0x06
-};
-
-enum iwch_mpa_ecodes {
- MPA_CRC_ERR = 0x02,
- MPA_MARKER_ERR = 0x03
-};
-
-enum iwch_ep_state {
- IDLE = 0,
- LISTEN,
- CONNECTING,
- MPA_REQ_WAIT,
- MPA_REQ_SENT,
- MPA_REQ_RCVD,
- MPA_REP_SENT,
- FPDU_MODE,
- ABORTING,
- CLOSING,
- MORIBUND,
- DEAD,
-};
-
-enum iwch_ep_flags {
- PEER_ABORT_IN_PROGRESS = 0,
- ABORT_REQ_IN_PROGRESS = 1,
- RELEASE_RESOURCES = 2,
- CLOSE_SENT = 3,
-};
-
-struct iwch_ep_common {
- struct iw_cm_id *cm_id;
- struct iwch_qp *qp;
- struct t3cdev *tdev;
- enum iwch_ep_state state;
- struct kref kref;
- spinlock_t lock;
- struct sockaddr_in local_addr;
- struct sockaddr_in remote_addr;
- wait_queue_head_t waitq;
- int rpl_done;
- int rpl_err;
- unsigned long flags;
-};
-
-struct iwch_listen_ep {
- struct iwch_ep_common com;
- unsigned int stid;
- int backlog;
-};
-
-struct iwch_ep {
- struct iwch_ep_common com;
- struct iwch_ep *parent_ep;
- struct timer_list timer;
- unsigned int atid;
- u32 hwtid;
- u32 snd_seq;
- u32 rcv_seq;
- struct l2t_entry *l2t;
- struct dst_entry *dst;
- struct sk_buff *mpa_skb;
- struct iwch_mpa_attributes mpa_attr;
- unsigned int mpa_pkt_len;
- u8 mpa_pkt[sizeof(struct mpa_message) + MPA_MAX_PRIVATE_DATA];
- u8 tos;
- u16 emss;
- u16 plen;
- u32 ird;
- u32 ord;
-};
-
-static inline struct iwch_ep *to_ep(struct iw_cm_id *cm_id)
-{
- return cm_id->provider_data;
-}
-
-static inline struct iwch_listen_ep *to_listen_ep(struct iw_cm_id *cm_id)
-{
- return cm_id->provider_data;
-}
-
-static inline int compute_wscale(int win)
-{
- int wscale = 0;
-
- while (wscale < 14 && (65535<<wscale) < win)
- wscale++;
- return wscale;
-}
-
-/* CM prototypes */
-
-int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
-int iwch_create_listen(struct iw_cm_id *cm_id, int backlog);
-int iwch_destroy_listen(struct iw_cm_id *cm_id);
-int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
-int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
-int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp);
-int iwch_quiesce_tid(struct iwch_ep *ep);
-int iwch_resume_tid(struct iwch_ep *ep);
-void __free_ep(struct kref *kref);
-void iwch_rearp(struct iwch_ep *ep);
-int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, struct l2t_entry *l2t);
-
-int __init iwch_cm_init(void);
-void __exit iwch_cm_term(void);
-extern int peer2peer;
-
-#endif /* _IWCH_CM_H_ */
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
deleted file mode 100644
index a098c0140580..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "iwch_provider.h"
-#include "iwch.h"
-
-static int __iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
- struct iwch_qp *qhp, struct ib_wc *wc)
-{
- struct t3_wq *wq = qhp ? &qhp->wq : NULL;
- struct t3_cqe cqe;
- u32 credit = 0;
- u8 cqe_flushed;
- u64 cookie;
- int ret = 1;
-
- ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie,
- &credit);
- if (t3a_device(chp->rhp) && credit) {
- pr_debug("%s updating %d cq credits on id %d\n", __func__,
- credit, chp->cq.cqid);
- cxio_hal_cq_op(&rhp->rdev, &chp->cq, CQ_CREDIT_UPDATE, credit);
- }
-
- if (ret) {
- ret = -EAGAIN;
- goto out;
- }
- ret = 1;
-
- wc->wr_id = cookie;
- wc->qp = qhp ? &qhp->ibqp : NULL;
- wc->vendor_err = CQE_STATUS(cqe);
- wc->wc_flags = 0;
-
- pr_debug("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x lo 0x%x cookie 0x%llx\n",
- __func__,
- CQE_QPID(cqe), CQE_TYPE(cqe),
- CQE_OPCODE(cqe), CQE_STATUS(cqe), CQE_WRID_HI(cqe),
- CQE_WRID_LOW(cqe), (unsigned long long)cookie);
-
- if (CQE_TYPE(cqe) == 0) {
- if (!CQE_STATUS(cqe))
- wc->byte_len = CQE_LEN(cqe);
- else
- wc->byte_len = 0;
- wc->opcode = IB_WC_RECV;
- if (CQE_OPCODE(cqe) == T3_SEND_WITH_INV ||
- CQE_OPCODE(cqe) == T3_SEND_WITH_SE_INV) {
- wc->ex.invalidate_rkey = CQE_WRID_STAG(cqe);
- wc->wc_flags |= IB_WC_WITH_INVALIDATE;
- }
- } else {
- switch (CQE_OPCODE(cqe)) {
- case T3_RDMA_WRITE:
- wc->opcode = IB_WC_RDMA_WRITE;
- break;
- case T3_READ_REQ:
- wc->opcode = IB_WC_RDMA_READ;
- wc->byte_len = CQE_LEN(cqe);
- break;
- case T3_SEND:
- case T3_SEND_WITH_SE:
- case T3_SEND_WITH_INV:
- case T3_SEND_WITH_SE_INV:
- wc->opcode = IB_WC_SEND;
- break;
- case T3_LOCAL_INV:
- wc->opcode = IB_WC_LOCAL_INV;
- break;
- case T3_FAST_REGISTER:
- wc->opcode = IB_WC_REG_MR;
- break;
- default:
- pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
- CQE_OPCODE(cqe), CQE_QPID(cqe));
- ret = -EINVAL;
- goto out;
- }
- }
-
- if (cqe_flushed)
- wc->status = IB_WC_WR_FLUSH_ERR;
- else {
-
- switch (CQE_STATUS(cqe)) {
- case TPT_ERR_SUCCESS:
- wc->status = IB_WC_SUCCESS;
- break;
- case TPT_ERR_STAG:
- wc->status = IB_WC_LOC_ACCESS_ERR;
- break;
- case TPT_ERR_PDID:
- wc->status = IB_WC_LOC_PROT_ERR;
- break;
- case TPT_ERR_QPID:
- case TPT_ERR_ACCESS:
- wc->status = IB_WC_LOC_ACCESS_ERR;
- break;
- case TPT_ERR_WRAP:
- wc->status = IB_WC_GENERAL_ERR;
- break;
- case TPT_ERR_BOUND:
- wc->status = IB_WC_LOC_LEN_ERR;
- break;
- case TPT_ERR_INVALIDATE_SHARED_MR:
- case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
- wc->status = IB_WC_MW_BIND_ERR;
- break;
- case TPT_ERR_CRC:
- case TPT_ERR_MARKER:
- case TPT_ERR_PDU_LEN_ERR:
- case TPT_ERR_OUT_OF_RQE:
- case TPT_ERR_DDP_VERSION:
- case TPT_ERR_RDMA_VERSION:
- case TPT_ERR_DDP_QUEUE_NUM:
- case TPT_ERR_MSN:
- case TPT_ERR_TBIT:
- case TPT_ERR_MO:
- case TPT_ERR_MSN_RANGE:
- case TPT_ERR_IRD_OVERFLOW:
- case TPT_ERR_OPCODE:
- wc->status = IB_WC_FATAL_ERR;
- break;
- case TPT_ERR_SWFLUSH:
- wc->status = IB_WC_WR_FLUSH_ERR;
- break;
- default:
- pr_err("Unexpected cqe_status 0x%x for QPID=0x%0x\n",
- CQE_STATUS(cqe), CQE_QPID(cqe));
- ret = -EINVAL;
- }
- }
-out:
- return ret;
-}
-
-/*
- * Get one cq entry from cxio and map it to openib.
- *
- * Returns:
- * 0 cq empty
- * 1 cqe returned
- * -EAGAIN caller must try again
- * any other -errno fatal error
- */
-static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
- struct ib_wc *wc)
-{
- struct iwch_qp *qhp;
- struct t3_cqe *rd_cqe;
- int ret;
-
- rd_cqe = cxio_next_cqe(&chp->cq);
-
- if (!rd_cqe)
- return 0;
-
- qhp = get_qhp(rhp, CQE_QPID(*rd_cqe));
- if (qhp) {
- spin_lock(&qhp->lock);
- ret = __iwch_poll_cq_one(rhp, chp, qhp, wc);
- spin_unlock(&qhp->lock);
- } else {
- ret = __iwch_poll_cq_one(rhp, chp, NULL, wc);
- }
- return ret;
-}
-
-int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
-{
- struct iwch_dev *rhp;
- struct iwch_cq *chp;
- unsigned long flags;
- int npolled;
- int err = 0;
-
- chp = to_iwch_cq(ibcq);
- rhp = chp->rhp;
-
- spin_lock_irqsave(&chp->lock, flags);
- for (npolled = 0; npolled < num_entries; ++npolled) {
-
- /*
- * Because T3 can post CQEs that are _not_ associated
- * with a WR, we might have to poll again after removing
- * one of these.
- */
- do {
- err = iwch_poll_cq_one(rhp, chp, wc + npolled);
- } while (err == -EAGAIN);
- if (err <= 0)
- break;
- }
- spin_unlock_irqrestore(&chp->lock, flags);
-
- if (err < 0)
- return err;
- else {
- return npolled;
- }
-}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c
deleted file mode 100644
index 9d356c1301c7..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_ev.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/gfp.h>
-#include <linux/mman.h>
-#include <net/sock.h>
-#include "iwch_provider.h"
-#include "iwch.h"
-#include "iwch_cm.h"
-#include "cxio_hal.h"
-#include "cxio_wr.h"
-
-static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp,
- struct respQ_msg_t *rsp_msg,
- enum ib_event_type ib_event,
- int send_term)
-{
- struct ib_event event;
- struct iwch_qp_attributes attrs;
- struct iwch_qp *qhp;
- unsigned long flag;
-
- xa_lock(&rnicp->qps);
- qhp = xa_load(&rnicp->qps, CQE_QPID(rsp_msg->cqe));
-
- if (!qhp) {
- pr_err("%s unaffiliated error 0x%x qpid 0x%x\n",
- __func__, CQE_STATUS(rsp_msg->cqe),
- CQE_QPID(rsp_msg->cqe));
- xa_unlock(&rnicp->qps);
- return;
- }
-
- if ((qhp->attr.state == IWCH_QP_STATE_ERROR) ||
- (qhp->attr.state == IWCH_QP_STATE_TERMINATE)) {
- pr_debug("%s AE received after RTS - qp state %d qpid 0x%x status 0x%x\n",
- __func__,
- qhp->attr.state, qhp->wq.qpid,
- CQE_STATUS(rsp_msg->cqe));
- xa_unlock(&rnicp->qps);
- return;
- }
-
- pr_err("%s - AE qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
- __func__,
- CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
- CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
- CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
-
- atomic_inc(&qhp->refcnt);
- xa_unlock(&rnicp->qps);
-
- if (qhp->attr.state == IWCH_QP_STATE_RTS) {
- attrs.next_state = IWCH_QP_STATE_TERMINATE;
- iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE,
- &attrs, 1);
- if (send_term)
- iwch_post_terminate(qhp, rsp_msg);
- }
-
- event.event = ib_event;
- event.device = chp->ibcq.device;
- if (ib_event == IB_EVENT_CQ_ERR)
- event.element.cq = &chp->ibcq;
- else
- event.element.qp = &qhp->ibqp;
-
- if (qhp->ibqp.event_handler)
- (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context);
-
- spin_lock_irqsave(&chp->comp_handler_lock, flag);
- (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
- spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
-
- if (atomic_dec_and_test(&qhp->refcnt))
- wake_up(&qhp->wait);
-}
-
-void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb)
-{
- struct iwch_dev *rnicp;
- struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data;
- struct iwch_cq *chp;
- struct iwch_qp *qhp;
- u32 cqid = RSPQ_CQID(rsp_msg);
- unsigned long flag;
-
- rnicp = (struct iwch_dev *) rdev_p->ulp;
- xa_lock(&rnicp->qps);
- chp = get_chp(rnicp, cqid);
- qhp = xa_load(&rnicp->qps, CQE_QPID(rsp_msg->cqe));
- if (!chp || !qhp) {
- pr_err("BAD AE cqid 0x%x qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
- cqid, CQE_QPID(rsp_msg->cqe),
- CQE_OPCODE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
- CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe),
- CQE_WRID_LOW(rsp_msg->cqe));
- xa_unlock(&rnicp->qps);
- goto out;
- }
- iwch_qp_add_ref(&qhp->ibqp);
- atomic_inc(&chp->refcnt);
- xa_unlock(&rnicp->qps);
-
- /*
- * 1) completion of our sending a TERMINATE.
- * 2) incoming TERMINATE message.
- */
- if ((CQE_OPCODE(rsp_msg->cqe) == T3_TERMINATE) &&
- (CQE_STATUS(rsp_msg->cqe) == 0)) {
- if (SQ_TYPE(rsp_msg->cqe)) {
- pr_debug("%s QPID 0x%x ep %p disconnecting\n",
- __func__, qhp->wq.qpid, qhp->ep);
- iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC);
- } else {
- pr_debug("%s post REQ_ERR AE QPID 0x%x\n", __func__,
- qhp->wq.qpid);
- post_qp_event(rnicp, chp, rsp_msg,
- IB_EVENT_QP_REQ_ERR, 0);
- iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC);
- }
- goto done;
- }
-
- /* Bad incoming Read request */
- if (SQ_TYPE(rsp_msg->cqe) &&
- (CQE_OPCODE(rsp_msg->cqe) == T3_READ_RESP)) {
- post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1);
- goto done;
- }
-
- /* Bad incoming write */
- if (RQ_TYPE(rsp_msg->cqe) &&
- (CQE_OPCODE(rsp_msg->cqe) == T3_RDMA_WRITE)) {
- post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1);
- goto done;
- }
-
- switch (CQE_STATUS(rsp_msg->cqe)) {
-
- /* Completion Events */
- case TPT_ERR_SUCCESS:
-
- /*
- * Confirm the destination entry if this is a RECV completion.
- */
- if (qhp->ep && SQ_TYPE(rsp_msg->cqe))
- dst_confirm(qhp->ep->dst);
- spin_lock_irqsave(&chp->comp_handler_lock, flag);
- (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
- spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
- break;
-
- case TPT_ERR_STAG:
- case TPT_ERR_PDID:
- case TPT_ERR_QPID:
- case TPT_ERR_ACCESS:
- case TPT_ERR_WRAP:
- case TPT_ERR_BOUND:
- case TPT_ERR_INVALIDATE_SHARED_MR:
- case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
- post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1);
- break;
-
- /* Device Fatal Errors */
- case TPT_ERR_ECC:
- case TPT_ERR_ECC_PSTAG:
- case TPT_ERR_INTERNAL_ERR:
- post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_DEVICE_FATAL, 1);
- break;
-
- /* QP Fatal Errors */
- case TPT_ERR_OUT_OF_RQE:
- case TPT_ERR_PBL_ADDR_BOUND:
- case TPT_ERR_CRC:
- case TPT_ERR_MARKER:
- case TPT_ERR_PDU_LEN_ERR:
- case TPT_ERR_DDP_VERSION:
- case TPT_ERR_RDMA_VERSION:
- case TPT_ERR_OPCODE:
- case TPT_ERR_DDP_QUEUE_NUM:
- case TPT_ERR_MSN:
- case TPT_ERR_TBIT:
- case TPT_ERR_MO:
- case TPT_ERR_MSN_GAP:
- case TPT_ERR_MSN_RANGE:
- case TPT_ERR_RQE_ADDR_BOUND:
- case TPT_ERR_IRD_OVERFLOW:
- post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1);
- break;
-
- default:
- pr_err("Unknown T3 status 0x%x QPID 0x%x\n",
- CQE_STATUS(rsp_msg->cqe), qhp->wq.qpid);
- post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1);
- break;
- }
-done:
- if (atomic_dec_and_test(&chp->refcnt))
- wake_up(&chp->wait);
- iwch_qp_rem_ref(&qhp->ibqp);
-out:
- dev_kfree_skb_irq(skb);
-}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
deleted file mode 100644
index ce0f2741821d..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_mem.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/slab.h>
-#include <asm/byteorder.h>
-
-#include <rdma/iw_cm.h>
-#include <rdma/ib_verbs.h>
-
-#include "cxio_hal.h"
-#include "cxio_resource.h"
-#include "iwch.h"
-#include "iwch_provider.h"
-
-static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
-{
- u32 mmid;
-
- mhp->attr.state = 1;
- mhp->attr.stag = stag;
- mmid = stag >> 8;
- mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
- pr_debug("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
- return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL);
-}
-
-int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
- struct iwch_mr *mhp, int shift)
-{
- u32 stag;
- int ret;
-
- if (cxio_register_phys_mem(&rhp->rdev,
- &stag, mhp->attr.pdid,
- mhp->attr.perms,
- mhp->attr.zbva,
- mhp->attr.va_fbo,
- mhp->attr.len,
- shift - 12,
- mhp->attr.pbl_size, mhp->attr.pbl_addr))
- return -ENOMEM;
-
- ret = iwch_finish_mem_reg(mhp, stag);
- if (ret)
- cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr);
- return ret;
-}
-
-int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
-{
- mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev,
- npages << 3);
-
- if (!mhp->attr.pbl_addr)
- return -ENOMEM;
-
- mhp->attr.pbl_size = npages;
-
- return 0;
-}
-
-void iwch_free_pbl(struct iwch_mr *mhp)
-{
- cxio_hal_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
- mhp->attr.pbl_size << 3);
-}
-
-int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset)
-{
- return cxio_write_pbl(&mhp->rhp->rdev, pages,
- mhp->attr.pbl_addr + (offset << 3), npages);
-}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
deleted file mode 100644
index e775c1a1a450..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ /dev/null
@@ -1,1326 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/device.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/list.h>
-#include <linux/sched/mm.h>
-#include <linux/spinlock.h>
-#include <linux/ethtool.h>
-#include <linux/rtnetlink.h>
-#include <linux/inetdevice.h>
-#include <linux/slab.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-
-#include <rdma/iw_cm.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_smi.h>
-#include <rdma/ib_umem.h>
-#include <rdma/ib_user_verbs.h>
-#include <rdma/uverbs_ioctl.h>
-
-#include "cxio_hal.h"
-#include "iwch.h"
-#include "iwch_provider.h"
-#include "iwch_cm.h"
-#include <rdma/cxgb3-abi.h>
-#include "common.h"
-
-static void iwch_dealloc_ucontext(struct ib_ucontext *context)
-{
- struct iwch_dev *rhp = to_iwch_dev(context->device);
- struct iwch_ucontext *ucontext = to_iwch_ucontext(context);
- struct iwch_mm_entry *mm, *tmp;
-
- pr_debug("%s context %p\n", __func__, context);
- list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
- kfree(mm);
- cxio_release_ucontext(&rhp->rdev, &ucontext->uctx);
-}
-
-static int iwch_alloc_ucontext(struct ib_ucontext *ucontext,
- struct ib_udata *udata)
-{
- struct ib_device *ibdev = ucontext->device;
- struct iwch_ucontext *context = to_iwch_ucontext(ucontext);
- struct iwch_dev *rhp = to_iwch_dev(ibdev);
-
- pr_debug("%s ibdev %p\n", __func__, ibdev);
- cxio_init_ucontext(&rhp->rdev, &context->uctx);
- INIT_LIST_HEAD(&context->mmaps);
- spin_lock_init(&context->mmap_lock);
- return 0;
-}
-
-static void iwch_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
-{
- struct iwch_cq *chp;
-
- pr_debug("%s ib_cq %p\n", __func__, ib_cq);
- chp = to_iwch_cq(ib_cq);
-
- xa_erase_irq(&chp->rhp->cqs, chp->cq.cqid);
- atomic_dec(&chp->refcnt);
- wait_event(chp->wait, !atomic_read(&chp->refcnt));
-
- cxio_destroy_cq(&chp->rhp->rdev, &chp->cq);
-}
-
-static int iwch_create_cq(struct ib_cq *ibcq,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
-{
- struct ib_device *ibdev = ibcq->device;
- int entries = attr->cqe;
- struct iwch_dev *rhp = to_iwch_dev(ibcq->device);
- struct iwch_cq *chp = to_iwch_cq(ibcq);
- struct iwch_create_cq_resp uresp;
- struct iwch_create_cq_req ureq;
- static int warned;
- size_t resplen;
-
- pr_debug("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
- if (attr->flags)
- return -EINVAL;
-
- if (udata) {
- if (!t3a_device(rhp)) {
- if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
- return -EFAULT;
-
- chp->user_rptr_addr = (u32 __user *)(unsigned long)ureq.user_rptr_addr;
- }
- }
-
- if (t3a_device(rhp)) {
-
- /*
- * T3A: Add some fluff to handle extra CQEs inserted
- * for various errors.
- * Additional CQE possibilities:
- * TERMINATE,
- * incoming RDMA WRITE failures
- * incoming RDMA READ REQUEST failures
- * NOTE: We cannot ensure the CQ won't overflow.
- */
- entries += 16;
- }
- entries = roundup_pow_of_two(entries);
- chp->cq.size_log2 = ilog2(entries);
-
- if (cxio_create_cq(&rhp->rdev, &chp->cq, !udata))
- return -ENOMEM;
-
- chp->rhp = rhp;
- chp->ibcq.cqe = 1 << chp->cq.size_log2;
- spin_lock_init(&chp->lock);
- spin_lock_init(&chp->comp_handler_lock);
- atomic_set(&chp->refcnt, 1);
- init_waitqueue_head(&chp->wait);
- if (xa_store_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL)) {
- cxio_destroy_cq(&chp->rhp->rdev, &chp->cq);
- return -ENOMEM;
- }
-
- if (udata) {
- struct iwch_mm_entry *mm;
- struct iwch_ucontext *ucontext = rdma_udata_to_drv_context(
- udata, struct iwch_ucontext, ibucontext);
-
- mm = kmalloc(sizeof(*mm), GFP_KERNEL);
- if (!mm) {
- iwch_destroy_cq(&chp->ibcq, udata);
- return -ENOMEM;
- }
- uresp.cqid = chp->cq.cqid;
- uresp.size_log2 = chp->cq.size_log2;
- spin_lock(&ucontext->mmap_lock);
- uresp.key = ucontext->key;
- ucontext->key += PAGE_SIZE;
- spin_unlock(&ucontext->mmap_lock);
- mm->key = uresp.key;
- mm->addr = virt_to_phys(chp->cq.queue);
- if (udata->outlen < sizeof(uresp)) {
- if (!warned++)
- pr_warn("Warning - downlevel libcxgb3 (non-fatal)\n");
- mm->len = PAGE_ALIGN((1UL << uresp.size_log2) *
- sizeof(struct t3_cqe));
- resplen = sizeof(struct iwch_create_cq_resp_v0);
- } else {
- mm->len = PAGE_ALIGN(((1UL << uresp.size_log2) + 1) *
- sizeof(struct t3_cqe));
- uresp.memsize = mm->len;
- uresp.reserved = 0;
- resplen = sizeof(uresp);
- }
- if (ib_copy_to_udata(udata, &uresp, resplen)) {
- kfree(mm);
- iwch_destroy_cq(&chp->ibcq, udata);
- return -EFAULT;
- }
- insert_mmap(ucontext, mm);
- }
- pr_debug("created cqid 0x%0x chp %p size 0x%0x, dma_addr %pad\n",
- chp->cq.cqid, chp, (1 << chp->cq.size_log2),
- &chp->cq.dma_addr);
- return 0;
-}
-
-static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
-{
- struct iwch_dev *rhp;
- struct iwch_cq *chp;
- enum t3_cq_opcode cq_op;
- int err;
- unsigned long flag;
- u32 rptr;
-
- chp = to_iwch_cq(ibcq);
- rhp = chp->rhp;
- if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
- cq_op = CQ_ARM_SE;
- else
- cq_op = CQ_ARM_AN;
- if (chp->user_rptr_addr) {
- if (get_user(rptr, chp->user_rptr_addr))
- return -EFAULT;
- spin_lock_irqsave(&chp->lock, flag);
- chp->cq.rptr = rptr;
- } else
- spin_lock_irqsave(&chp->lock, flag);
- pr_debug("%s rptr 0x%x\n", __func__, chp->cq.rptr);
- err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0);
- spin_unlock_irqrestore(&chp->lock, flag);
- if (err < 0)
- pr_err("Error %d rearming CQID 0x%x\n", err, chp->cq.cqid);
- if (err > 0 && !(flags & IB_CQ_REPORT_MISSED_EVENTS))
- err = 0;
- return err;
-}
-
-static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
- int len = vma->vm_end - vma->vm_start;
- u32 key = vma->vm_pgoff << PAGE_SHIFT;
- struct cxio_rdev *rdev_p;
- int ret = 0;
- struct iwch_mm_entry *mm;
- struct iwch_ucontext *ucontext;
- u64 addr;
-
- pr_debug("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff,
- key, len);
-
- if (vma->vm_start & (PAGE_SIZE-1)) {
- return -EINVAL;
- }
-
- rdev_p = &(to_iwch_dev(context->device)->rdev);
- ucontext = to_iwch_ucontext(context);
-
- mm = remove_mmap(ucontext, key, len);
- if (!mm)
- return -EINVAL;
- addr = mm->addr;
- kfree(mm);
-
- if ((addr >= rdev_p->rnic_info.udbell_physbase) &&
- (addr < (rdev_p->rnic_info.udbell_physbase +
- rdev_p->rnic_info.udbell_len))) {
-
- /*
- * Map T3 DB register.
- */
- if (vma->vm_flags & VM_READ) {
- return -EPERM;
- }
-
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
- vma->vm_flags &= ~VM_MAYREAD;
- ret = io_remap_pfn_range(vma, vma->vm_start,
- addr >> PAGE_SHIFT,
- len, vma->vm_page_prot);
- } else {
-
- /*
- * Map WQ or CQ contig dma memory...
- */
- ret = remap_pfn_range(vma, vma->vm_start,
- addr >> PAGE_SHIFT,
- len, vma->vm_page_prot);
- }
-
- return ret;
-}
-
-static void iwch_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata)
-{
- struct iwch_dev *rhp;
- struct iwch_pd *php;
-
- php = to_iwch_pd(pd);
- rhp = php->rhp;
- pr_debug("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
- cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid);
-}
-
-static int iwch_allocate_pd(struct ib_pd *pd, struct ib_udata *udata)
-{
- struct iwch_pd *php = to_iwch_pd(pd);
- struct ib_device *ibdev = pd->device;
- u32 pdid;
- struct iwch_dev *rhp;
-
- pr_debug("%s ibdev %p\n", __func__, ibdev);
- rhp = (struct iwch_dev *) ibdev;
- pdid = cxio_hal_get_pdid(rhp->rdev.rscp);
- if (!pdid)
- return -EINVAL;
-
- php->pdid = pdid;
- php->rhp = rhp;
- if (udata) {
- struct iwch_alloc_pd_resp resp = {.pdid = php->pdid};
-
- if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
- iwch_deallocate_pd(&php->ibpd, udata);
- return -EFAULT;
- }
- }
- pr_debug("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
- return 0;
-}
-
-static int iwch_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
-{
- struct iwch_dev *rhp;
- struct iwch_mr *mhp;
- u32 mmid;
-
- pr_debug("%s ib_mr %p\n", __func__, ib_mr);
-
- mhp = to_iwch_mr(ib_mr);
- kfree(mhp->pages);
- rhp = mhp->rhp;
- mmid = mhp->attr.stag >> 8;
- cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr);
- iwch_free_pbl(mhp);
- xa_erase_irq(&rhp->mrs, mmid);
- if (mhp->kva)
- kfree((void *) (unsigned long) mhp->kva);
- ib_umem_release(mhp->umem);
- pr_debug("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp);
- kfree(mhp);
- return 0;
-}
-
-static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
-{
- const u64 total_size = 0xffffffff;
- const u64 mask = (total_size + PAGE_SIZE - 1) & PAGE_MASK;
- struct iwch_pd *php = to_iwch_pd(pd);
- struct iwch_dev *rhp = php->rhp;
- struct iwch_mr *mhp;
- __be64 *page_list;
- int shift = 26, npages, ret, i;
-
- pr_debug("%s ib_pd %p\n", __func__, pd);
-
- /*
- * T3 only supports 32 bits of size.
- */
- if (sizeof(phys_addr_t) > 4) {
- pr_warn_once("Cannot support dma_mrs on this platform\n");
- return ERR_PTR(-ENOTSUPP);
- }
-
- mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
- if (!mhp)
- return ERR_PTR(-ENOMEM);
-
- mhp->rhp = rhp;
-
- npages = (total_size + (1ULL << shift) - 1) >> shift;
- if (!npages) {
- ret = -EINVAL;
- goto err;
- }
-
- page_list = kmalloc_array(npages, sizeof(u64), GFP_KERNEL);
- if (!page_list) {
- ret = -ENOMEM;
- goto err;
- }
-
- for (i = 0; i < npages; i++)
- page_list[i] = cpu_to_be64((u64)i << shift);
-
- pr_debug("%s mask 0x%llx shift %d len %lld pbl_size %d\n",
- __func__, mask, shift, total_size, npages);
-
- ret = iwch_alloc_pbl(mhp, npages);
- if (ret) {
- kfree(page_list);
- goto err_pbl;
- }
-
- ret = iwch_write_pbl(mhp, page_list, npages, 0);
- kfree(page_list);
- if (ret)
- goto err_pbl;
-
- mhp->attr.pdid = php->pdid;
- mhp->attr.zbva = 0;
-
- mhp->attr.perms = iwch_ib_to_tpt_access(acc);
- mhp->attr.va_fbo = 0;
- mhp->attr.page_size = shift - 12;
-
- mhp->attr.len = (u32) total_size;
- mhp->attr.pbl_size = npages;
- ret = iwch_register_mem(rhp, php, mhp, shift);
- if (ret)
- goto err_pbl;
-
- return &mhp->ibmr;
-
-err_pbl:
- iwch_free_pbl(mhp);
-
-err:
- kfree(mhp);
- return ERR_PTR(ret);
-}
-
-static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
- u64 virt, int acc, struct ib_udata *udata)
-{
- __be64 *pages;
- int shift, n, i;
- int err = 0;
- struct iwch_dev *rhp;
- struct iwch_pd *php;
- struct iwch_mr *mhp;
- struct iwch_reg_user_mr_resp uresp;
- struct sg_dma_page_iter sg_iter;
- pr_debug("%s ib_pd %p\n", __func__, pd);
-
- php = to_iwch_pd(pd);
- rhp = php->rhp;
- mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
- if (!mhp)
- return ERR_PTR(-ENOMEM);
-
- mhp->rhp = rhp;
-
- mhp->umem = ib_umem_get(udata, start, length, acc, 0);
- if (IS_ERR(mhp->umem)) {
- err = PTR_ERR(mhp->umem);
- kfree(mhp);
- return ERR_PTR(err);
- }
-
- shift = PAGE_SHIFT;
-
- n = ib_umem_num_pages(mhp->umem);
-
- err = iwch_alloc_pbl(mhp, n);
- if (err)
- goto err;
-
- pages = (__be64 *) __get_free_page(GFP_KERNEL);
- if (!pages) {
- err = -ENOMEM;
- goto err_pbl;
- }
-
- i = n = 0;
-
- for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) {
- pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter));
- if (i == PAGE_SIZE / sizeof(*pages)) {
- err = iwch_write_pbl(mhp, pages, i, n);
- if (err)
- goto pbl_done;
- n += i;
- i = 0;
- }
- }
-
- if (i)
- err = iwch_write_pbl(mhp, pages, i, n);
-
-pbl_done:
- free_page((unsigned long) pages);
- if (err)
- goto err_pbl;
-
- mhp->attr.pdid = php->pdid;
- mhp->attr.zbva = 0;
- mhp->attr.perms = iwch_ib_to_tpt_access(acc);
- mhp->attr.va_fbo = virt;
- mhp->attr.page_size = shift - 12;
- mhp->attr.len = (u32) length;
-
- err = iwch_register_mem(rhp, php, mhp, shift);
- if (err)
- goto err_pbl;
-
- if (udata && !t3a_device(rhp)) {
- uresp.pbl_addr = (mhp->attr.pbl_addr -
- rhp->rdev.rnic_info.pbl_base) >> 3;
- pr_debug("%s user resp pbl_addr 0x%x\n", __func__,
- uresp.pbl_addr);
-
- if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
- iwch_dereg_mr(&mhp->ibmr, udata);
- err = -EFAULT;
- goto err;
- }
- }
-
- return &mhp->ibmr;
-
-err_pbl:
- iwch_free_pbl(mhp);
-
-err:
- ib_umem_release(mhp->umem);
- kfree(mhp);
- return ERR_PTR(err);
-}
-
-static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata)
-{
- struct iwch_dev *rhp;
- struct iwch_pd *php;
- struct iwch_mw *mhp;
- u32 mmid;
- u32 stag = 0;
- int ret;
-
- if (type != IB_MW_TYPE_1)
- return ERR_PTR(-EINVAL);
-
- php = to_iwch_pd(pd);
- rhp = php->rhp;
- mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
- if (!mhp)
- return ERR_PTR(-ENOMEM);
- ret = cxio_allocate_window(&rhp->rdev, &stag, php->pdid);
- if (ret) {
- kfree(mhp);
- return ERR_PTR(ret);
- }
- mhp->rhp = rhp;
- mhp->attr.pdid = php->pdid;
- mhp->attr.type = TPT_MW;
- mhp->attr.stag = stag;
- mmid = (stag) >> 8;
- mhp->ibmw.rkey = stag;
- if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) {
- cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
- kfree(mhp);
- return ERR_PTR(-ENOMEM);
- }
- pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
- return &(mhp->ibmw);
-}
-
-static int iwch_dealloc_mw(struct ib_mw *mw)
-{
- struct iwch_dev *rhp;
- struct iwch_mw *mhp;
- u32 mmid;
-
- mhp = to_iwch_mw(mw);
- rhp = mhp->rhp;
- mmid = (mw->rkey) >> 8;
- cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
- xa_erase_irq(&rhp->mrs, mmid);
- pr_debug("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
- kfree(mhp);
- return 0;
-}
-
-static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
-{
- struct iwch_dev *rhp;
- struct iwch_pd *php;
- struct iwch_mr *mhp;
- u32 mmid;
- u32 stag = 0;
- int ret = -ENOMEM;
-
- if (mr_type != IB_MR_TYPE_MEM_REG ||
- max_num_sg > T3_MAX_FASTREG_DEPTH)
- return ERR_PTR(-EINVAL);
-
- php = to_iwch_pd(pd);
- rhp = php->rhp;
- mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
- if (!mhp)
- goto err;
-
- mhp->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
- if (!mhp->pages)
- goto pl_err;
-
- mhp->rhp = rhp;
- ret = iwch_alloc_pbl(mhp, max_num_sg);
- if (ret)
- goto err1;
- mhp->attr.pbl_size = max_num_sg;
- ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid,
- mhp->attr.pbl_size, mhp->attr.pbl_addr);
- if (ret)
- goto err2;
- mhp->attr.pdid = php->pdid;
- mhp->attr.type = TPT_NON_SHARED_MR;
- mhp->attr.stag = stag;
- mhp->attr.state = 1;
- mmid = (stag) >> 8;
- mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
- ret = xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL);
- if (ret)
- goto err3;
-
- pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
- return &(mhp->ibmr);
-err3:
- cxio_dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr);
-err2:
- iwch_free_pbl(mhp);
-err1:
- kfree(mhp->pages);
-pl_err:
- kfree(mhp);
-err:
- return ERR_PTR(ret);
-}
-
-static int iwch_set_page(struct ib_mr *ibmr, u64 addr)
-{
- struct iwch_mr *mhp = to_iwch_mr(ibmr);
-
- if (unlikely(mhp->npages == mhp->attr.pbl_size))
- return -ENOMEM;
-
- mhp->pages[mhp->npages++] = addr;
-
- return 0;
-}
-
-static int iwch_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
- int sg_nents, unsigned int *sg_offset)
-{
- struct iwch_mr *mhp = to_iwch_mr(ibmr);
-
- mhp->npages = 0;
-
- return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, iwch_set_page);
-}
-
-static int iwch_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
-{
- struct iwch_dev *rhp;
- struct iwch_qp *qhp;
- struct iwch_qp_attributes attrs;
- struct iwch_ucontext *ucontext;
-
- qhp = to_iwch_qp(ib_qp);
- rhp = qhp->rhp;
-
- attrs.next_state = IWCH_QP_STATE_ERROR;
- iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 0);
- wait_event(qhp->wait, !qhp->ep);
-
- xa_erase_irq(&rhp->qps, qhp->wq.qpid);
-
- atomic_dec(&qhp->refcnt);
- wait_event(qhp->wait, !atomic_read(&qhp->refcnt));
-
- ucontext = rdma_udata_to_drv_context(udata, struct iwch_ucontext,
- ibucontext);
- cxio_destroy_qp(&rhp->rdev, &qhp->wq,
- ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
-
- pr_debug("%s ib_qp %p qpid 0x%0x qhp %p\n", __func__,
- ib_qp, qhp->wq.qpid, qhp);
- kfree(qhp);
- return 0;
-}
-
-static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *attrs,
- struct ib_udata *udata)
-{
- struct iwch_dev *rhp;
- struct iwch_qp *qhp;
- struct iwch_pd *php;
- struct iwch_cq *schp;
- struct iwch_cq *rchp;
- struct iwch_create_qp_resp uresp;
- int wqsize, sqsize, rqsize;
- struct iwch_ucontext *ucontext;
-
- pr_debug("%s ib_pd %p\n", __func__, pd);
- if (attrs->qp_type != IB_QPT_RC)
- return ERR_PTR(-EINVAL);
- php = to_iwch_pd(pd);
- rhp = php->rhp;
- schp = get_chp(rhp, ((struct iwch_cq *) attrs->send_cq)->cq.cqid);
- rchp = get_chp(rhp, ((struct iwch_cq *) attrs->recv_cq)->cq.cqid);
- if (!schp || !rchp)
- return ERR_PTR(-EINVAL);
-
- /* The RQT size must be # of entries + 1 rounded up to a power of two */
- rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr);
- if (rqsize == attrs->cap.max_recv_wr)
- rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr+1);
-
- /* T3 doesn't support RQT depth < 16 */
- if (rqsize < 16)
- rqsize = 16;
-
- if (rqsize > T3_MAX_RQ_SIZE)
- return ERR_PTR(-EINVAL);
-
- if (attrs->cap.max_inline_data > T3_MAX_INLINE)
- return ERR_PTR(-EINVAL);
-
- /*
- * NOTE: The SQ and total WQ sizes don't need to be
- * a power of two. However, all the code assumes
- * they are. EG: Q_FREECNT() and friends.
- */
- sqsize = roundup_pow_of_two(attrs->cap.max_send_wr);
- wqsize = roundup_pow_of_two(rqsize + sqsize);
-
- /*
- * Kernel users need more wq space for fastreg WRs which can take
- * 2 WR fragments.
- */
- ucontext = rdma_udata_to_drv_context(udata, struct iwch_ucontext,
- ibucontext);
- if (!ucontext && wqsize < (rqsize + (2 * sqsize)))
- wqsize = roundup_pow_of_two(rqsize +
- roundup_pow_of_two(attrs->cap.max_send_wr * 2));
- pr_debug("%s wqsize %d sqsize %d rqsize %d\n", __func__,
- wqsize, sqsize, rqsize);
- qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
- if (!qhp)
- return ERR_PTR(-ENOMEM);
- qhp->wq.size_log2 = ilog2(wqsize);
- qhp->wq.rq_size_log2 = ilog2(rqsize);
- qhp->wq.sq_size_log2 = ilog2(sqsize);
- if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq,
- ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) {
- kfree(qhp);
- return ERR_PTR(-ENOMEM);
- }
-
- attrs->cap.max_recv_wr = rqsize - 1;
- attrs->cap.max_send_wr = sqsize;
- attrs->cap.max_inline_data = T3_MAX_INLINE;
-
- qhp->rhp = rhp;
- qhp->attr.pd = php->pdid;
- qhp->attr.scq = ((struct iwch_cq *) attrs->send_cq)->cq.cqid;
- qhp->attr.rcq = ((struct iwch_cq *) attrs->recv_cq)->cq.cqid;
- qhp->attr.sq_num_entries = attrs->cap.max_send_wr;
- qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
- qhp->attr.sq_max_sges = attrs->cap.max_send_sge;
- qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge;
- qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
- qhp->attr.state = IWCH_QP_STATE_IDLE;
- qhp->attr.next_state = IWCH_QP_STATE_IDLE;
-
- /*
- * XXX - These don't get passed in from the openib user
- * at create time. The CM sets them via a QP modify.
- * Need to fix... I think the CM should
- */
- qhp->attr.enable_rdma_read = 1;
- qhp->attr.enable_rdma_write = 1;
- qhp->attr.enable_bind = 1;
- qhp->attr.max_ord = 1;
- qhp->attr.max_ird = 1;
-
- spin_lock_init(&qhp->lock);
- init_waitqueue_head(&qhp->wait);
- atomic_set(&qhp->refcnt, 1);
-
- if (xa_store_irq(&rhp->qps, qhp->wq.qpid, qhp, GFP_KERNEL)) {
- cxio_destroy_qp(&rhp->rdev, &qhp->wq,
- ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
- kfree(qhp);
- return ERR_PTR(-ENOMEM);
- }
-
- if (udata) {
-
- struct iwch_mm_entry *mm1, *mm2;
-
- mm1 = kmalloc(sizeof(*mm1), GFP_KERNEL);
- if (!mm1) {
- iwch_destroy_qp(&qhp->ibqp, udata);
- return ERR_PTR(-ENOMEM);
- }
-
- mm2 = kmalloc(sizeof(*mm2), GFP_KERNEL);
- if (!mm2) {
- kfree(mm1);
- iwch_destroy_qp(&qhp->ibqp, udata);
- return ERR_PTR(-ENOMEM);
- }
-
- uresp.qpid = qhp->wq.qpid;
- uresp.size_log2 = qhp->wq.size_log2;
- uresp.sq_size_log2 = qhp->wq.sq_size_log2;
- uresp.rq_size_log2 = qhp->wq.rq_size_log2;
- spin_lock(&ucontext->mmap_lock);
- uresp.key = ucontext->key;
- ucontext->key += PAGE_SIZE;
- uresp.db_key = ucontext->key;
- ucontext->key += PAGE_SIZE;
- spin_unlock(&ucontext->mmap_lock);
- if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
- kfree(mm1);
- kfree(mm2);
- iwch_destroy_qp(&qhp->ibqp, udata);
- return ERR_PTR(-EFAULT);
- }
- mm1->key = uresp.key;
- mm1->addr = virt_to_phys(qhp->wq.queue);
- mm1->len = PAGE_ALIGN(wqsize * sizeof(union t3_wr));
- insert_mmap(ucontext, mm1);
- mm2->key = uresp.db_key;
- mm2->addr = qhp->wq.udb & PAGE_MASK;
- mm2->len = PAGE_SIZE;
- insert_mmap(ucontext, mm2);
- }
- qhp->ibqp.qp_num = qhp->wq.qpid;
- pr_debug(
- "%s sq_num_entries %d, rq_num_entries %d qpid 0x%0x qhp %p dma_addr %pad size %d rq_addr 0x%x\n",
- __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
- qhp->wq.qpid, qhp, &qhp->wq.dma_addr, 1 << qhp->wq.size_log2,
- qhp->wq.rq_addr);
- return &qhp->ibqp;
-}
-
-static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask, struct ib_udata *udata)
-{
- struct iwch_dev *rhp;
- struct iwch_qp *qhp;
- enum iwch_qp_attr_mask mask = 0;
- struct iwch_qp_attributes attrs = {};
-
- pr_debug("%s ib_qp %p\n", __func__, ibqp);
-
- /* iwarp does not support the RTR state */
- if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
- attr_mask &= ~IB_QP_STATE;
-
- /* Make sure we still have something left to do */
- if (!attr_mask)
- return 0;
-
- qhp = to_iwch_qp(ibqp);
- rhp = qhp->rhp;
-
- attrs.next_state = iwch_convert_state(attr->qp_state);
- attrs.enable_rdma_read = (attr->qp_access_flags &
- IB_ACCESS_REMOTE_READ) ? 1 : 0;
- attrs.enable_rdma_write = (attr->qp_access_flags &
- IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
- attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0;
-
-
- mask |= (attr_mask & IB_QP_STATE) ? IWCH_QP_ATTR_NEXT_STATE : 0;
- mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ?
- (IWCH_QP_ATTR_ENABLE_RDMA_READ |
- IWCH_QP_ATTR_ENABLE_RDMA_WRITE |
- IWCH_QP_ATTR_ENABLE_RDMA_BIND) : 0;
-
- return iwch_modify_qp(rhp, qhp, mask, &attrs, 0);
-}
-
-void iwch_qp_add_ref(struct ib_qp *qp)
-{
- pr_debug("%s ib_qp %p\n", __func__, qp);
- atomic_inc(&(to_iwch_qp(qp)->refcnt));
-}
-
-void iwch_qp_rem_ref(struct ib_qp *qp)
-{
- pr_debug("%s ib_qp %p\n", __func__, qp);
- if (atomic_dec_and_test(&(to_iwch_qp(qp)->refcnt)))
- wake_up(&(to_iwch_qp(qp)->wait));
-}
-
-static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn)
-{
- pr_debug("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn);
- return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn);
-}
-
-
-static int iwch_query_pkey(struct ib_device *ibdev,
- u8 port, u16 index, u16 * pkey)
-{
- pr_debug("%s ibdev %p\n", __func__, ibdev);
- *pkey = 0;
- return 0;
-}
-
-static int iwch_query_gid(struct ib_device *ibdev, u8 port,
- int index, union ib_gid *gid)
-{
- struct iwch_dev *dev;
-
- pr_debug("%s ibdev %p, port %d, index %d, gid %p\n",
- __func__, ibdev, port, index, gid);
- dev = to_iwch_dev(ibdev);
- BUG_ON(port == 0 || port > 2);
- memset(&(gid->raw[0]), 0, sizeof(gid->raw));
- memcpy(&(gid->raw[0]), dev->rdev.port_info.lldevs[port-1]->dev_addr, 6);
- return 0;
-}
-
-static u64 fw_vers_string_to_u64(struct iwch_dev *iwch_dev)
-{
- struct ethtool_drvinfo info;
- struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
- char *cp, *next;
- unsigned fw_maj, fw_min, fw_mic;
-
- lldev->ethtool_ops->get_drvinfo(lldev, &info);
-
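-	/*
-	 * Parse the dotted "major.minor.micro" firmware version string
-	 * (skipping its leading character) and pack it into a u64.
-	 */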
- next = info.fw_version + 1;
- cp = strsep(&next, ".");
- sscanf(cp, "%i", &fw_maj);
- cp = strsep(&next, ".");
- sscanf(cp, "%i", &fw_min);
- cp = strsep(&next, ".");
- sscanf(cp, "%i", &fw_mic);
-
- return (((u64)fw_maj & 0xffff) << 32) | ((fw_min & 0xffff) << 16) |
- (fw_mic & 0xffff);
-}
-
-static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
- struct ib_udata *uhw)
-{
-
- struct iwch_dev *dev;
-
- pr_debug("%s ibdev %p\n", __func__, ibdev);
-
- if (uhw->inlen || uhw->outlen)
- return -EINVAL;
-
- dev = to_iwch_dev(ibdev);
- memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
- props->hw_ver = dev->rdev.t3cdev_p->type;
- props->fw_ver = fw_vers_string_to_u64(dev);
- props->device_cap_flags = dev->device_cap_flags;
- props->page_size_cap = dev->attr.mem_pgsizes_bitmask;
- props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor;
- props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device;
- props->max_mr_size = dev->attr.max_mr_size;
- props->max_qp = dev->attr.max_qps;
- props->max_qp_wr = dev->attr.max_wrs;
- props->max_send_sge = dev->attr.max_sge_per_wr;
- props->max_recv_sge = dev->attr.max_sge_per_wr;
- props->max_sge_rd = 1;
- props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp;
- props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp;
- props->max_cq = dev->attr.max_cqs;
- props->max_cqe = dev->attr.max_cqes_per_cq;
- props->max_mr = dev->attr.max_mem_regs;
- props->max_pd = dev->attr.max_pds;
- props->local_ca_ack_delay = 0;
- props->max_fast_reg_page_list_len = T3_MAX_FASTREG_DEPTH;
-
- return 0;
-}
-
-static int iwch_query_port(struct ib_device *ibdev,
- u8 port, struct ib_port_attr *props)
-{
- struct iwch_dev *dev;
- struct net_device *netdev;
- struct in_device *inetdev;
-
- pr_debug("%s ibdev %p\n", __func__, ibdev);
-
- dev = to_iwch_dev(ibdev);
- netdev = dev->rdev.port_info.lldevs[port-1];
-
- /* props being zeroed by the caller, avoid zeroing it here */
- props->max_mtu = IB_MTU_4096;
- props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
-
- if (!netif_carrier_ok(netdev))
- props->state = IB_PORT_DOWN;
- else {
- inetdev = in_dev_get(netdev);
- if (inetdev) {
- if (inetdev->ifa_list)
- props->state = IB_PORT_ACTIVE;
- else
- props->state = IB_PORT_INIT;
- in_dev_put(inetdev);
- } else
- props->state = IB_PORT_INIT;
- }
-
- props->port_cap_flags =
- IB_PORT_CM_SUP |
- IB_PORT_SNMP_TUNNEL_SUP |
- IB_PORT_REINIT_SUP |
- IB_PORT_DEVICE_MGMT_SUP |
- IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
- props->gid_tbl_len = 1;
- props->pkey_tbl_len = 1;
- props->active_width = 2;
- props->active_speed = IB_SPEED_DDR;
- props->max_msg_sz = -1;
-
- return 0;
-}
-
-static ssize_t hw_rev_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct iwch_dev *iwch_dev =
- rdma_device_to_drv_device(dev, struct iwch_dev, ibdev);
-
- pr_debug("%s dev 0x%p\n", __func__, dev);
- return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
-}
-static DEVICE_ATTR_RO(hw_rev);
-
-static ssize_t hca_type_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct iwch_dev *iwch_dev =
- rdma_device_to_drv_device(dev, struct iwch_dev, ibdev);
- struct ethtool_drvinfo info;
- struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
-
- pr_debug("%s dev 0x%p\n", __func__, dev);
- lldev->ethtool_ops->get_drvinfo(lldev, &info);
- return sprintf(buf, "%s\n", info.driver);
-}
-static DEVICE_ATTR_RO(hca_type);
-
-static ssize_t board_id_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct iwch_dev *iwch_dev =
- rdma_device_to_drv_device(dev, struct iwch_dev, ibdev);
-
- pr_debug("%s dev 0x%p\n", __func__, dev);
- return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor,
- iwch_dev->rdev.rnic_info.pdev->device);
-}
-static DEVICE_ATTR_RO(board_id);
-
-enum counters {
- IPINRECEIVES,
- IPINHDRERRORS,
- IPINADDRERRORS,
- IPINUNKNOWNPROTOS,
- IPINDISCARDS,
- IPINDELIVERS,
- IPOUTREQUESTS,
- IPOUTDISCARDS,
- IPOUTNOROUTES,
- IPREASMTIMEOUT,
- IPREASMREQDS,
- IPREASMOKS,
- IPREASMFAILS,
- TCPACTIVEOPENS,
- TCPPASSIVEOPENS,
- TCPATTEMPTFAILS,
- TCPESTABRESETS,
- TCPCURRESTAB,
- TCPINSEGS,
- TCPOUTSEGS,
- TCPRETRANSSEGS,
- TCPINERRS,
- TCPOUTRSTS,
- TCPRTOMIN,
- TCPRTOMAX,
- NR_COUNTERS
-};
-
-static const char * const names[] = {
- [IPINRECEIVES] = "ipInReceives",
- [IPINHDRERRORS] = "ipInHdrErrors",
- [IPINADDRERRORS] = "ipInAddrErrors",
- [IPINUNKNOWNPROTOS] = "ipInUnknownProtos",
- [IPINDISCARDS] = "ipInDiscards",
- [IPINDELIVERS] = "ipInDelivers",
- [IPOUTREQUESTS] = "ipOutRequests",
- [IPOUTDISCARDS] = "ipOutDiscards",
- [IPOUTNOROUTES] = "ipOutNoRoutes",
- [IPREASMTIMEOUT] = "ipReasmTimeout",
- [IPREASMREQDS] = "ipReasmReqds",
- [IPREASMOKS] = "ipReasmOKs",
- [IPREASMFAILS] = "ipReasmFails",
- [TCPACTIVEOPENS] = "tcpActiveOpens",
- [TCPPASSIVEOPENS] = "tcpPassiveOpens",
- [TCPATTEMPTFAILS] = "tcpAttemptFails",
- [TCPESTABRESETS] = "tcpEstabResets",
- [TCPCURRESTAB] = "tcpCurrEstab",
- [TCPINSEGS] = "tcpInSegs",
- [TCPOUTSEGS] = "tcpOutSegs",
- [TCPRETRANSSEGS] = "tcpRetransSegs",
- [TCPINERRS] = "tcpInErrs",
- [TCPOUTRSTS] = "tcpOutRsts",
- [TCPRTOMIN] = "tcpRtoMin",
- [TCPRTOMAX] = "tcpRtoMax",
-};
-
-static struct rdma_hw_stats *iwch_alloc_stats(struct ib_device *ibdev,
- u8 port_num)
-{
- BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS);
-
- /* Our driver only supports device level stats */
- if (port_num != 0)
- return NULL;
-
- return rdma_alloc_hw_stats_struct(names, NR_COUNTERS,
- RDMA_HW_STATS_DEFAULT_LIFESPAN);
-}
-
-static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats,
- u8 port, int index)
-{
- struct iwch_dev *dev;
- struct tp_mib_stats m;
- int ret;
-
- if (port != 0 || !stats)
- return -ENOSYS;
-
- pr_debug("%s ibdev %p\n", __func__, ibdev);
- dev = to_iwch_dev(ibdev);
- ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m);
- if (ret)
- return -ENOSYS;
-
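-	/* 64-bit MIB counters are reported by the hardware as hi/lo halves. */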
- stats->value[IPINRECEIVES] = ((u64)m.ipInReceive_hi << 32) + m.ipInReceive_lo;
- stats->value[IPINHDRERRORS] = ((u64)m.ipInHdrErrors_hi << 32) + m.ipInHdrErrors_lo;
- stats->value[IPINADDRERRORS] = ((u64)m.ipInAddrErrors_hi << 32) + m.ipInAddrErrors_lo;
- stats->value[IPINUNKNOWNPROTOS] = ((u64)m.ipInUnknownProtos_hi << 32) + m.ipInUnknownProtos_lo;
- stats->value[IPINDISCARDS] = ((u64)m.ipInDiscards_hi << 32) + m.ipInDiscards_lo;
- stats->value[IPINDELIVERS] = ((u64)m.ipInDelivers_hi << 32) + m.ipInDelivers_lo;
- stats->value[IPOUTREQUESTS] = ((u64)m.ipOutRequests_hi << 32) + m.ipOutRequests_lo;
- stats->value[IPOUTDISCARDS] = ((u64)m.ipOutDiscards_hi << 32) + m.ipOutDiscards_lo;
- stats->value[IPOUTNOROUTES] = ((u64)m.ipOutNoRoutes_hi << 32) + m.ipOutNoRoutes_lo;
- stats->value[IPREASMTIMEOUT] = m.ipReasmTimeout;
- stats->value[IPREASMREQDS] = m.ipReasmReqds;
- stats->value[IPREASMOKS] = m.ipReasmOKs;
- stats->value[IPREASMFAILS] = m.ipReasmFails;
- stats->value[TCPACTIVEOPENS] = m.tcpActiveOpens;
- stats->value[TCPPASSIVEOPENS] = m.tcpPassiveOpens;
- stats->value[TCPATTEMPTFAILS] = m.tcpAttemptFails;
- stats->value[TCPESTABRESETS] = m.tcpEstabResets;
-	stats->value[TCPCURRESTAB] = m.tcpCurrEstab;
-	stats->value[TCPINSEGS] = ((u64)m.tcpInSegs_hi << 32) + m.tcpInSegs_lo;
-	stats->value[TCPOUTSEGS] = ((u64)m.tcpOutSegs_hi << 32) + m.tcpOutSegs_lo;
-	stats->value[TCPRETRANSSEGS] = ((u64)m.tcpRetransSeg_hi << 32) + m.tcpRetransSeg_lo;
-	stats->value[TCPINERRS] = ((u64)m.tcpInErrs_hi << 32) + m.tcpInErrs_lo;
-	stats->value[TCPOUTRSTS] = m.tcpOutRsts;
- stats->value[TCPRTOMIN] = m.tcpRtoMin;
- stats->value[TCPRTOMAX] = m.tcpRtoMax;
-
- return stats->num_counters;
-}
-
-static struct attribute *iwch_class_attributes[] = {
- &dev_attr_hw_rev.attr,
- &dev_attr_hca_type.attr,
- &dev_attr_board_id.attr,
- NULL
-};
-
-static const struct attribute_group iwch_attr_group = {
- .attrs = iwch_class_attributes,
-};
-
-static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num,
- struct ib_port_immutable *immutable)
-{
- struct ib_port_attr attr;
- int err;
-
- immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
-
- err = ib_query_port(ibdev, port_num, &attr);
- if (err)
- return err;
-
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
- immutable->gid_tbl_len = attr.gid_tbl_len;
-
- return 0;
-}
-
-static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str)
-{
- struct iwch_dev *iwch_dev = to_iwch_dev(ibdev);
- struct ethtool_drvinfo info;
- struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
-
- pr_debug("%s dev 0x%p\n", __func__, iwch_dev);
- lldev->ethtool_ops->get_drvinfo(lldev, &info);
- snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version);
-}
-
-static const struct ib_device_ops iwch_dev_ops = {
- .owner = THIS_MODULE,
- .driver_id = RDMA_DRIVER_CXGB3,
- .uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION,
- .uverbs_no_driver_id_binding = 1,
-
- .alloc_hw_stats = iwch_alloc_stats,
- .alloc_mr = iwch_alloc_mr,
- .alloc_mw = iwch_alloc_mw,
- .alloc_pd = iwch_allocate_pd,
- .alloc_ucontext = iwch_alloc_ucontext,
- .create_cq = iwch_create_cq,
- .create_qp = iwch_create_qp,
- .dealloc_mw = iwch_dealloc_mw,
- .dealloc_pd = iwch_deallocate_pd,
- .dealloc_ucontext = iwch_dealloc_ucontext,
- .dereg_mr = iwch_dereg_mr,
- .destroy_cq = iwch_destroy_cq,
- .destroy_qp = iwch_destroy_qp,
- .get_dev_fw_str = get_dev_fw_ver_str,
- .get_dma_mr = iwch_get_dma_mr,
- .get_hw_stats = iwch_get_mib,
- .get_port_immutable = iwch_port_immutable,
- .iw_accept = iwch_accept_cr,
- .iw_add_ref = iwch_qp_add_ref,
- .iw_connect = iwch_connect,
- .iw_create_listen = iwch_create_listen,
- .iw_destroy_listen = iwch_destroy_listen,
- .iw_get_qp = iwch_get_qp,
- .iw_reject = iwch_reject_cr,
- .iw_rem_ref = iwch_qp_rem_ref,
- .map_mr_sg = iwch_map_mr_sg,
- .mmap = iwch_mmap,
- .modify_qp = iwch_ib_modify_qp,
- .poll_cq = iwch_poll_cq,
- .post_recv = iwch_post_receive,
- .post_send = iwch_post_send,
- .query_device = iwch_query_device,
- .query_gid = iwch_query_gid,
- .query_pkey = iwch_query_pkey,
- .query_port = iwch_query_port,
- .reg_user_mr = iwch_reg_user_mr,
- .req_notify_cq = iwch_arm_cq,
- INIT_RDMA_OBJ_SIZE(ib_pd, iwch_pd, ibpd),
- INIT_RDMA_OBJ_SIZE(ib_cq, iwch_cq, ibcq),
- INIT_RDMA_OBJ_SIZE(ib_ucontext, iwch_ucontext, ibucontext),
-};
-
-int iwch_register_device(struct iwch_dev *dev)
-{
- pr_debug("%s iwch_dev %p\n", __func__, dev);
- memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
- memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
- dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY |
- IB_DEVICE_MEM_WINDOW |
- IB_DEVICE_MEM_MGT_EXTENSIONS;
-
- /* cxgb3 supports STag 0. */
- dev->ibdev.local_dma_lkey = 0;
-
- dev->ibdev.uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_POST_SEND) |
- (1ull << IB_USER_VERBS_CMD_POST_RECV);
- dev->ibdev.node_type = RDMA_NODE_RNIC;
- BUILD_BUG_ON(sizeof(IWCH_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX);
- memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC));
- dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
- dev->ibdev.num_comp_vectors = 1;
- dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev;
-
- memcpy(dev->ibdev.iw_ifname, dev->rdev.t3cdev_p->lldev->name,
- sizeof(dev->ibdev.iw_ifname));
-
- rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group);
- ib_set_device_ops(&dev->ibdev, &iwch_dev_ops);
- return ib_register_device(&dev->ibdev, "cxgb3_%d");
-}
-
-void iwch_unregister_device(struct iwch_dev *dev)
-{
- pr_debug("%s iwch_dev %p\n", __func__, dev);
- ib_unregister_device(&dev->ibdev);
- return;
-}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
deleted file mode 100644
index 8adbe9658935..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ /dev/null
@@ -1,347 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __IWCH_PROVIDER_H__
-#define __IWCH_PROVIDER_H__
-
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <rdma/ib_verbs.h>
-#include <asm/types.h>
-#include "t3cdev.h"
-#include "iwch.h"
-#include "cxio_wr.h"
-#include "cxio_hal.h"
-
-struct iwch_pd {
- struct ib_pd ibpd;
- u32 pdid;
- struct iwch_dev *rhp;
-};
-
-static inline struct iwch_pd *to_iwch_pd(struct ib_pd *ibpd)
-{
- return container_of(ibpd, struct iwch_pd, ibpd);
-}
-
-struct tpt_attributes {
- u32 stag;
- u32 state:1;
- u32 type:2;
- u32 rsvd:1;
- enum tpt_mem_perm perms;
- u32 remote_invaliate_disable:1;
- u32 zbva:1;
- u32 mw_bind_enable:1;
- u32 page_size:5;
-
- u32 pdid;
- u32 qpid;
- u32 pbl_addr;
- u32 len;
- u64 va_fbo;
- u32 pbl_size;
-};
-
-struct iwch_mr {
- struct ib_mr ibmr;
- struct ib_umem *umem;
- struct iwch_dev *rhp;
- u64 kva;
- struct tpt_attributes attr;
- u64 *pages;
- u32 npages;
-};
-
-typedef struct iwch_mw iwch_mw_handle;
-
-static inline struct iwch_mr *to_iwch_mr(struct ib_mr *ibmr)
-{
- return container_of(ibmr, struct iwch_mr, ibmr);
-}
-
-struct iwch_mw {
- struct ib_mw ibmw;
- struct iwch_dev *rhp;
- u64 kva;
- struct tpt_attributes attr;
-};
-
-static inline struct iwch_mw *to_iwch_mw(struct ib_mw *ibmw)
-{
- return container_of(ibmw, struct iwch_mw, ibmw);
-}
-
-struct iwch_cq {
- struct ib_cq ibcq;
- struct iwch_dev *rhp;
- struct t3_cq cq;
- spinlock_t lock;
- spinlock_t comp_handler_lock;
- atomic_t refcnt;
- wait_queue_head_t wait;
- u32 __user *user_rptr_addr;
-};
-
-static inline struct iwch_cq *to_iwch_cq(struct ib_cq *ibcq)
-{
- return container_of(ibcq, struct iwch_cq, ibcq);
-}
-
-enum IWCH_QP_FLAGS {
- QP_QUIESCED = 0x01
-};
-
-struct iwch_mpa_attributes {
- u8 initiator;
- u8 recv_marker_enabled;
- u8 xmit_marker_enabled; /* iWARP: enable inbound Read Resp. */
- u8 crc_enabled;
- u8 version; /* 0 or 1 */
-};
-
-struct iwch_qp_attributes {
- u32 scq;
- u32 rcq;
- u32 sq_num_entries;
- u32 rq_num_entries;
- u32 sq_max_sges;
- u32 sq_max_sges_rdma_write;
- u32 rq_max_sges;
- u32 state;
- u8 enable_rdma_read;
- u8 enable_rdma_write; /* enable inbound Read Resp. */
- u8 enable_bind;
- u8 enable_mmid0_fastreg; /* Enable STAG0 + Fast-register */
- /*
-	 * Next QP state. If the current state is specified, only the
-	 * QP attributes will be modified.
- */
- u32 max_ord;
- u32 max_ird;
- u32 pd; /* IN */
- u32 next_state;
- char terminate_buffer[52];
- u32 terminate_msg_len;
- u8 is_terminate_local;
- struct iwch_mpa_attributes mpa_attr; /* IN-OUT */
- struct iwch_ep *llp_stream_handle;
- char *stream_msg_buf; /* Last stream msg. before Idle -> RTS */
- u32 stream_msg_buf_len; /* Only on Idle -> RTS */
-};
-
-struct iwch_qp {
- struct ib_qp ibqp;
- struct iwch_dev *rhp;
- struct iwch_ep *ep;
- struct iwch_qp_attributes attr;
- struct t3_wq wq;
- spinlock_t lock;
- atomic_t refcnt;
- wait_queue_head_t wait;
- enum IWCH_QP_FLAGS flags;
-};
-
-static inline int qp_quiesced(struct iwch_qp *qhp)
-{
- return qhp->flags & QP_QUIESCED;
-}
-
-static inline struct iwch_qp *to_iwch_qp(struct ib_qp *ibqp)
-{
- return container_of(ibqp, struct iwch_qp, ibqp);
-}
-
-void iwch_qp_add_ref(struct ib_qp *qp);
-void iwch_qp_rem_ref(struct ib_qp *qp);
-
-struct iwch_ucontext {
- struct ib_ucontext ibucontext;
- struct cxio_ucontext uctx;
- u32 key;
- spinlock_t mmap_lock;
- struct list_head mmaps;
-};
-
-static inline struct iwch_ucontext *to_iwch_ucontext(struct ib_ucontext *c)
-{
- return container_of(c, struct iwch_ucontext, ibucontext);
-}
-
-struct iwch_mm_entry {
- struct list_head entry;
- u64 addr;
- u32 key;
- unsigned len;
-};
-
-static inline struct iwch_mm_entry *remove_mmap(struct iwch_ucontext *ucontext,
- u32 key, unsigned len)
-{
- struct list_head *pos, *nxt;
- struct iwch_mm_entry *mm;
-
- spin_lock(&ucontext->mmap_lock);
- list_for_each_safe(pos, nxt, &ucontext->mmaps) {
-
- mm = list_entry(pos, struct iwch_mm_entry, entry);
- if (mm->key == key && mm->len == len) {
- list_del_init(&mm->entry);
- spin_unlock(&ucontext->mmap_lock);
- pr_debug("%s key 0x%x addr 0x%llx len %d\n",
- __func__, key,
- (unsigned long long)mm->addr, mm->len);
- return mm;
- }
- }
- spin_unlock(&ucontext->mmap_lock);
- return NULL;
-}
-
-static inline void insert_mmap(struct iwch_ucontext *ucontext,
- struct iwch_mm_entry *mm)
-{
- spin_lock(&ucontext->mmap_lock);
- pr_debug("%s key 0x%x addr 0x%llx len %d\n",
- __func__, mm->key, (unsigned long long)mm->addr, mm->len);
- list_add_tail(&mm->entry, &ucontext->mmaps);
- spin_unlock(&ucontext->mmap_lock);
-}
-
-enum iwch_qp_attr_mask {
- IWCH_QP_ATTR_NEXT_STATE = 1 << 0,
- IWCH_QP_ATTR_ENABLE_RDMA_READ = 1 << 7,
- IWCH_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8,
- IWCH_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9,
- IWCH_QP_ATTR_MAX_ORD = 1 << 11,
- IWCH_QP_ATTR_MAX_IRD = 1 << 12,
- IWCH_QP_ATTR_LLP_STREAM_HANDLE = 1 << 22,
- IWCH_QP_ATTR_STREAM_MSG_BUFFER = 1 << 23,
- IWCH_QP_ATTR_MPA_ATTR = 1 << 24,
- IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE = 1 << 25,
- IWCH_QP_ATTR_VALID_MODIFY = (IWCH_QP_ATTR_ENABLE_RDMA_READ |
- IWCH_QP_ATTR_ENABLE_RDMA_WRITE |
- IWCH_QP_ATTR_MAX_ORD |
- IWCH_QP_ATTR_MAX_IRD |
- IWCH_QP_ATTR_LLP_STREAM_HANDLE |
- IWCH_QP_ATTR_STREAM_MSG_BUFFER |
- IWCH_QP_ATTR_MPA_ATTR |
- IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE)
-};
-
-int iwch_modify_qp(struct iwch_dev *rhp,
- struct iwch_qp *qhp,
- enum iwch_qp_attr_mask mask,
- struct iwch_qp_attributes *attrs,
- int internal);
-
-enum iwch_qp_state {
- IWCH_QP_STATE_IDLE,
- IWCH_QP_STATE_RTS,
- IWCH_QP_STATE_ERROR,
- IWCH_QP_STATE_TERMINATE,
- IWCH_QP_STATE_CLOSING,
- IWCH_QP_STATE_TOT
-};
-
-static inline int iwch_convert_state(enum ib_qp_state ib_state)
-{
- switch (ib_state) {
- case IB_QPS_RESET:
- case IB_QPS_INIT:
- return IWCH_QP_STATE_IDLE;
- case IB_QPS_RTS:
- return IWCH_QP_STATE_RTS;
- case IB_QPS_SQD:
- return IWCH_QP_STATE_CLOSING;
- case IB_QPS_SQE:
- return IWCH_QP_STATE_TERMINATE;
- case IB_QPS_ERR:
- return IWCH_QP_STATE_ERROR;
- default:
- return -1;
- }
-}
-
-static inline u32 iwch_ib_to_tpt_access(int acc)
-{
- return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) |
- (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0) |
- (acc & IB_ACCESS_LOCAL_WRITE ? TPT_LOCAL_WRITE : 0) |
- (acc & IB_ACCESS_MW_BIND ? TPT_MW_BIND : 0) |
- TPT_LOCAL_READ;
-}
-
-static inline u32 iwch_ib_to_tpt_bind_access(int acc)
-{
- return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) |
- (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0);
-}
-
-enum iwch_mmid_state {
- IWCH_STAG_STATE_VALID,
- IWCH_STAG_STATE_INVALID
-};
-
-enum iwch_qp_query_flags {
- IWCH_QP_QUERY_CONTEXT_NONE = 0x0, /* No ctx; Only attrs */
- IWCH_QP_QUERY_CONTEXT_GET = 0x1, /* Get ctx + attrs */
- IWCH_QP_QUERY_CONTEXT_SUSPEND = 0x2, /* Not Supported */
-
- /*
- * Quiesce QP context; Consumer
- * will NOT replay outstanding WR
- */
- IWCH_QP_QUERY_CONTEXT_QUIESCE = 0x4,
- IWCH_QP_QUERY_CONTEXT_REMOVE = 0x8,
- IWCH_QP_QUERY_TEST_USERWRITE = 0x32 /* Test special */
-};
-
-u16 iwch_rqes_posted(struct iwch_qp *qhp);
-int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
- const struct ib_send_wr **bad_wr);
-int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
- const struct ib_recv_wr **bad_wr);
-int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
-int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
-int iwch_post_zb_read(struct iwch_ep *ep);
-int iwch_register_device(struct iwch_dev *dev);
-void iwch_unregister_device(struct iwch_dev *dev);
-void stop_read_rep_timer(struct iwch_qp *qhp);
-int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
- struct iwch_mr *mhp, int shift);
-int iwch_alloc_pbl(struct iwch_mr *mhp, int npages);
-void iwch_free_pbl(struct iwch_mr *mhp);
-int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset);
-
-#define IWCH_NODE_DESC "cxgb3 Chelsio Communications"
-
-#endif
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
deleted file mode 100644
index c649faad63f9..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ /dev/null
@@ -1,1082 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/sched.h>
-#include <linux/gfp.h>
-#include "iwch_provider.h"
-#include "iwch.h"
-#include "iwch_cm.h"
-#include "cxio_hal.h"
-#include "cxio_resource.h"
-
-#define NO_SUPPORT -1
-
-static int build_rdma_send(union t3_wr *wqe, const struct ib_send_wr *wr,
- u8 *flit_cnt)
-{
- int i;
- u32 plen;
-
- switch (wr->opcode) {
- case IB_WR_SEND:
- if (wr->send_flags & IB_SEND_SOLICITED)
- wqe->send.rdmaop = T3_SEND_WITH_SE;
- else
- wqe->send.rdmaop = T3_SEND;
- wqe->send.rem_stag = 0;
- break;
- case IB_WR_SEND_WITH_INV:
- if (wr->send_flags & IB_SEND_SOLICITED)
- wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
- else
- wqe->send.rdmaop = T3_SEND_WITH_INV;
- wqe->send.rem_stag = cpu_to_be32(wr->ex.invalidate_rkey);
- break;
- default:
- return -EINVAL;
- }
- if (wr->num_sge > T3_MAX_SGE)
- return -EINVAL;
- wqe->send.reserved[0] = 0;
- wqe->send.reserved[1] = 0;
- wqe->send.reserved[2] = 0;
- plen = 0;
- for (i = 0; i < wr->num_sge; i++) {
- if ((plen + wr->sg_list[i].length) < plen)
- return -EMSGSIZE;
-
- plen += wr->sg_list[i].length;
- wqe->send.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey);
- wqe->send.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
- wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
- }
- wqe->send.num_sgle = cpu_to_be32(wr->num_sge);
- *flit_cnt = 4 + ((wr->num_sge) << 1);
- wqe->send.plen = cpu_to_be32(plen);
- return 0;
-}
-
-static int build_rdma_write(union t3_wr *wqe, const struct ib_send_wr *wr,
- u8 *flit_cnt)
-{
- int i;
- u32 plen;
- if (wr->num_sge > T3_MAX_SGE)
- return -EINVAL;
- wqe->write.rdmaop = T3_RDMA_WRITE;
- wqe->write.reserved[0] = 0;
- wqe->write.reserved[1] = 0;
- wqe->write.reserved[2] = 0;
- wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
- wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
-
- if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
- plen = 4;
- wqe->write.sgl[0].stag = wr->ex.imm_data;
- wqe->write.sgl[0].len = cpu_to_be32(0);
- wqe->write.num_sgle = cpu_to_be32(0);
- *flit_cnt = 6;
- } else {
- plen = 0;
- for (i = 0; i < wr->num_sge; i++) {
- if ((plen + wr->sg_list[i].length) < plen) {
- return -EMSGSIZE;
- }
- plen += wr->sg_list[i].length;
- wqe->write.sgl[i].stag =
- cpu_to_be32(wr->sg_list[i].lkey);
- wqe->write.sgl[i].len =
- cpu_to_be32(wr->sg_list[i].length);
- wqe->write.sgl[i].to =
- cpu_to_be64(wr->sg_list[i].addr);
- }
- wqe->write.num_sgle = cpu_to_be32(wr->num_sge);
- *flit_cnt = 5 + ((wr->num_sge) << 1);
- }
- wqe->write.plen = cpu_to_be32(plen);
- return 0;
-}
-
-static int build_rdma_read(union t3_wr *wqe, const struct ib_send_wr *wr,
- u8 *flit_cnt)
-{
- if (wr->num_sge > 1)
- return -EINVAL;
- wqe->read.rdmaop = T3_READ_REQ;
- if (wr->opcode == IB_WR_RDMA_READ_WITH_INV)
- wqe->read.local_inv = 1;
- else
- wqe->read.local_inv = 0;
- wqe->read.reserved[0] = 0;
- wqe->read.reserved[1] = 0;
- wqe->read.rem_stag = cpu_to_be32(rdma_wr(wr)->rkey);
- wqe->read.rem_to = cpu_to_be64(rdma_wr(wr)->remote_addr);
- wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey);
- wqe->read.local_len = cpu_to_be32(wr->sg_list[0].length);
- wqe->read.local_to = cpu_to_be64(wr->sg_list[0].addr);
- *flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
- return 0;
-}
-
-static int build_memreg(union t3_wr *wqe, const struct ib_reg_wr *wr,
- u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq)
-{
- struct iwch_mr *mhp = to_iwch_mr(wr->mr);
- int i;
- __be64 *p;
-
- if (mhp->npages > T3_MAX_FASTREG_DEPTH)
- return -EINVAL;
- *wr_cnt = 1;
- wqe->fastreg.stag = cpu_to_be32(wr->key);
- wqe->fastreg.len = cpu_to_be32(mhp->ibmr.length);
- wqe->fastreg.va_base_hi = cpu_to_be32(mhp->ibmr.iova >> 32);
- wqe->fastreg.va_base_lo_fbo =
- cpu_to_be32(mhp->ibmr.iova & 0xffffffff);
- wqe->fastreg.page_type_perms = cpu_to_be32(
- V_FR_PAGE_COUNT(mhp->npages) |
- V_FR_PAGE_SIZE(ilog2(wr->mr->page_size) - 12) |
- V_FR_TYPE(TPT_VATO) |
- V_FR_PERMS(iwch_ib_to_tpt_access(wr->access)));
- p = &wqe->fastreg.pbl_addrs[0];
- for (i = 0; i < mhp->npages; i++, p++) {
-
- /* If we need a 2nd WR, then set it up */
- if (i == T3_MAX_FASTREG_FRAG) {
- *wr_cnt = 2;
- wqe = (union t3_wr *)(wq->queue +
- Q_PTR2IDX((wq->wptr+1), wq->size_log2));
- build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0,
- Q_GENBIT(wq->wptr + 1, wq->size_log2),
- 0, 1 + mhp->npages - T3_MAX_FASTREG_FRAG,
- T3_EOP);
-
- p = &wqe->pbl_frag.pbl_addrs[0];
- }
- *p = cpu_to_be64((u64)mhp->pages[i]);
- }
- *flit_cnt = 5 + mhp->npages;
- if (*flit_cnt > 15)
- *flit_cnt = 15;
- return 0;
-}
-
-static int build_inv_stag(union t3_wr *wqe, const struct ib_send_wr *wr,
- u8 *flit_cnt)
-{
- wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey);
- wqe->local_inv.reserved = 0;
- *flit_cnt = sizeof(struct t3_local_inv_wr) >> 3;
- return 0;
-}
-
-static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
- u32 num_sgle, u32 * pbl_addr, u8 * page_size)
-{
- int i;
- struct iwch_mr *mhp;
- u64 offset;
- for (i = 0; i < num_sgle; i++) {
-
- mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8);
- if (!mhp) {
- pr_debug("%s %d\n", __func__, __LINE__);
- return -EIO;
- }
- if (!mhp->attr.state) {
- pr_debug("%s %d\n", __func__, __LINE__);
- return -EIO;
- }
- if (mhp->attr.zbva) {
- pr_debug("%s %d\n", __func__, __LINE__);
- return -EIO;
- }
-
- if (sg_list[i].addr < mhp->attr.va_fbo) {
- pr_debug("%s %d\n", __func__, __LINE__);
- return -EINVAL;
- }
- if (sg_list[i].addr + ((u64) sg_list[i].length) <
- sg_list[i].addr) {
- pr_debug("%s %d\n", __func__, __LINE__);
- return -EINVAL;
- }
- if (sg_list[i].addr + ((u64) sg_list[i].length) >
- mhp->attr.va_fbo + ((u64) mhp->attr.len)) {
- pr_debug("%s %d\n", __func__, __LINE__);
- return -EINVAL;
- }
- offset = sg_list[i].addr - mhp->attr.va_fbo;
- offset += mhp->attr.va_fbo &
- ((1UL << (12 + mhp->attr.page_size)) - 1);
- pbl_addr[i] = ((mhp->attr.pbl_addr -
- rhp->rdev.rnic_info.pbl_base) >> 3) +
- (offset >> (12 + mhp->attr.page_size));
- page_size[i] = mhp->attr.page_size;
- }
- return 0;
-}
-
-static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe,
- const struct ib_recv_wr *wr)
-{
- int i, err = 0;
- u32 pbl_addr[T3_MAX_SGE];
- u8 page_size[T3_MAX_SGE];
-
- err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr,
- page_size);
- if (err)
- return err;
- wqe->recv.pagesz[0] = page_size[0];
- wqe->recv.pagesz[1] = page_size[1];
- wqe->recv.pagesz[2] = page_size[2];
- wqe->recv.pagesz[3] = page_size[3];
- wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);
- for (i = 0; i < wr->num_sge; i++) {
- wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey);
- wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
-
- /* to in the WQE == the offset into the page */
- wqe->recv.sgl[i].to = cpu_to_be64(((u32)wr->sg_list[i].addr) &
- ((1UL << (12 + page_size[i])) - 1));
-
-		/* pbl_addr is the adapter's address in the PBL */
- wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]);
- }
- for (; i < T3_MAX_SGE; i++) {
- wqe->recv.sgl[i].stag = 0;
- wqe->recv.sgl[i].len = 0;
- wqe->recv.sgl[i].to = 0;
- wqe->recv.pbl_addr[i] = 0;
- }
- qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
- qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
- qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
- qhp->wq.rq_size_log2)].pbl_addr = 0;
- return 0;
-}
-
-static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe,
- const struct ib_recv_wr *wr)
-{
- int i;
- u32 pbl_addr;
- u32 pbl_offset;
-
-
- /*
- * The T3 HW requires the PBL in the HW recv descriptor to reference
- * a PBL entry. So we allocate the max needed PBL memory here and pass
- * it to the uP in the recv WR. The uP will build the PBL and setup
- * the HW recv descriptor.
- */
- pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE);
- if (!pbl_addr)
- return -ENOMEM;
-
- /*
- * Compute the 8B aligned offset.
- */
- pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3;
-
- wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);
-
- for (i = 0; i < wr->num_sge; i++) {
-
- /*
- * Use a 128MB page size. This and an imposed 128MB
- * sge length limit allows us to require only a 2-entry HW
-		 * PBL for each SGE. This restriction is acceptable since
-		 * it is not possible to allocate 128MB of contiguous
- * DMA coherent memory!
- */
- if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN)
- return -EINVAL;
- wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT;
-
- /*
- * T3 restricts a recv to all zero-stag or all non-zero-stag.
- */
- if (wr->sg_list[i].lkey != 0)
- return -EINVAL;
- wqe->recv.sgl[i].stag = 0;
- wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
- wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
- wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_offset);
- pbl_offset += 2;
- }
- for (; i < T3_MAX_SGE; i++) {
- wqe->recv.pagesz[i] = 0;
- wqe->recv.sgl[i].stag = 0;
- wqe->recv.sgl[i].len = 0;
- wqe->recv.sgl[i].to = 0;
- wqe->recv.pbl_addr[i] = 0;
- }
- qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
- qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
- qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
- qhp->wq.rq_size_log2)].pbl_addr = pbl_addr;
- return 0;
-}
-
-int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
- const struct ib_send_wr **bad_wr)
-{
- int err = 0;
- u8 uninitialized_var(t3_wr_flit_cnt);
- enum t3_wr_opcode t3_wr_opcode = 0;
- enum t3_wr_flags t3_wr_flags;
- struct iwch_qp *qhp;
- u32 idx;
- union t3_wr *wqe;
- u32 num_wrs;
- unsigned long flag;
- struct t3_swsq *sqp;
- int wr_cnt = 1;
-
- qhp = to_iwch_qp(ibqp);
- spin_lock_irqsave(&qhp->lock, flag);
- if (qhp->attr.state > IWCH_QP_STATE_RTS) {
- spin_unlock_irqrestore(&qhp->lock, flag);
- err = -EINVAL;
- goto out;
- }
- num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
- qhp->wq.sq_size_log2);
- if (num_wrs == 0) {
- spin_unlock_irqrestore(&qhp->lock, flag);
- err = -ENOMEM;
- goto out;
- }
- while (wr) {
- if (num_wrs == 0) {
- err = -ENOMEM;
- break;
- }
- idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
- wqe = (union t3_wr *) (qhp->wq.queue + idx);
- t3_wr_flags = 0;
- if (wr->send_flags & IB_SEND_SOLICITED)
- t3_wr_flags |= T3_SOLICITED_EVENT_FLAG;
- if (wr->send_flags & IB_SEND_SIGNALED)
- t3_wr_flags |= T3_COMPLETION_FLAG;
- sqp = qhp->wq.sq +
- Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
- switch (wr->opcode) {
- case IB_WR_SEND:
- case IB_WR_SEND_WITH_INV:
- if (wr->send_flags & IB_SEND_FENCE)
- t3_wr_flags |= T3_READ_FENCE_FLAG;
- t3_wr_opcode = T3_WR_SEND;
- err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
- break;
- case IB_WR_RDMA_WRITE:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- t3_wr_opcode = T3_WR_WRITE;
- err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
- break;
- case IB_WR_RDMA_READ:
- case IB_WR_RDMA_READ_WITH_INV:
- t3_wr_opcode = T3_WR_READ;
- t3_wr_flags = 0; /* T3 reads are always signaled */
- err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
- if (err)
- break;
- sqp->read_len = wqe->read.local_len;
- if (!qhp->wq.oldest_read)
- qhp->wq.oldest_read = sqp;
- break;
- case IB_WR_REG_MR:
- t3_wr_opcode = T3_WR_FASTREG;
- err = build_memreg(wqe, reg_wr(wr), &t3_wr_flit_cnt,
- &wr_cnt, &qhp->wq);
- break;
- case IB_WR_LOCAL_INV:
- if (wr->send_flags & IB_SEND_FENCE)
- t3_wr_flags |= T3_LOCAL_FENCE_FLAG;
- t3_wr_opcode = T3_WR_INV_STAG;
- err = build_inv_stag(wqe, wr, &t3_wr_flit_cnt);
- break;
- default:
- pr_debug("%s post of type=%d TBD!\n", __func__,
- wr->opcode);
- err = -EINVAL;
- }
- if (err)
- break;
- wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
- sqp->wr_id = wr->wr_id;
- sqp->opcode = wr2opcode(t3_wr_opcode);
- sqp->sq_wptr = qhp->wq.sq_wptr;
- sqp->complete = 0;
- sqp->signaled = (wr->send_flags & IB_SEND_SIGNALED);
-
- build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags,
- Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
- 0, t3_wr_flit_cnt,
- (wr_cnt == 1) ? T3_SOPEOP : T3_SOP);
- pr_debug("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n",
- __func__, (unsigned long long)wr->wr_id, idx,
- Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
- sqp->opcode);
- wr = wr->next;
- num_wrs--;
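-		/* A fastreg WR can take two WQE slots, so advance by wr_cnt. */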
- qhp->wq.wptr += wr_cnt;
- ++(qhp->wq.sq_wptr);
- }
- spin_unlock_irqrestore(&qhp->lock, flag);
- if (cxio_wq_db_enabled(&qhp->wq))
- ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
-
-out:
- if (err)
- *bad_wr = wr;
- return err;
-}
-
-int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
- const struct ib_recv_wr **bad_wr)
-{
- int err = 0;
- struct iwch_qp *qhp;
- u32 idx;
- union t3_wr *wqe;
- u32 num_wrs;
- unsigned long flag;
-
- qhp = to_iwch_qp(ibqp);
- spin_lock_irqsave(&qhp->lock, flag);
- if (qhp->attr.state > IWCH_QP_STATE_RTS) {
- spin_unlock_irqrestore(&qhp->lock, flag);
- err = -EINVAL;
- goto out;
- }
- num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
- qhp->wq.rq_size_log2) - 1;
- if (!wr) {
- spin_unlock_irqrestore(&qhp->lock, flag);
- err = -ENOMEM;
- goto out;
- }
- while (wr) {
- if (wr->num_sge > T3_MAX_SGE) {
- err = -EINVAL;
- break;
- }
- idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
- wqe = (union t3_wr *) (qhp->wq.queue + idx);
- if (num_wrs)
- if (wr->sg_list[0].lkey)
- err = build_rdma_recv(qhp, wqe, wr);
- else
- err = build_zero_stag_recv(qhp, wqe, wr);
- else
- err = -ENOMEM;
-
- if (err)
- break;
-
- build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
- Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
- 0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP);
-		pr_debug("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rq_rptr 0x%x wqe %p\n",
- __func__, (unsigned long long)wr->wr_id,
- idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
- ++(qhp->wq.rq_wptr);
- ++(qhp->wq.wptr);
- wr = wr->next;
- num_wrs--;
- }
- spin_unlock_irqrestore(&qhp->lock, flag);
- if (cxio_wq_db_enabled(&qhp->wq))
- ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
-
-out:
- if (err)
- *bad_wr = wr;
- return err;
-}
-
-static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
- u8 *layer_type, u8 *ecode)
-{
- int status = TPT_ERR_INTERNAL_ERR;
- int tagged = 0;
- int opcode = -1;
- int rqtype = 0;
- int send_inv = 0;
-
- if (rsp_msg) {
- status = CQE_STATUS(rsp_msg->cqe);
- opcode = CQE_OPCODE(rsp_msg->cqe);
- rqtype = RQ_TYPE(rsp_msg->cqe);
- send_inv = (opcode == T3_SEND_WITH_INV) ||
- (opcode == T3_SEND_WITH_SE_INV);
- tagged = (opcode == T3_RDMA_WRITE) ||
- (rqtype && (opcode == T3_READ_RESP));
- }
-
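-	/* Map the CQE error status onto an iWARP TERMINATE layer and ecode. */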
- switch (status) {
- case TPT_ERR_STAG:
- if (send_inv) {
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
- *ecode = RDMAP_CANT_INV_STAG;
- } else {
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
- *ecode = RDMAP_INV_STAG;
- }
- break;
- case TPT_ERR_PDID:
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
- if ((opcode == T3_SEND_WITH_INV) ||
- (opcode == T3_SEND_WITH_SE_INV))
- *ecode = RDMAP_CANT_INV_STAG;
- else
- *ecode = RDMAP_STAG_NOT_ASSOC;
- break;
- case TPT_ERR_QPID:
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
- *ecode = RDMAP_STAG_NOT_ASSOC;
- break;
- case TPT_ERR_ACCESS:
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
- *ecode = RDMAP_ACC_VIOL;
- break;
- case TPT_ERR_WRAP:
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
- *ecode = RDMAP_TO_WRAP;
- break;
- case TPT_ERR_BOUND:
- if (tagged) {
- *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
- *ecode = DDPT_BASE_BOUNDS;
- } else {
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
- *ecode = RDMAP_BASE_BOUNDS;
- }
- break;
- case TPT_ERR_INVALIDATE_SHARED_MR:
- case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
- *ecode = RDMAP_CANT_INV_STAG;
- break;
- case TPT_ERR_ECC:
- case TPT_ERR_ECC_PSTAG:
- case TPT_ERR_INTERNAL_ERR:
- *layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA;
- *ecode = 0;
- break;
- case TPT_ERR_OUT_OF_RQE:
- *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
- *ecode = DDPU_INV_MSN_NOBUF;
- break;
- case TPT_ERR_PBL_ADDR_BOUND:
- *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
- *ecode = DDPT_BASE_BOUNDS;
- break;
- case TPT_ERR_CRC:
- *layer_type = LAYER_MPA|DDP_LLP;
- *ecode = MPA_CRC_ERR;
- break;
- case TPT_ERR_MARKER:
- *layer_type = LAYER_MPA|DDP_LLP;
- *ecode = MPA_MARKER_ERR;
- break;
- case TPT_ERR_PDU_LEN_ERR:
- *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
- *ecode = DDPU_MSG_TOOBIG;
- break;
- case TPT_ERR_DDP_VERSION:
- if (tagged) {
- *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
- *ecode = DDPT_INV_VERS;
- } else {
- *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
- *ecode = DDPU_INV_VERS;
- }
- break;
- case TPT_ERR_RDMA_VERSION:
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
- *ecode = RDMAP_INV_VERS;
- break;
- case TPT_ERR_OPCODE:
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
- *ecode = RDMAP_INV_OPCODE;
- break;
- case TPT_ERR_DDP_QUEUE_NUM:
- *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
- *ecode = DDPU_INV_QN;
- break;
- case TPT_ERR_MSN:
- case TPT_ERR_MSN_GAP:
- case TPT_ERR_MSN_RANGE:
- case TPT_ERR_IRD_OVERFLOW:
- *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
- *ecode = DDPU_INV_MSN_RANGE;
- break;
- case TPT_ERR_TBIT:
- *layer_type = LAYER_DDP|DDP_LOCAL_CATA;
- *ecode = 0;
- break;
- case TPT_ERR_MO:
- *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
- *ecode = DDPU_INV_MO;
- break;
- default:
- *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
- *ecode = 0;
- break;
- }
-}
-
-int iwch_post_zb_read(struct iwch_ep *ep)
-{
- union t3_wr *wqe;
- struct sk_buff *skb;
- u8 flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
-
- pr_debug("%s enter\n", __func__);
- skb = alloc_skb(40, GFP_KERNEL);
- if (!skb) {
- pr_err("%s cannot send zb_read!!\n", __func__);
- return -ENOMEM;
- }
- wqe = skb_put_zero(skb, sizeof(struct t3_rdma_read_wr));
- wqe->read.rdmaop = T3_READ_REQ;
- wqe->read.reserved[0] = 0;
- wqe->read.reserved[1] = 0;
- wqe->read.rem_stag = cpu_to_be32(1);
- wqe->read.rem_to = cpu_to_be64(1);
- wqe->read.local_stag = cpu_to_be32(1);
- wqe->read.local_len = cpu_to_be32(0);
- wqe->read.local_to = cpu_to_be64(1);
- wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_READ));
- wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(ep->hwtid)|
- V_FW_RIWR_LEN(flit_cnt));
- skb->priority = CPL_PRIORITY_DATA;
- return iwch_cxgb3_ofld_send(ep->com.qp->rhp->rdev.t3cdev_p, skb);
-}
-
-/*
- * This posts a TERMINATE with layer=RDMA, type=catastrophic.
- */
-int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
-{
- union t3_wr *wqe;
- struct terminate_message *term;
- struct sk_buff *skb;
-
- pr_debug("%s %d\n", __func__, __LINE__);
- skb = alloc_skb(40, GFP_ATOMIC);
- if (!skb) {
- pr_err("%s cannot send TERMINATE!\n", __func__);
- return -ENOMEM;
- }
- wqe = skb_put_zero(skb, 40);
- wqe->send.rdmaop = T3_TERMINATE;
-
- /* immediate data length */
- wqe->send.plen = htonl(4);
-
- /* immediate data starts here. */
- term = (struct terminate_message *)wqe->send.sgl;
- build_term_codes(rsp_msg, &term->layer_etype, &term->ecode);
- wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_SEND) |
- V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG));
- wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid));
- skb->priority = CPL_PRIORITY_DATA;
- return iwch_cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
-}
-
-/*
- * Assumes qhp lock is held.
- */
-static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp,
- struct iwch_cq *schp)
- __releases(&qhp->lock)
- __acquires(&qhp->lock)
-{
- int count;
- int flushed;
-
- lockdep_assert_held(&qhp->lock);
-
- pr_debug("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
- /* take a ref on the qhp since we must release the lock */
- atomic_inc(&qhp->refcnt);
- spin_unlock(&qhp->lock);
-
- /* locking hierarchy: cq lock first, then qp lock. */
- spin_lock(&rchp->lock);
- spin_lock(&qhp->lock);
- cxio_flush_hw_cq(&rchp->cq);
- cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
- flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count);
- spin_unlock(&qhp->lock);
- spin_unlock(&rchp->lock);
- if (flushed) {
- spin_lock(&rchp->comp_handler_lock);
- (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
- spin_unlock(&rchp->comp_handler_lock);
- }
-
- /* locking hierarchy: cq lock first, then qp lock. */
- spin_lock(&schp->lock);
- spin_lock(&qhp->lock);
- cxio_flush_hw_cq(&schp->cq);
- cxio_count_scqes(&schp->cq, &qhp->wq, &count);
- flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count);
- spin_unlock(&qhp->lock);
- spin_unlock(&schp->lock);
- if (flushed) {
- spin_lock(&schp->comp_handler_lock);
- (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
- spin_unlock(&schp->comp_handler_lock);
- }
-
- /* deref */
- if (atomic_dec_and_test(&qhp->refcnt))
- wake_up(&qhp->wait);
-
- spin_lock(&qhp->lock);
-}
-
-static void flush_qp(struct iwch_qp *qhp)
-{
- struct iwch_cq *rchp, *schp;
-
- rchp = get_chp(qhp->rhp, qhp->attr.rcq);
- schp = get_chp(qhp->rhp, qhp->attr.scq);
-
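-	/*
-	 * For userspace QPs, just mark the WQ and CQs in error and notify
-	 * the completion handlers; kernel QPs are flushed in __flush_qp().
-	 */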
- if (qhp->ibqp.uobject) {
- cxio_set_wq_in_error(&qhp->wq);
- cxio_set_cq_in_error(&rchp->cq);
- spin_lock(&rchp->comp_handler_lock);
- (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
- spin_unlock(&rchp->comp_handler_lock);
- if (schp != rchp) {
- cxio_set_cq_in_error(&schp->cq);
- spin_lock(&schp->comp_handler_lock);
- (*schp->ibcq.comp_handler)(&schp->ibcq,
- schp->ibcq.cq_context);
- spin_unlock(&schp->comp_handler_lock);
- }
- return;
- }
- __flush_qp(qhp, rchp, schp);
-}
-
-
-/*
- * Return count of RECV WRs posted
- */
-u16 iwch_rqes_posted(struct iwch_qp *qhp)
-{
- union t3_wr *wqe = qhp->wq.queue;
- u16 count = 0;
-
- while (count < USHRT_MAX && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) {
- count++;
- wqe++;
- }
- pr_debug("%s qhp %p count %u\n", __func__, qhp, count);
- return count;
-}
-
-static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
- enum iwch_qp_attr_mask mask,
- struct iwch_qp_attributes *attrs)
-{
- struct t3_rdma_init_attr init_attr;
- int ret;
-
- init_attr.tid = qhp->ep->hwtid;
- init_attr.qpid = qhp->wq.qpid;
- init_attr.pdid = qhp->attr.pd;
- init_attr.scqid = qhp->attr.scq;
- init_attr.rcqid = qhp->attr.rcq;
- init_attr.rq_addr = qhp->wq.rq_addr;
- init_attr.rq_size = 1 << qhp->wq.rq_size_log2;
- init_attr.mpaattrs = uP_RI_MPA_IETF_ENABLE |
- qhp->attr.mpa_attr.recv_marker_enabled |
- (qhp->attr.mpa_attr.xmit_marker_enabled << 1) |
- (qhp->attr.mpa_attr.crc_enabled << 2);
-
- init_attr.qpcaps = uP_RI_QP_RDMA_READ_ENABLE |
- uP_RI_QP_RDMA_WRITE_ENABLE |
- uP_RI_QP_BIND_ENABLE;
- if (!qhp->ibqp.uobject)
- init_attr.qpcaps |= uP_RI_QP_STAG0_ENABLE |
- uP_RI_QP_FAST_REGISTER_ENABLE;
-
- init_attr.tcp_emss = qhp->ep->emss;
- init_attr.ord = qhp->attr.max_ord;
- init_attr.ird = qhp->attr.max_ird;
- init_attr.qp_dma_addr = qhp->wq.dma_addr;
- init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
- init_attr.rqe_count = iwch_rqes_posted(qhp);
- init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
- init_attr.chan = qhp->ep->l2t->smt_idx;
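-	/*
-	 * In peer2peer mode an RTR read is exchanged, so make sure the
-	 * initiator has at least one ORD and the responder at least one IRD.
-	 */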
- if (peer2peer) {
- init_attr.rtr_type = RTR_READ;
- if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator)
- init_attr.ord = 1;
- if (init_attr.ird == 0 && !qhp->attr.mpa_attr.initiator)
- init_attr.ird = 1;
- } else
- init_attr.rtr_type = 0;
- init_attr.irs = qhp->ep->rcv_seq;
- pr_debug("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d flags 0x%x qpcaps 0x%x\n",
- __func__,
- init_attr.rq_addr, init_attr.rq_size,
- init_attr.flags, init_attr.qpcaps);
- ret = cxio_rdma_init(&rhp->rdev, &init_attr);
- pr_debug("%s ret %d\n", __func__, ret);
- return ret;
-}
-
-int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
- enum iwch_qp_attr_mask mask,
- struct iwch_qp_attributes *attrs,
- int internal)
-{
- int ret = 0;
- struct iwch_qp_attributes newattr = qhp->attr;
- unsigned long flag;
- int disconnect = 0;
- int terminate = 0;
- int abort = 0;
- int free = 0;
- struct iwch_ep *ep = NULL;
-
- pr_debug("%s qhp %p qpid 0x%x ep %p state %d -> %d\n", __func__,
- qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state,
- (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
-
- spin_lock_irqsave(&qhp->lock, flag);
-
- /* Process attr changes if in IDLE */
- if (mask & IWCH_QP_ATTR_VALID_MODIFY) {
- if (qhp->attr.state != IWCH_QP_STATE_IDLE) {
- ret = -EIO;
- goto out;
- }
- if (mask & IWCH_QP_ATTR_ENABLE_RDMA_READ)
- newattr.enable_rdma_read = attrs->enable_rdma_read;
- if (mask & IWCH_QP_ATTR_ENABLE_RDMA_WRITE)
- newattr.enable_rdma_write = attrs->enable_rdma_write;
- if (mask & IWCH_QP_ATTR_ENABLE_RDMA_BIND)
- newattr.enable_bind = attrs->enable_bind;
- if (mask & IWCH_QP_ATTR_MAX_ORD) {
- if (attrs->max_ord >
- rhp->attr.max_rdma_read_qp_depth) {
- ret = -EINVAL;
- goto out;
- }
- newattr.max_ord = attrs->max_ord;
- }
- if (mask & IWCH_QP_ATTR_MAX_IRD) {
- if (attrs->max_ird >
- rhp->attr.max_rdma_reads_per_qp) {
- ret = -EINVAL;
- goto out;
- }
- newattr.max_ird = attrs->max_ird;
- }
- qhp->attr = newattr;
- }
-
- if (!(mask & IWCH_QP_ATTR_NEXT_STATE))
- goto out;
- if (qhp->attr.state == attrs->next_state)
- goto out;
-
- switch (qhp->attr.state) {
- case IWCH_QP_STATE_IDLE:
- switch (attrs->next_state) {
- case IWCH_QP_STATE_RTS:
- if (!(mask & IWCH_QP_ATTR_LLP_STREAM_HANDLE)) {
- ret = -EINVAL;
- goto out;
- }
- if (!(mask & IWCH_QP_ATTR_MPA_ATTR)) {
- ret = -EINVAL;
- goto out;
- }
- qhp->attr.mpa_attr = attrs->mpa_attr;
- qhp->attr.llp_stream_handle = attrs->llp_stream_handle;
- qhp->ep = qhp->attr.llp_stream_handle;
- qhp->attr.state = IWCH_QP_STATE_RTS;
-
- /*
- * Ref the endpoint here and deref when we
- * disassociate the endpoint from the QP. This
- * happens in CLOSING->IDLE transition or *->ERROR
- * transition.
- */
- get_ep(&qhp->ep->com);
- spin_unlock_irqrestore(&qhp->lock, flag);
- ret = rdma_init(rhp, qhp, mask, attrs);
- spin_lock_irqsave(&qhp->lock, flag);
- if (ret)
- goto err;
- break;
- case IWCH_QP_STATE_ERROR:
- qhp->attr.state = IWCH_QP_STATE_ERROR;
- flush_qp(qhp);
- break;
- default:
- ret = -EINVAL;
- goto out;
- }
- break;
- case IWCH_QP_STATE_RTS:
- switch (attrs->next_state) {
- case IWCH_QP_STATE_CLOSING:
- BUG_ON(kref_read(&qhp->ep->com.kref) < 2);
- qhp->attr.state = IWCH_QP_STATE_CLOSING;
- if (!internal) {
- abort=0;
- disconnect = 1;
- ep = qhp->ep;
- get_ep(&ep->com);
- }
- break;
- case IWCH_QP_STATE_TERMINATE:
- qhp->attr.state = IWCH_QP_STATE_TERMINATE;
- if (qhp->ibqp.uobject)
- cxio_set_wq_in_error(&qhp->wq);
- if (!internal)
- terminate = 1;
- break;
- case IWCH_QP_STATE_ERROR:
- qhp->attr.state = IWCH_QP_STATE_ERROR;
- if (!internal) {
- abort=1;
- disconnect = 1;
- ep = qhp->ep;
- get_ep(&ep->com);
- }
- goto err;
- break;
- default:
- ret = -EINVAL;
- goto out;
- }
- break;
- case IWCH_QP_STATE_CLOSING:
- if (!internal) {
- ret = -EINVAL;
- goto out;
- }
- switch (attrs->next_state) {
- case IWCH_QP_STATE_IDLE:
- flush_qp(qhp);
- qhp->attr.state = IWCH_QP_STATE_IDLE;
- qhp->attr.llp_stream_handle = NULL;
- put_ep(&qhp->ep->com);
- qhp->ep = NULL;
- wake_up(&qhp->wait);
- break;
- case IWCH_QP_STATE_ERROR:
- goto err;
- default:
- ret = -EINVAL;
- goto err;
- }
- break;
- case IWCH_QP_STATE_ERROR:
- if (attrs->next_state != IWCH_QP_STATE_IDLE) {
- ret = -EINVAL;
- goto out;
- }
-
- if (!Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr) ||
- !Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr)) {
- ret = -EINVAL;
- goto out;
- }
- qhp->attr.state = IWCH_QP_STATE_IDLE;
- break;
- case IWCH_QP_STATE_TERMINATE:
- if (!internal) {
- ret = -EINVAL;
- goto out;
- }
- goto err;
- break;
- default:
- pr_err("%s in a bad state %d\n", __func__, qhp->attr.state);
- ret = -EINVAL;
- goto err;
- break;
- }
- goto out;
-err:
- pr_debug("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep,
- qhp->wq.qpid);
-
- /* disassociate the LLP connection */
- qhp->attr.llp_stream_handle = NULL;
- ep = qhp->ep;
- qhp->ep = NULL;
- qhp->attr.state = IWCH_QP_STATE_ERROR;
- free=1;
- wake_up(&qhp->wait);
- BUG_ON(!ep);
- flush_qp(qhp);
-out:
- spin_unlock_irqrestore(&qhp->lock, flag);
-
- if (terminate)
- iwch_post_terminate(qhp, NULL);
-
- /*
- * If disconnect is 1, then we need to initiate a disconnect
- * on the EP. This can be a normal close (RTS->CLOSING) or
- * an abnormal close (RTS/CLOSING->ERROR).
- */
- if (disconnect) {
- iwch_ep_disconnect(ep, abort, GFP_KERNEL);
- put_ep(&ep->com);
- }
-
- /*
- * If free is 1, then we've disassociated the EP from the QP
- * and we need to dereference the EP.
- */
- if (free)
- put_ep(&ep->com);
-
- pr_debug("%s exit state %d\n", __func__, qhp->attr.state);
- return ret;
-}
diff --git a/drivers/infiniband/hw/cxgb3/tcb.h b/drivers/infiniband/hw/cxgb3/tcb.h
deleted file mode 100644
index c702dc199e18..000000000000
--- a/drivers/infiniband/hw/cxgb3/tcb.h
+++ /dev/null
@@ -1,632 +0,0 @@
-/*
- * Copyright (c) 2007 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef _TCB_DEFS_H
-#define _TCB_DEFS_H
-
-#define W_TCB_T_STATE 0
-#define S_TCB_T_STATE 0
-#define M_TCB_T_STATE 0xfULL
-#define V_TCB_T_STATE(x) ((x) << S_TCB_T_STATE)
-
-#define W_TCB_TIMER 0
-#define S_TCB_TIMER 4
-#define M_TCB_TIMER 0x1ULL
-#define V_TCB_TIMER(x) ((x) << S_TCB_TIMER)
-
-#define W_TCB_DACK_TIMER 0
-#define S_TCB_DACK_TIMER 5
-#define M_TCB_DACK_TIMER 0x1ULL
-#define V_TCB_DACK_TIMER(x) ((x) << S_TCB_DACK_TIMER)
-
-#define W_TCB_DEL_FLAG 0
-#define S_TCB_DEL_FLAG 6
-#define M_TCB_DEL_FLAG 0x1ULL
-#define V_TCB_DEL_FLAG(x) ((x) << S_TCB_DEL_FLAG)
-
-#define W_TCB_L2T_IX 0
-#define S_TCB_L2T_IX 7
-#define M_TCB_L2T_IX 0x7ffULL
-#define V_TCB_L2T_IX(x) ((x) << S_TCB_L2T_IX)
-
-#define W_TCB_SMAC_SEL 0
-#define S_TCB_SMAC_SEL 18
-#define M_TCB_SMAC_SEL 0x3ULL
-#define V_TCB_SMAC_SEL(x) ((x) << S_TCB_SMAC_SEL)
-
-#define W_TCB_TOS 0
-#define S_TCB_TOS 20
-#define M_TCB_TOS 0x3fULL
-#define V_TCB_TOS(x) ((x) << S_TCB_TOS)
-
-#define W_TCB_MAX_RT 0
-#define S_TCB_MAX_RT 26
-#define M_TCB_MAX_RT 0xfULL
-#define V_TCB_MAX_RT(x) ((x) << S_TCB_MAX_RT)
-
-#define W_TCB_T_RXTSHIFT 0
-#define S_TCB_T_RXTSHIFT 30
-#define M_TCB_T_RXTSHIFT 0xfULL
-#define V_TCB_T_RXTSHIFT(x) ((x) << S_TCB_T_RXTSHIFT)
-
-#define W_TCB_T_DUPACKS 1
-#define S_TCB_T_DUPACKS 2
-#define M_TCB_T_DUPACKS 0xfULL
-#define V_TCB_T_DUPACKS(x) ((x) << S_TCB_T_DUPACKS)
-
-#define W_TCB_T_MAXSEG 1
-#define S_TCB_T_MAXSEG 6
-#define M_TCB_T_MAXSEG 0xfULL
-#define V_TCB_T_MAXSEG(x) ((x) << S_TCB_T_MAXSEG)
-
-#define W_TCB_T_FLAGS1 1
-#define S_TCB_T_FLAGS1 10
-#define M_TCB_T_FLAGS1 0xffffffffULL
-#define V_TCB_T_FLAGS1(x) ((x) << S_TCB_T_FLAGS1)
-
-#define W_TCB_T_MIGRATION 1
-#define S_TCB_T_MIGRATION 20
-#define M_TCB_T_MIGRATION 0x1ULL
-#define V_TCB_T_MIGRATION(x) ((x) << S_TCB_T_MIGRATION)
-
-#define W_TCB_T_FLAGS2 2
-#define S_TCB_T_FLAGS2 10
-#define M_TCB_T_FLAGS2 0x7fULL
-#define V_TCB_T_FLAGS2(x) ((x) << S_TCB_T_FLAGS2)
-
-#define W_TCB_SND_SCALE 2
-#define S_TCB_SND_SCALE 17
-#define M_TCB_SND_SCALE 0xfULL
-#define V_TCB_SND_SCALE(x) ((x) << S_TCB_SND_SCALE)
-
-#define W_TCB_RCV_SCALE 2
-#define S_TCB_RCV_SCALE 21
-#define M_TCB_RCV_SCALE 0xfULL
-#define V_TCB_RCV_SCALE(x) ((x) << S_TCB_RCV_SCALE)
-
-#define W_TCB_SND_UNA_RAW 2
-#define S_TCB_SND_UNA_RAW 25
-#define M_TCB_SND_UNA_RAW 0x7ffffffULL
-#define V_TCB_SND_UNA_RAW(x) ((x) << S_TCB_SND_UNA_RAW)
-
-#define W_TCB_SND_NXT_RAW 3
-#define S_TCB_SND_NXT_RAW 20
-#define M_TCB_SND_NXT_RAW 0x7ffffffULL
-#define V_TCB_SND_NXT_RAW(x) ((x) << S_TCB_SND_NXT_RAW)
-
-#define W_TCB_RCV_NXT 4
-#define S_TCB_RCV_NXT 15
-#define M_TCB_RCV_NXT 0xffffffffULL
-#define V_TCB_RCV_NXT(x) ((x) << S_TCB_RCV_NXT)
-
-#define W_TCB_RCV_ADV 5
-#define S_TCB_RCV_ADV 15
-#define M_TCB_RCV_ADV 0xffffULL
-#define V_TCB_RCV_ADV(x) ((x) << S_TCB_RCV_ADV)
-
-#define W_TCB_SND_MAX_RAW 5
-#define S_TCB_SND_MAX_RAW 31
-#define M_TCB_SND_MAX_RAW 0x7ffffffULL
-#define V_TCB_SND_MAX_RAW(x) ((x) << S_TCB_SND_MAX_RAW)
-
-#define W_TCB_SND_CWND 6
-#define S_TCB_SND_CWND 26
-#define M_TCB_SND_CWND 0x7ffffffULL
-#define V_TCB_SND_CWND(x) ((x) << S_TCB_SND_CWND)
-
-#define W_TCB_SND_SSTHRESH 7
-#define S_TCB_SND_SSTHRESH 21
-#define M_TCB_SND_SSTHRESH 0x7ffffffULL
-#define V_TCB_SND_SSTHRESH(x) ((x) << S_TCB_SND_SSTHRESH)
-
-#define W_TCB_T_RTT_TS_RECENT_AGE 8
-#define S_TCB_T_RTT_TS_RECENT_AGE 16
-#define M_TCB_T_RTT_TS_RECENT_AGE 0xffffffffULL
-#define V_TCB_T_RTT_TS_RECENT_AGE(x) ((x) << S_TCB_T_RTT_TS_RECENT_AGE)
-
-#define W_TCB_T_RTSEQ_RECENT 9
-#define S_TCB_T_RTSEQ_RECENT 16
-#define M_TCB_T_RTSEQ_RECENT 0xffffffffULL
-#define V_TCB_T_RTSEQ_RECENT(x) ((x) << S_TCB_T_RTSEQ_RECENT)
-
-#define W_TCB_T_SRTT 10
-#define S_TCB_T_SRTT 16
-#define M_TCB_T_SRTT 0xffffULL
-#define V_TCB_T_SRTT(x) ((x) << S_TCB_T_SRTT)
-
-#define W_TCB_T_RTTVAR 11
-#define S_TCB_T_RTTVAR 0
-#define M_TCB_T_RTTVAR 0xffffULL
-#define V_TCB_T_RTTVAR(x) ((x) << S_TCB_T_RTTVAR)
-
-#define W_TCB_TS_LAST_ACK_SENT_RAW 11
-#define S_TCB_TS_LAST_ACK_SENT_RAW 16
-#define M_TCB_TS_LAST_ACK_SENT_RAW 0x7ffffffULL
-#define V_TCB_TS_LAST_ACK_SENT_RAW(x) ((x) << S_TCB_TS_LAST_ACK_SENT_RAW)
-
-#define W_TCB_DIP 12
-#define S_TCB_DIP 11
-#define M_TCB_DIP 0xffffffffULL
-#define V_TCB_DIP(x) ((x) << S_TCB_DIP)
-
-#define W_TCB_SIP 13
-#define S_TCB_SIP 11
-#define M_TCB_SIP 0xffffffffULL
-#define V_TCB_SIP(x) ((x) << S_TCB_SIP)
-
-#define W_TCB_DP 14
-#define S_TCB_DP 11
-#define M_TCB_DP 0xffffULL
-#define V_TCB_DP(x) ((x) << S_TCB_DP)
-
-#define W_TCB_SP 14
-#define S_TCB_SP 27
-#define M_TCB_SP 0xffffULL
-#define V_TCB_SP(x) ((x) << S_TCB_SP)
-
-#define W_TCB_TIMESTAMP 15
-#define S_TCB_TIMESTAMP 11
-#define M_TCB_TIMESTAMP 0xffffffffULL
-#define V_TCB_TIMESTAMP(x) ((x) << S_TCB_TIMESTAMP)
-
-#define W_TCB_TIMESTAMP_OFFSET 16
-#define S_TCB_TIMESTAMP_OFFSET 11
-#define M_TCB_TIMESTAMP_OFFSET 0xfULL
-#define V_TCB_TIMESTAMP_OFFSET(x) ((x) << S_TCB_TIMESTAMP_OFFSET)
-
-#define W_TCB_TX_MAX 16
-#define S_TCB_TX_MAX 15
-#define M_TCB_TX_MAX 0xffffffffULL
-#define V_TCB_TX_MAX(x) ((x) << S_TCB_TX_MAX)
-
-#define W_TCB_TX_HDR_PTR_RAW 17
-#define S_TCB_TX_HDR_PTR_RAW 15
-#define M_TCB_TX_HDR_PTR_RAW 0x1ffffULL
-#define V_TCB_TX_HDR_PTR_RAW(x) ((x) << S_TCB_TX_HDR_PTR_RAW)
-
-#define W_TCB_TX_LAST_PTR_RAW 18
-#define S_TCB_TX_LAST_PTR_RAW 0
-#define M_TCB_TX_LAST_PTR_RAW 0x1ffffULL
-#define V_TCB_TX_LAST_PTR_RAW(x) ((x) << S_TCB_TX_LAST_PTR_RAW)
-
-#define W_TCB_TX_COMPACT 18
-#define S_TCB_TX_COMPACT 17
-#define M_TCB_TX_COMPACT 0x1ULL
-#define V_TCB_TX_COMPACT(x) ((x) << S_TCB_TX_COMPACT)
-
-#define W_TCB_RX_COMPACT 18
-#define S_TCB_RX_COMPACT 18
-#define M_TCB_RX_COMPACT 0x1ULL
-#define V_TCB_RX_COMPACT(x) ((x) << S_TCB_RX_COMPACT)
-
-#define W_TCB_RCV_WND 18
-#define S_TCB_RCV_WND 19
-#define M_TCB_RCV_WND 0x7ffffffULL
-#define V_TCB_RCV_WND(x) ((x) << S_TCB_RCV_WND)
-
-#define W_TCB_RX_HDR_OFFSET 19
-#define S_TCB_RX_HDR_OFFSET 14
-#define M_TCB_RX_HDR_OFFSET 0x7ffffffULL
-#define V_TCB_RX_HDR_OFFSET(x) ((x) << S_TCB_RX_HDR_OFFSET)
-
-#define W_TCB_RX_FRAG0_START_IDX_RAW 20
-#define S_TCB_RX_FRAG0_START_IDX_RAW 9
-#define M_TCB_RX_FRAG0_START_IDX_RAW 0x7ffffffULL
-#define V_TCB_RX_FRAG0_START_IDX_RAW(x) ((x) << S_TCB_RX_FRAG0_START_IDX_RAW)
-
-#define W_TCB_RX_FRAG1_START_IDX_OFFSET 21
-#define S_TCB_RX_FRAG1_START_IDX_OFFSET 4
-#define M_TCB_RX_FRAG1_START_IDX_OFFSET 0x7ffffffULL
-#define V_TCB_RX_FRAG1_START_IDX_OFFSET(x) ((x) << S_TCB_RX_FRAG1_START_IDX_OFFSET)
-
-#define W_TCB_RX_FRAG0_LEN 21
-#define S_TCB_RX_FRAG0_LEN 31
-#define M_TCB_RX_FRAG0_LEN 0x7ffffffULL
-#define V_TCB_RX_FRAG0_LEN(x) ((x) << S_TCB_RX_FRAG0_LEN)
-
-#define W_TCB_RX_FRAG1_LEN 22
-#define S_TCB_RX_FRAG1_LEN 26
-#define M_TCB_RX_FRAG1_LEN 0x7ffffffULL
-#define V_TCB_RX_FRAG1_LEN(x) ((x) << S_TCB_RX_FRAG1_LEN)
-
-#define W_TCB_NEWRENO_RECOVER 23
-#define S_TCB_NEWRENO_RECOVER 21
-#define M_TCB_NEWRENO_RECOVER 0x7ffffffULL
-#define V_TCB_NEWRENO_RECOVER(x) ((x) << S_TCB_NEWRENO_RECOVER)
-
-#define W_TCB_PDU_HAVE_LEN 24
-#define S_TCB_PDU_HAVE_LEN 16
-#define M_TCB_PDU_HAVE_LEN 0x1ULL
-#define V_TCB_PDU_HAVE_LEN(x) ((x) << S_TCB_PDU_HAVE_LEN)
-
-#define W_TCB_PDU_LEN 24
-#define S_TCB_PDU_LEN 17
-#define M_TCB_PDU_LEN 0xffffULL
-#define V_TCB_PDU_LEN(x) ((x) << S_TCB_PDU_LEN)
-
-#define W_TCB_RX_QUIESCE 25
-#define S_TCB_RX_QUIESCE 1
-#define M_TCB_RX_QUIESCE 0x1ULL
-#define V_TCB_RX_QUIESCE(x) ((x) << S_TCB_RX_QUIESCE)
-
-#define W_TCB_RX_PTR_RAW 25
-#define S_TCB_RX_PTR_RAW 2
-#define M_TCB_RX_PTR_RAW 0x1ffffULL
-#define V_TCB_RX_PTR_RAW(x) ((x) << S_TCB_RX_PTR_RAW)
-
-#define W_TCB_CPU_NO 25
-#define S_TCB_CPU_NO 19
-#define M_TCB_CPU_NO 0x7fULL
-#define V_TCB_CPU_NO(x) ((x) << S_TCB_CPU_NO)
-
-#define W_TCB_ULP_TYPE 25
-#define S_TCB_ULP_TYPE 26
-#define M_TCB_ULP_TYPE 0xfULL
-#define V_TCB_ULP_TYPE(x) ((x) << S_TCB_ULP_TYPE)
-
-#define W_TCB_RX_FRAG1_PTR_RAW 25
-#define S_TCB_RX_FRAG1_PTR_RAW 30
-#define M_TCB_RX_FRAG1_PTR_RAW 0x1ffffULL
-#define V_TCB_RX_FRAG1_PTR_RAW(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW)
-
-#define W_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 26
-#define S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 15
-#define M_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 0x7ffffffULL
-#define V_TCB_RX_FRAG2_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW)
-
-#define W_TCB_RX_FRAG2_PTR_RAW 27
-#define S_TCB_RX_FRAG2_PTR_RAW 10
-#define M_TCB_RX_FRAG2_PTR_RAW 0x1ffffULL
-#define V_TCB_RX_FRAG2_PTR_RAW(x) ((x) << S_TCB_RX_FRAG2_PTR_RAW)
-
-#define W_TCB_RX_FRAG2_LEN_RAW 27
-#define S_TCB_RX_FRAG2_LEN_RAW 27
-#define M_TCB_RX_FRAG2_LEN_RAW 0x7ffffffULL
-#define V_TCB_RX_FRAG2_LEN_RAW(x) ((x) << S_TCB_RX_FRAG2_LEN_RAW)
-
-#define W_TCB_RX_FRAG3_PTR_RAW 28
-#define S_TCB_RX_FRAG3_PTR_RAW 22
-#define M_TCB_RX_FRAG3_PTR_RAW 0x1ffffULL
-#define V_TCB_RX_FRAG3_PTR_RAW(x) ((x) << S_TCB_RX_FRAG3_PTR_RAW)
-
-#define W_TCB_RX_FRAG3_LEN_RAW 29
-#define S_TCB_RX_FRAG3_LEN_RAW 7
-#define M_TCB_RX_FRAG3_LEN_RAW 0x7ffffffULL
-#define V_TCB_RX_FRAG3_LEN_RAW(x) ((x) << S_TCB_RX_FRAG3_LEN_RAW)
-
-#define W_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 30
-#define S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 2
-#define M_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 0x7ffffffULL
-#define V_TCB_RX_FRAG3_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW)
-
-#define W_TCB_PDU_HDR_LEN 30
-#define S_TCB_PDU_HDR_LEN 29
-#define M_TCB_PDU_HDR_LEN 0xffULL
-#define V_TCB_PDU_HDR_LEN(x) ((x) << S_TCB_PDU_HDR_LEN)
-
-#define W_TCB_SLUSH1 31
-#define S_TCB_SLUSH1 5
-#define M_TCB_SLUSH1 0x7ffffULL
-#define V_TCB_SLUSH1(x) ((x) << S_TCB_SLUSH1)
-
-#define W_TCB_ULP_RAW 31
-#define S_TCB_ULP_RAW 24
-#define M_TCB_ULP_RAW 0xffULL
-#define V_TCB_ULP_RAW(x) ((x) << S_TCB_ULP_RAW)
-
-#define W_TCB_DDP_RDMAP_VERSION 25
-#define S_TCB_DDP_RDMAP_VERSION 30
-#define M_TCB_DDP_RDMAP_VERSION 0x1ULL
-#define V_TCB_DDP_RDMAP_VERSION(x) ((x) << S_TCB_DDP_RDMAP_VERSION)
-
-#define W_TCB_MARKER_ENABLE_RX 25
-#define S_TCB_MARKER_ENABLE_RX 31
-#define M_TCB_MARKER_ENABLE_RX 0x1ULL
-#define V_TCB_MARKER_ENABLE_RX(x) ((x) << S_TCB_MARKER_ENABLE_RX)
-
-#define W_TCB_MARKER_ENABLE_TX 26
-#define S_TCB_MARKER_ENABLE_TX 0
-#define M_TCB_MARKER_ENABLE_TX 0x1ULL
-#define V_TCB_MARKER_ENABLE_TX(x) ((x) << S_TCB_MARKER_ENABLE_TX)
-
-#define W_TCB_CRC_ENABLE 26
-#define S_TCB_CRC_ENABLE 1
-#define M_TCB_CRC_ENABLE 0x1ULL
-#define V_TCB_CRC_ENABLE(x) ((x) << S_TCB_CRC_ENABLE)
-
-#define W_TCB_IRS_ULP 26
-#define S_TCB_IRS_ULP 2
-#define M_TCB_IRS_ULP 0x1ffULL
-#define V_TCB_IRS_ULP(x) ((x) << S_TCB_IRS_ULP)
-
-#define W_TCB_ISS_ULP 26
-#define S_TCB_ISS_ULP 11
-#define M_TCB_ISS_ULP 0x1ffULL
-#define V_TCB_ISS_ULP(x) ((x) << S_TCB_ISS_ULP)
-
-#define W_TCB_TX_PDU_LEN 26
-#define S_TCB_TX_PDU_LEN 20
-#define M_TCB_TX_PDU_LEN 0x3fffULL
-#define V_TCB_TX_PDU_LEN(x) ((x) << S_TCB_TX_PDU_LEN)
-
-#define W_TCB_TX_PDU_OUT 27
-#define S_TCB_TX_PDU_OUT 2
-#define M_TCB_TX_PDU_OUT 0x1ULL
-#define V_TCB_TX_PDU_OUT(x) ((x) << S_TCB_TX_PDU_OUT)
-
-#define W_TCB_CQ_IDX_SQ 27
-#define S_TCB_CQ_IDX_SQ 3
-#define M_TCB_CQ_IDX_SQ 0xffffULL
-#define V_TCB_CQ_IDX_SQ(x) ((x) << S_TCB_CQ_IDX_SQ)
-
-#define W_TCB_CQ_IDX_RQ 27
-#define S_TCB_CQ_IDX_RQ 19
-#define M_TCB_CQ_IDX_RQ 0xffffULL
-#define V_TCB_CQ_IDX_RQ(x) ((x) << S_TCB_CQ_IDX_RQ)
-
-#define W_TCB_QP_ID 28
-#define S_TCB_QP_ID 3
-#define M_TCB_QP_ID 0xffffULL
-#define V_TCB_QP_ID(x) ((x) << S_TCB_QP_ID)
-
-#define W_TCB_PD_ID 28
-#define S_TCB_PD_ID 19
-#define M_TCB_PD_ID 0xffffULL
-#define V_TCB_PD_ID(x) ((x) << S_TCB_PD_ID)
-
-#define W_TCB_STAG 29
-#define S_TCB_STAG 3
-#define M_TCB_STAG 0xffffffffULL
-#define V_TCB_STAG(x) ((x) << S_TCB_STAG)
-
-#define W_TCB_RQ_START 30
-#define S_TCB_RQ_START 3
-#define M_TCB_RQ_START 0x3ffffffULL
-#define V_TCB_RQ_START(x) ((x) << S_TCB_RQ_START)
-
-#define W_TCB_RQ_MSN 30
-#define S_TCB_RQ_MSN 29
-#define M_TCB_RQ_MSN 0x3ffULL
-#define V_TCB_RQ_MSN(x) ((x) << S_TCB_RQ_MSN)
-
-#define W_TCB_RQ_MAX_OFFSET 31
-#define S_TCB_RQ_MAX_OFFSET 7
-#define M_TCB_RQ_MAX_OFFSET 0xfULL
-#define V_TCB_RQ_MAX_OFFSET(x) ((x) << S_TCB_RQ_MAX_OFFSET)
-
-#define W_TCB_RQ_WRITE_PTR 31
-#define S_TCB_RQ_WRITE_PTR 11
-#define M_TCB_RQ_WRITE_PTR 0x3ffULL
-#define V_TCB_RQ_WRITE_PTR(x) ((x) << S_TCB_RQ_WRITE_PTR)
-
-#define W_TCB_INB_WRITE_PERM 31
-#define S_TCB_INB_WRITE_PERM 21
-#define M_TCB_INB_WRITE_PERM 0x1ULL
-#define V_TCB_INB_WRITE_PERM(x) ((x) << S_TCB_INB_WRITE_PERM)
-
-#define W_TCB_INB_READ_PERM 31
-#define S_TCB_INB_READ_PERM 22
-#define M_TCB_INB_READ_PERM 0x1ULL
-#define V_TCB_INB_READ_PERM(x) ((x) << S_TCB_INB_READ_PERM)
-
-#define W_TCB_ORD_L_BIT_VLD 31
-#define S_TCB_ORD_L_BIT_VLD 23
-#define M_TCB_ORD_L_BIT_VLD 0x1ULL
-#define V_TCB_ORD_L_BIT_VLD(x) ((x) << S_TCB_ORD_L_BIT_VLD)
-
-#define W_TCB_RDMAP_OPCODE 31
-#define S_TCB_RDMAP_OPCODE 24
-#define M_TCB_RDMAP_OPCODE 0xfULL
-#define V_TCB_RDMAP_OPCODE(x) ((x) << S_TCB_RDMAP_OPCODE)
-
-#define W_TCB_TX_FLUSH 31
-#define S_TCB_TX_FLUSH 28
-#define M_TCB_TX_FLUSH 0x1ULL
-#define V_TCB_TX_FLUSH(x) ((x) << S_TCB_TX_FLUSH)
-
-#define W_TCB_TX_OOS_RXMT 31
-#define S_TCB_TX_OOS_RXMT 29
-#define M_TCB_TX_OOS_RXMT 0x1ULL
-#define V_TCB_TX_OOS_RXMT(x) ((x) << S_TCB_TX_OOS_RXMT)
-
-#define W_TCB_TX_OOS_TXMT 31
-#define S_TCB_TX_OOS_TXMT 30
-#define M_TCB_TX_OOS_TXMT 0x1ULL
-#define V_TCB_TX_OOS_TXMT(x) ((x) << S_TCB_TX_OOS_TXMT)
-
-#define W_TCB_SLUSH_AUX2 31
-#define S_TCB_SLUSH_AUX2 31
-#define M_TCB_SLUSH_AUX2 0x1ULL
-#define V_TCB_SLUSH_AUX2(x) ((x) << S_TCB_SLUSH_AUX2)
-
-#define W_TCB_RX_FRAG1_PTR_RAW2 25
-#define S_TCB_RX_FRAG1_PTR_RAW2 30
-#define M_TCB_RX_FRAG1_PTR_RAW2 0x1ffffULL
-#define V_TCB_RX_FRAG1_PTR_RAW2(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW2)
-
-#define W_TCB_RX_DDP_FLAGS 26
-#define S_TCB_RX_DDP_FLAGS 15
-#define M_TCB_RX_DDP_FLAGS 0x3ffULL
-#define V_TCB_RX_DDP_FLAGS(x) ((x) << S_TCB_RX_DDP_FLAGS)
-
-#define W_TCB_SLUSH_AUX3 26
-#define S_TCB_SLUSH_AUX3 31
-#define M_TCB_SLUSH_AUX3 0x1ffULL
-#define V_TCB_SLUSH_AUX3(x) ((x) << S_TCB_SLUSH_AUX3)
-
-#define W_TCB_RX_DDP_BUF0_OFFSET 27
-#define S_TCB_RX_DDP_BUF0_OFFSET 8
-#define M_TCB_RX_DDP_BUF0_OFFSET 0x3fffffULL
-#define V_TCB_RX_DDP_BUF0_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF0_OFFSET)
-
-#define W_TCB_RX_DDP_BUF0_LEN 27
-#define S_TCB_RX_DDP_BUF0_LEN 30
-#define M_TCB_RX_DDP_BUF0_LEN 0x3fffffULL
-#define V_TCB_RX_DDP_BUF0_LEN(x) ((x) << S_TCB_RX_DDP_BUF0_LEN)
-
-#define W_TCB_RX_DDP_BUF1_OFFSET 28
-#define S_TCB_RX_DDP_BUF1_OFFSET 20
-#define M_TCB_RX_DDP_BUF1_OFFSET 0x3fffffULL
-#define V_TCB_RX_DDP_BUF1_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF1_OFFSET)
-
-#define W_TCB_RX_DDP_BUF1_LEN 29
-#define S_TCB_RX_DDP_BUF1_LEN 10
-#define M_TCB_RX_DDP_BUF1_LEN 0x3fffffULL
-#define V_TCB_RX_DDP_BUF1_LEN(x) ((x) << S_TCB_RX_DDP_BUF1_LEN)
-
-#define W_TCB_RX_DDP_BUF0_TAG 30
-#define S_TCB_RX_DDP_BUF0_TAG 0
-#define M_TCB_RX_DDP_BUF0_TAG 0xffffffffULL
-#define V_TCB_RX_DDP_BUF0_TAG(x) ((x) << S_TCB_RX_DDP_BUF0_TAG)
-
-#define W_TCB_RX_DDP_BUF1_TAG 31
-#define S_TCB_RX_DDP_BUF1_TAG 0
-#define M_TCB_RX_DDP_BUF1_TAG 0xffffffffULL
-#define V_TCB_RX_DDP_BUF1_TAG(x) ((x) << S_TCB_RX_DDP_BUF1_TAG)
-
-#define S_TF_DACK 10
-#define V_TF_DACK(x) ((x) << S_TF_DACK)
-
-#define S_TF_NAGLE 11
-#define V_TF_NAGLE(x) ((x) << S_TF_NAGLE)
-
-#define S_TF_RECV_SCALE 12
-#define V_TF_RECV_SCALE(x) ((x) << S_TF_RECV_SCALE)
-
-#define S_TF_RECV_TSTMP 13
-#define V_TF_RECV_TSTMP(x) ((x) << S_TF_RECV_TSTMP)
-
-#define S_TF_RECV_SACK 14
-#define V_TF_RECV_SACK(x) ((x) << S_TF_RECV_SACK)
-
-#define S_TF_TURBO 15
-#define V_TF_TURBO(x) ((x) << S_TF_TURBO)
-
-#define S_TF_KEEPALIVE 16
-#define V_TF_KEEPALIVE(x) ((x) << S_TF_KEEPALIVE)
-
-#define S_TF_TCAM_BYPASS 17
-#define V_TF_TCAM_BYPASS(x) ((x) << S_TF_TCAM_BYPASS)
-
-#define S_TF_CORE_FIN 18
-#define V_TF_CORE_FIN(x) ((x) << S_TF_CORE_FIN)
-
-#define S_TF_CORE_MORE 19
-#define V_TF_CORE_MORE(x) ((x) << S_TF_CORE_MORE)
-
-#define S_TF_MIGRATING 20
-#define V_TF_MIGRATING(x) ((x) << S_TF_MIGRATING)
-
-#define S_TF_ACTIVE_OPEN 21
-#define V_TF_ACTIVE_OPEN(x) ((x) << S_TF_ACTIVE_OPEN)
-
-#define S_TF_ASK_MODE 22
-#define V_TF_ASK_MODE(x) ((x) << S_TF_ASK_MODE)
-
-#define S_TF_NON_OFFLOAD 23
-#define V_TF_NON_OFFLOAD(x) ((x) << S_TF_NON_OFFLOAD)
-
-#define S_TF_MOD_SCHD 24
-#define V_TF_MOD_SCHD(x) ((x) << S_TF_MOD_SCHD)
-
-#define S_TF_MOD_SCHD_REASON0 25
-#define V_TF_MOD_SCHD_REASON0(x) ((x) << S_TF_MOD_SCHD_REASON0)
-
-#define S_TF_MOD_SCHD_REASON1 26
-#define V_TF_MOD_SCHD_REASON1(x) ((x) << S_TF_MOD_SCHD_REASON1)
-
-#define S_TF_MOD_SCHD_RX 27
-#define V_TF_MOD_SCHD_RX(x) ((x) << S_TF_MOD_SCHD_RX)
-
-#define S_TF_CORE_PUSH 28
-#define V_TF_CORE_PUSH(x) ((x) << S_TF_CORE_PUSH)
-
-#define S_TF_RCV_COALESCE_ENABLE 29
-#define V_TF_RCV_COALESCE_ENABLE(x) ((x) << S_TF_RCV_COALESCE_ENABLE)
-
-#define S_TF_RCV_COALESCE_PUSH 30
-#define V_TF_RCV_COALESCE_PUSH(x) ((x) << S_TF_RCV_COALESCE_PUSH)
-
-#define S_TF_RCV_COALESCE_LAST_PSH 31
-#define V_TF_RCV_COALESCE_LAST_PSH(x) ((x) << S_TF_RCV_COALESCE_LAST_PSH)
-
-#define S_TF_RCV_COALESCE_HEARTBEAT 32
-#define V_TF_RCV_COALESCE_HEARTBEAT(x) ((x) << S_TF_RCV_COALESCE_HEARTBEAT)
-
-#define S_TF_HALF_CLOSE 33
-#define V_TF_HALF_CLOSE(x) ((x) << S_TF_HALF_CLOSE)
-
-#define S_TF_DACK_MSS 34
-#define V_TF_DACK_MSS(x) ((x) << S_TF_DACK_MSS)
-
-#define S_TF_CCTRL_SEL0 35
-#define V_TF_CCTRL_SEL0(x) ((x) << S_TF_CCTRL_SEL0)
-
-#define S_TF_CCTRL_SEL1 36
-#define V_TF_CCTRL_SEL1(x) ((x) << S_TF_CCTRL_SEL1)
-
-#define S_TF_TCP_NEWRENO_FAST_RECOVERY 37
-#define V_TF_TCP_NEWRENO_FAST_RECOVERY(x) ((x) << S_TF_TCP_NEWRENO_FAST_RECOVERY)
-
-#define S_TF_TX_PACE_AUTO 38
-#define V_TF_TX_PACE_AUTO(x) ((x) << S_TF_TX_PACE_AUTO)
-
-#define S_TF_PEER_FIN_HELD 39
-#define V_TF_PEER_FIN_HELD(x) ((x) << S_TF_PEER_FIN_HELD)
-
-#define S_TF_CORE_URG 40
-#define V_TF_CORE_URG(x) ((x) << S_TF_CORE_URG)
-
-#define S_TF_RDMA_ERROR 41
-#define V_TF_RDMA_ERROR(x) ((x) << S_TF_RDMA_ERROR)
-
-#define S_TF_SSWS_DISABLED 42
-#define V_TF_SSWS_DISABLED(x) ((x) << S_TF_SSWS_DISABLED)
-
-#define S_TF_DUPACK_COUNT_ODD 43
-#define V_TF_DUPACK_COUNT_ODD(x) ((x) << S_TF_DUPACK_COUNT_ODD)
-
-#define S_TF_TX_CHANNEL 44
-#define V_TF_TX_CHANNEL(x) ((x) << S_TF_TX_CHANNEL)
-
-#define S_TF_RX_CHANNEL 45
-#define V_TF_RX_CHANNEL(x) ((x) << S_TF_RX_CHANNEL)
-
-#define S_TF_TX_PACE_FIXED 46
-#define V_TF_TX_PACE_FIXED(x) ((x) << S_TF_TX_PACE_FIXED)
-
-#define S_TF_RDMA_FLM_ERROR 47
-#define V_TF_RDMA_FLM_ERROR(x) ((x) << S_TF_RDMA_FLM_ERROR)
-
-#define S_TF_RX_FLOW_CONTROL_DISABLE 48
-#define V_TF_RX_FLOW_CONTROL_DISABLE(x) ((x) << S_TF_RX_FLOW_CONTROL_DISABLE)
-
-#endif /* _TCB_DEFS_H */
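[Editor's note] The deleted tcb.h follows Chelsio's usual register-description convention: for each TCB field there is a W_* macro (index of the 32-bit TCB word that holds the field), S_* (bit offset within that word), M_* (field mask before shifting) and V_*(x) (value shifted into place). A minimal, self-contained sketch of how a caller combines them when building a set-TCB-field mask/value pair; the field and the state value 4 are purely illustrative, and the three defines are reproduced from the deleted header so the snippet compiles on its own:

    #include <stdint.h>
    #include <stdio.h>

    /* Reproduced from the deleted tcb.h for the T_STATE field:
     * S_ = bit offset, M_ = field mask, V_(x) = value shifted into place.
     * (W_TCB_T_STATE == 0 names the TCB word that holds the field.)
     */
    #define S_TCB_T_STATE    0
    #define M_TCB_T_STATE    0xfULL
    #define V_TCB_T_STATE(x) ((x) << S_TCB_T_STATE)

    int main(void)
    {
            /* A set-TCB-field request pairs a write mask with a value: */
            uint64_t mask = V_TCB_T_STATE(M_TCB_T_STATE); /* bits to change   */
            uint64_t val  = V_TCB_T_STATE(4);             /* illustrative state */

            printf("mask=%#llx val=%#llx\n",
                   (unsigned long long)mask, (unsigned long long)val);
            return 0;
    }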
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index e87fc0408470..d69dece3b1d5 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -495,7 +495,6 @@ static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
release_ep_resources(ep);
- kfree_skb(skb);
return 0;
}
@@ -506,7 +505,6 @@ static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
c4iw_put_ep(&ep->parent_ep->com);
release_ep_resources(ep);
- kfree_skb(skb);
return 0;
}
@@ -2424,20 +2422,6 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
pr_debug("ep %p tid %u\n", ep, ep->hwtid);
-
- skb_get(skb);
- rpl = cplhdr(skb);
- if (!is_t4(adapter_type)) {
- skb_trim(skb, roundup(sizeof(*rpl5), 16));
- rpl5 = (void *)rpl;
- INIT_TP_WR(rpl5, ep->hwtid);
- } else {
- skb_trim(skb, sizeof(*rpl));
- INIT_TP_WR(rpl, ep->hwtid);
- }
- OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
- ep->hwtid));
-
cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
enable_tcp_timestamps && req->tcpopt.tstamp,
(ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
@@ -2483,6 +2467,20 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
if (tcph->ece && tcph->cwr)
opt2 |= CCTRL_ECN_V(1);
}
+
+ skb_get(skb);
+ rpl = cplhdr(skb);
+ if (!is_t4(adapter_type)) {
+ skb_trim(skb, roundup(sizeof(*rpl5), 16));
+ rpl5 = (void *)rpl;
+ INIT_TP_WR(rpl5, ep->hwtid);
+ } else {
+ skb_trim(skb, sizeof(*rpl));
+ INIT_TP_WR(rpl, ep->hwtid);
+ }
+ OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
+ ep->hwtid));
+
if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
u32 isn = (prandom_u32() & ~7UL) - 1;
opt2 |= T5_OPT_2_VALID_F;
@@ -3038,6 +3036,10 @@ static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
}
+ /* As per draft-hilland-iwarp-verbs-v1.0, sec 6.2.3,
+ * when entering the TERM state the RNIC MUST initiate a CLOSE.
+ */
+ c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
c4iw_put_ep(&ep->com);
} else
pr_warn("TERM received tid %u no ep/qp\n", tid);
@@ -3381,7 +3383,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) {
err = pick_local_ipaddrs(dev, cm_id);
if (err)
- goto fail2;
+ goto fail3;
}
/* find a route */
@@ -3403,7 +3405,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) {
err = pick_local_ip6addrs(dev, cm_id);
if (err)
- goto fail2;
+ goto fail3;
}
/* find a route */
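[Editor's note] The accept_cr() hunks above move the skb_get()/skb_trim()/INIT_TP_WR() block so that the reply header is only written after the MTU and opt2 computation has finished reading fields of the incoming request; since the reply is built in the same skb that carried the CPL_PASS_ACCEPT_REQ, reusing the buffer earlier would overwrite request data (e.g. req->tcpopt, the TCP header) that is still needed. A short sketch of the pattern with hypothetical types and names, not the driver's own structures:

    /* Sketch, hypothetical names: when the reply is built in place over
     * the request, consume every request field first, then overwrite.
     */
    struct pass_accept_req { unsigned int tcpopt; };
    struct pass_accept_rpl { unsigned int opt2; };

    static void build_reply(void *hdr)
    {
            const struct pass_accept_req *req = hdr;
            struct pass_accept_rpl *rpl;
            unsigned int opt2;

            opt2 = req->tcpopt & 0xff;      /* 1. read the request fields      */

            rpl = hdr;                      /* 2. only then reuse the header   */
            rpl->opt2 = opt2;               /*    area for the reply           */
    }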
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index a8b9548bd1a2..599340c1f0b8 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -242,10 +242,13 @@ static void set_ep_sin6_addrs(struct c4iw_ep *ep,
}
}
-static int dump_qp(struct c4iw_qp *qp, struct c4iw_debugfs_data *qpd)
+static int dump_qp(unsigned long id, struct c4iw_qp *qp,
+ struct c4iw_debugfs_data *qpd)
{
int space;
int cc;
+ if (id != qp->wq.sq.qid)
+ return 0;
space = qpd->bufsize - qpd->pos - 1;
if (space == 0)
@@ -350,7 +353,7 @@ static int qp_open(struct inode *inode, struct file *file)
xa_lock_irq(&qpd->devp->qps);
xa_for_each(&qpd->devp->qps, index, qp)
- dump_qp(qp, qpd);
+ dump_qp(index, qp, qpd);
xa_unlock_irq(&qpd->devp->qps);
qpd->buf[qpd->pos++] = 0;
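[Editor's note] dump_qp() now receives the xarray index and skips entries whose index is not the QP's SQ qid, so each QP is printed once under its canonical key even if the same object is reachable under more than one index in the qps xarray. A generic sketch of that filtering pattern, with illustrative names only:

    #include <linux/xarray.h>
    #include <linux/printk.h>
    #include <linux/types.h>

    struct obj { u32 canonical_id; };

    /* Iterate an xarray that may hold the same object under more than
     * one index, acting on each object only under its canonical key.
     */
    static void dump_all(struct xarray *xa)
    {
            struct obj *o;
            unsigned long index;

            xa_for_each(xa, index, o) {
                    if (index != o->canonical_id)   /* alias entry - skip */
                            continue;
                    pr_info("obj %u\n", o->canonical_id);
            }
    }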
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index aa772ee0706f..962dc97a8ff2 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -275,13 +275,17 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
struct sk_buff *skb, struct c4iw_wr_wait *wr_waitp)
{
int err;
- struct fw_ri_tpte tpt;
+ struct fw_ri_tpte *tpt;
u32 stag_idx;
static atomic_t key;
if (c4iw_fatal_error(rdev))
return -EIO;
+ tpt = kmalloc(sizeof(*tpt), GFP_KERNEL);
+ if (!tpt)
+ return -ENOMEM;
+
stag_state = stag_state > 0;
stag_idx = (*stag) >> 8;
@@ -291,6 +295,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
mutex_lock(&rdev->stats.lock);
rdev->stats.stag.fail++;
mutex_unlock(&rdev->stats.lock);
+ kfree(tpt);
return -ENOMEM;
}
mutex_lock(&rdev->stats.lock);
@@ -305,28 +310,28 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
/* write TPT entry */
if (reset_tpt_entry)
- memset(&tpt, 0, sizeof(tpt));
+ memset(tpt, 0, sizeof(*tpt));
else {
- tpt.valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F |
+ tpt->valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F |
FW_RI_TPTE_STAGKEY_V((*stag & FW_RI_TPTE_STAGKEY_M)) |
FW_RI_TPTE_STAGSTATE_V(stag_state) |
FW_RI_TPTE_STAGTYPE_V(type) | FW_RI_TPTE_PDID_V(pdid));
- tpt.locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) |
+ tpt->locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) |
(bind_enabled ? FW_RI_TPTE_MWBINDEN_F : 0) |
FW_RI_TPTE_ADDRTYPE_V((zbva ? FW_RI_ZERO_BASED_TO :
FW_RI_VA_BASED_TO))|
FW_RI_TPTE_PS_V(page_size));
- tpt.nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32(
+ tpt->nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32(
FW_RI_TPTE_PBLADDR_V(PBL_OFF(rdev, pbl_addr)>>3));
- tpt.len_lo = cpu_to_be32((u32)(len & 0xffffffffUL));
- tpt.va_hi = cpu_to_be32((u32)(to >> 32));
- tpt.va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL));
- tpt.dca_mwbcnt_pstag = cpu_to_be32(0);
- tpt.len_hi = cpu_to_be32((u32)(len >> 32));
+ tpt->len_lo = cpu_to_be32((u32)(len & 0xffffffffUL));
+ tpt->va_hi = cpu_to_be32((u32)(to >> 32));
+ tpt->va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL));
+ tpt->dca_mwbcnt_pstag = cpu_to_be32(0);
+ tpt->len_hi = cpu_to_be32((u32)(len >> 32));
}
err = write_adapter_mem(rdev, stag_idx +
(rdev->lldi.vr->stag.start >> 5),
- sizeof(tpt), &tpt, skb, wr_waitp);
+ sizeof(*tpt), tpt, skb, wr_waitp);
if (reset_tpt_entry) {
c4iw_put_resource(&rdev->resource.tpt_table, stag_idx);
@@ -334,6 +339,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
rdev->stats.stag.cur -= 32;
mutex_unlock(&rdev->stats.lock);
}
+ kfree(tpt);
return err;
}
@@ -537,7 +543,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mhp->rhp = rhp;
- mhp->umem = ib_umem_get(udata, start, length, acc, 0);
+ mhp->umem = ib_umem_get(pd->device, start, length, acc);
if (IS_ERR(mhp->umem))
goto err_free_skb;
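[Editor's note] In write_tpt_entry() the fw_ri_tpte descriptor moves from the stack to kmalloc(), with a matching kfree() added on both the early-exit and normal paths before the buffer is handed to write_adapter_mem(). A compact sketch of the stack-to-heap conversion pattern; struct and helper names here are illustrative stubs, not the driver's:

    #include <linux/slab.h>
    #include <linux/string.h>
    #include <linux/types.h>

    struct tpte { u32 w[8]; };
    struct dev_ctx;                         /* opaque, illustrative */

    static void fill_entry(struct tpte *t) { t->w[0] = 1; }
    static int push_to_adapter(struct dev_ctx *c, void *p, size_t len) { return 0; }

    static int write_entry(struct dev_ctx *ctx, bool reset)
    {
            struct tpte *tpt;
            int err;

            tpt = kmalloc(sizeof(*tpt), GFP_KERNEL);    /* was on the stack */
            if (!tpt)
                    return -ENOMEM;

            if (reset)
                    memset(tpt, 0, sizeof(*tpt));
            else
                    fill_entry(tpt);

            err = push_to_adapter(ctx, tpt, sizeof(*tpt));
            kfree(tpt);                                 /* freed on every path */
            return err;
    }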
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 5e59c5708729..ba83d942997c 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -305,31 +305,10 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
static int c4iw_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props)
{
- struct c4iw_dev *dev;
- struct net_device *netdev;
- struct in_device *inetdev;
-
+ int ret = 0;
pr_debug("ibdev %p\n", ibdev);
-
- dev = to_c4iw_dev(ibdev);
- netdev = dev->rdev.lldi.ports[port-1];
- /* props being zeroed by the caller, avoid zeroing it here */
- props->max_mtu = IB_MTU_4096;
- props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
-
- if (!netif_carrier_ok(netdev))
- props->state = IB_PORT_DOWN;
- else {
- inetdev = in_dev_get(netdev);
- if (inetdev) {
- if (inetdev->ifa_list)
- props->state = IB_PORT_ACTIVE;
- else
- props->state = IB_PORT_INIT;
- in_dev_put(inetdev);
- } else
- props->state = IB_PORT_INIT;
- }
+ ret = ib_get_eth_speed(ibdev, port, &props->active_speed,
+ &props->active_width);
props->port_cap_flags =
IB_PORT_CM_SUP |
@@ -339,11 +318,9 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
props->gid_tbl_len = 1;
props->pkey_tbl_len = 1;
- props->active_width = 2;
- props->active_speed = IB_SPEED_DDR;
props->max_msg_sz = -1;
- return 0;
+ return ret;
}
static ssize_t hw_rev_show(struct device *dev,
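[Editor's note] c4iw_query_port() now lets the core derive active_speed/active_width from the underlying Ethernet device via ib_get_eth_speed() instead of hardcoding 4x/DDR, and drops the in_device-based port-state guess. A minimal sketch of a query_port built around that call; only the call shape visible in the hunk is relied upon, everything else is illustrative:

    #include <rdma/ib_verbs.h>

    static int sketch_query_port(struct ib_device *ibdev, u8 port,
                                 struct ib_port_attr *props)
    {
            int ret;

            /* props is pre-zeroed by the caller in the verbs layer */
            ret = ib_get_eth_speed(ibdev, port, &props->active_speed,
                                   &props->active_width);

            props->max_mtu = IB_MTU_4096;
            props->gid_tbl_len = 1;
            props->pkey_tbl_len = 1;

            return ret;
    }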
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index eb9368be28c1..89ac2f9ae6dd 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1948,10 +1948,10 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
qhp->attr.layer_etype = attrs->layer_etype;
qhp->attr.ecode = attrs->ecode;
ep = qhp->ep;
- c4iw_get_ep(&ep->com);
- disconnect = 1;
if (!internal) {
+ c4iw_get_ep(&ep->com);
terminate = 1;
+ disconnect = 1;
} else {
terminate = qhp->attr.send_term;
ret = rdma_fini(rhp, qhp, ep);
@@ -2737,15 +2737,11 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs,
if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6)
srq->flags = T4_SRQ_LIMIT_SUPPORT;
- ret = xa_insert_irq(&rhp->qps, srq->wq.qid, srq, GFP_KERNEL);
- if (ret)
- goto err_free_queue;
-
if (udata) {
srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL);
if (!srq_key_mm) {
ret = -ENOMEM;
- goto err_remove_handle;
+ goto err_free_queue;
}
srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL);
if (!srq_db_key_mm) {
@@ -2789,8 +2785,6 @@ err_free_srq_db_key_mm:
kfree(srq_db_key_mm);
err_free_srq_key_mm:
kfree(srq_key_mm);
-err_remove_handle:
- xa_erase_irq(&rhp->qps, srq->wq.qid);
err_free_queue:
free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
srq->wr_waitp);
@@ -2813,8 +2807,6 @@ void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
rhp = srq->rhp;
pr_debug("%s id %d\n", __func__, srq->wq.qid);
-
- xa_erase_irq(&rhp->qps, srq->wq.qid);
ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
ibucontext);
free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h
index 119f8efec564..aa7396a1588a 100644
--- a/drivers/infiniband/hw/efa/efa.h
+++ b/drivers/infiniband/hw/efa/efa.h
@@ -60,8 +60,6 @@ struct efa_dev {
u64 mem_bar_len;
u64 db_bar_addr;
u64 db_bar_len;
- u8 addr[EFA_GID_SIZE];
- u32 mtu;
int admin_msix_vector_idx;
struct efa_irq admin_irq;
@@ -71,8 +69,6 @@ struct efa_dev {
struct efa_ucontext {
struct ib_ucontext ibucontext;
- struct xarray mmap_xa;
- u32 mmap_xa_page;
u16 uarn;
};
@@ -91,6 +87,7 @@ struct efa_cq {
struct efa_ucontext *ucontext;
dma_addr_t dma_addr;
void *cpu_addr;
+ struct rdma_user_mmap_entry *mmap_entry;
size_t size;
u16 cq_idx;
};
@@ -101,6 +98,13 @@ struct efa_qp {
void *rq_cpu_addr;
size_t rq_size;
enum ib_qp_state state;
+
+ /* Used for saving mmap_xa entries */
+ struct rdma_user_mmap_entry *sq_db_mmap_entry;
+ struct rdma_user_mmap_entry *llq_desc_mmap_entry;
+ struct rdma_user_mmap_entry *rq_db_mmap_entry;
+ struct rdma_user_mmap_entry *rq_mmap_entry;
+
u32 qp_handle;
u32 max_send_wr;
u32 max_recv_wr;
@@ -147,6 +151,7 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata);
void efa_dealloc_ucontext(struct ib_ucontext *ibucontext);
int efa_mmap(struct ib_ucontext *ibucontext,
struct vm_area_struct *vma);
+void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
int efa_create_ah(struct ib_ah *ibah,
struct rdma_ah_attr *ah_attr,
u32 flags,
@@ -156,5 +161,8 @@ int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_udata *udata);
enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
u8 port_num);
+struct rdma_hw_stats *efa_alloc_hw_stats(struct ib_device *ibdev, u8 port_num);
+int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u8 port_num, int index);
#endif /* _EFA_H_ */
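[Editor's note] efa.h drops the driver-private mmap_xa/mmap_xa_page bookkeeping in favour of the core rdma_user_mmap_entry mechanism: each mappable resource (CQ buffer, QP doorbells, LLQ descriptors, RQ) keeps a pointer to its entry, and the new efa_mmap_free() hook lets the core notify the driver when an entry goes away. A sketch of the embedding/container_of pattern this relies on (mirroring the to_emmap() helper added later in this diff); names other than rdma_user_mmap_entry are illustrative:

    #include <linux/kernel.h>
    #include <linux/slab.h>
    #include <rdma/ib_verbs.h>

    /* Driver-private wrapper around the core mmap entry. */
    struct sketch_mmap_entry {
            struct rdma_user_mmap_entry rdma_entry;
            u64 address;
            u8  mmap_flag;
    };

    static inline struct sketch_mmap_entry *
    to_sketch(struct rdma_user_mmap_entry *rdma_entry)
    {
            return container_of(rdma_entry, struct sketch_mmap_entry, rdma_entry);
    }

    /* .mmap_free callback: the core drops its reference, the driver
     * frees its wrapper (and any backing pages it owns).
     */
    static void sketch_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
    {
            kfree(to_sketch(rdma_entry));
    }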
diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
index 2be0469d545f..74b787a90660 100644
--- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
+++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
@@ -160,10 +160,16 @@ struct efa_admin_create_qp_resp {
/* Common Admin Queue completion descriptor */
struct efa_admin_acq_common_desc acq_common_desc;
- /* Opaque handle to be used for consequent operations on the QP */
+ /*
+ * Opaque handle to be used for consequent admin operations on the
+ * QP
+ */
u32 qp_handle;
- /* QP number in the given EFA virtual device */
+ /*
+ * QP number in the given EFA virtual device. Least-significant bits
+ * (as needed according to max_qp) carry unique QP ID
+ */
u16 qp_num;
/* MBZ */
@@ -286,6 +292,7 @@ struct efa_admin_create_ah_cmd {
/* PD number */
u16 pd;
+ /* MBZ */
u16 reserved;
};
@@ -296,6 +303,7 @@ struct efa_admin_create_ah_resp {
/* Target interface address handle (opaque) */
u16 ah;
+ /* MBZ */
u16 reserved;
};
@@ -362,12 +370,17 @@ struct efa_admin_reg_mr_cmd {
/*
* permissions
- * 0 : local_write_enable - Write permissions: value
- * of 1 needed for RQ buffers and for RDMA write
- * 7:1 : reserved1 - remote access flags, etc
+ * 0 : local_write_enable - Local write permissions:
+ * must be set for RQ buffers and buffers posted for
+ * RDMA Read requests
+ * 1 : reserved1 - MBZ
+ * 2 : remote_read_enable - Remote read permissions:
+ * must be set to enable RDMA read from the region
+ * 7:3 : reserved2 - MBZ
*/
u8 permissions;
+ /* MBZ */
u16 reserved16_w5;
/* number of pages in PBL (redundant, could be calculated) */
@@ -415,20 +428,20 @@ struct efa_admin_create_cq_cmd {
struct efa_admin_aq_common_desc aq_common_desc;
/*
- * 4:0 : reserved5
+ * 4:0 : reserved5 - MBZ
* 5 : interrupt_mode_enabled - if set, cq operates
* in interrupt mode (i.e. CQ events and MSI-X are
* generated), otherwise - polling
* 6 : virt - If set, ring base address is virtual
* (IOVA returned by MR registration)
- * 7 : reserved6
+ * 7 : reserved6 - MBZ
*/
u8 cq_caps_1;
/*
* 4:0 : cq_entry_size_words - size of CQ entry in
* 32-bit words, valid values: 4, 8.
- * 7:5 : reserved7
+ * 7:5 : reserved7 - MBZ
*/
u8 cq_caps_2;
@@ -474,6 +487,7 @@ struct efa_admin_destroy_cq_cmd {
u16 cq_idx;
+ /* MBZ */
u16 reserved1;
};
@@ -526,7 +540,7 @@ struct efa_admin_get_set_feature_common_desc {
/*
* 1:0 : select - 0x1 - current value; 0x3 - default
* value
- * 7:3 : reserved3
+ * 7:3 : reserved3 - MBZ
*/
u8 flags;
@@ -553,38 +567,49 @@ struct efa_admin_feature_device_attr_desc {
/* Bar used for SQ and RQ doorbells */
u16 db_bar;
- /* Indicates how many bits are used physical address access */
+ /* Indicates how many bits are used on physical address access */
u8 phys_addr_width;
- /* Indicates how many bits are used virtual address access */
+ /* Indicates how many bits are used on virtual address access */
u8 virt_addr_width;
+
+ /*
+ * 0 : rdma_read - If set, RDMA Read is supported on
+ * TX queues
+ * 31:1 : reserved - MBZ
+ */
+ u32 device_caps;
+
+ /* Max RDMA transfer size in bytes */
+ u32 max_rdma_size;
};
struct efa_admin_feature_queue_attr_desc {
/* The maximum number of queue pairs supported */
u32 max_qp;
+ /* Maximum number of WQEs per Send Queue */
u32 max_sq_depth;
- /* max send wr used in inline-buf */
+ /* Maximum size of data that can be sent inline in a Send WQE */
u32 inline_buf_size;
+ /* Maximum number of buffer descriptors per Recv Queue */
u32 max_rq_depth;
/* The maximum number of completion queues supported per VF */
u32 max_cq;
+ /* Maximum number of CQEs per Completion Queue */
u32 max_cq_depth;
/* Number of sub-CQs to be created for each CQ */
u16 sub_cqs_per_cq;
+ /* MBZ */
u16 reserved;
- /*
- * Maximum number of SGEs (buffs) allowed for a single send work
- * queue element (WQE)
- */
+ /* Maximum number of SGEs (buffers) allowed for a single send WQE */
u16 max_wr_send_sges;
/* Maximum number of SGEs allowed for a single recv WQE */
@@ -604,6 +629,9 @@ struct efa_admin_feature_queue_attr_desc {
/* The maximum size of LLQ in bytes */
u32 max_llq_size;
+
+ /* Maximum number of SGEs for a single RDMA read WQE */
+ u16 max_wr_rdma_sges;
};
struct efa_admin_feature_aenq_desc {
@@ -618,6 +646,7 @@ struct efa_admin_feature_network_attr_desc {
/* Raw address data in network byte order */
u8 addr[16];
+ /* max packet payload size in bytes */
u32 mtu;
};
@@ -780,6 +809,8 @@ struct efa_admin_mmio_req_read_less_resp {
#define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_SHIFT 7
#define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_MASK BIT(7)
#define EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK BIT(0)
+#define EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_SHIFT 2
+#define EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_MASK BIT(2)
/* create_cq_cmd */
#define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_SHIFT 5
@@ -791,4 +822,7 @@ struct efa_admin_mmio_req_read_less_resp {
/* get_set_feature_common_desc */
#define EFA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0)
+/* feature_device_attr_desc */
+#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0)
+
#endif /* _EFA_ADMIN_CMDS_H_ */
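[Editor's note] The register-MR command's permissions byte is now spelled out as bit 0 = local write, bit 2 = remote read, remaining bits MBZ, with matching SHIFT/MASK defines added above. A small sketch of composing that byte from IB access flags, using only the defines visible in this hunk (reproduced so the snippet stands alone); the mapping itself is an illustration, not the driver's exact helper:

    #include <linux/bits.h>
    #include <linux/types.h>
    #include <rdma/ib_verbs.h>

    /* Reproduced from the hunk above. */
    #define EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK   BIT(0)
    #define EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_MASK   BIT(2)

    static u8 sketch_mr_permissions(int ib_access_flags)
    {
            u8 perms = 0;

            if (ib_access_flags & IB_ACCESS_LOCAL_WRITE)
                    perms |= EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK;
            if (ib_access_flags & IB_ACCESS_REMOTE_READ)
                    perms |= EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_MASK;

            return perms;
    }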
diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c
index 2cb42484b0f8..0778f4f7dccd 100644
--- a/drivers/infiniband/hw/efa/efa_com.c
+++ b/drivers/infiniband/hw/efa/efa_com.c
@@ -109,17 +109,19 @@ static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset)
} while (time_is_after_jiffies(exp_time));
if (read_resp->req_id != mmio_read->seq_num) {
- ibdev_err(edev->efa_dev,
- "Reading register timed out. expected: req id[%u] offset[%#x] actual: req id[%u] offset[%#x]\n",
- mmio_read->seq_num, offset, read_resp->req_id,
- read_resp->reg_off);
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Reading register timed out. expected: req id[%u] offset[%#x] actual: req id[%u] offset[%#x]\n",
+ mmio_read->seq_num, offset, read_resp->req_id,
+ read_resp->reg_off);
err = EFA_MMIO_READ_INVALID;
goto out;
}
if (read_resp->reg_off != offset) {
- ibdev_err(edev->efa_dev,
- "Reading register failed: wrong offset provided\n");
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Reading register failed: wrong offset provided\n");
err = EFA_MMIO_READ_INVALID;
goto out;
}
@@ -293,9 +295,10 @@ static struct efa_comp_ctx *efa_com_get_comp_ctx(struct efa_com_admin_queue *aq,
u16 ctx_id = cmd_id & (aq->depth - 1);
if (aq->comp_ctx[ctx_id].occupied && capture) {
- ibdev_err(aq->efa_dev,
- "Completion context for command_id %#x is occupied\n",
- cmd_id);
+ ibdev_err_ratelimited(
+ aq->efa_dev,
+ "Completion context for command_id %#x is occupied\n",
+ cmd_id);
return NULL;
}
@@ -314,6 +317,7 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu
struct efa_admin_acq_entry *comp,
size_t comp_size_in_bytes)
{
+ struct efa_admin_aq_entry *aqe;
struct efa_comp_ctx *comp_ctx;
u16 queue_size_mask;
u16 cmd_id;
@@ -347,7 +351,9 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu
reinit_completion(&comp_ctx->wait_event);
- memcpy(&aq->sq.entries[pi], cmd, cmd_size_in_bytes);
+ aqe = &aq->sq.entries[pi];
+ memset(aqe, 0, sizeof(*aqe));
+ memcpy(aqe, cmd, cmd_size_in_bytes);
aq->sq.pc++;
atomic64_inc(&aq->stats.submitted_cmd);
@@ -401,7 +407,7 @@ static struct efa_comp_ctx *efa_com_submit_admin_cmd(struct efa_com_admin_queue
spin_lock(&aq->sq.lock);
if (!test_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state)) {
- ibdev_err(aq->efa_dev, "Admin queue is closed\n");
+ ibdev_err_ratelimited(aq->efa_dev, "Admin queue is closed\n");
spin_unlock(&aq->sq.lock);
return ERR_PTR(-ENODEV);
}
@@ -519,8 +525,9 @@ static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_c
break;
if (time_is_before_jiffies(timeout)) {
- ibdev_err(aq->efa_dev,
- "Wait for completion (polling) timeout\n");
+ ibdev_err_ratelimited(
+ aq->efa_dev,
+ "Wait for completion (polling) timeout\n");
/* EFA didn't have any completion */
atomic64_inc(&aq->stats.no_completion);
@@ -561,17 +568,19 @@ static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *com
atomic64_inc(&aq->stats.no_completion);
if (comp_ctx->status == EFA_CMD_COMPLETED)
- ibdev_err(aq->efa_dev,
- "The device sent a completion but the driver didn't receive any MSI-X interrupt for admin cmd %s(%d) status %d (ctx: 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
- efa_com_cmd_str(comp_ctx->cmd_opcode),
- comp_ctx->cmd_opcode, comp_ctx->status,
- comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
+ ibdev_err_ratelimited(
+ aq->efa_dev,
+ "The device sent a completion but the driver didn't receive any MSI-X interrupt for admin cmd %s(%d) status %d (ctx: 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
+ efa_com_cmd_str(comp_ctx->cmd_opcode),
+ comp_ctx->cmd_opcode, comp_ctx->status,
+ comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
else
- ibdev_err(aq->efa_dev,
- "The device didn't send any completion for admin cmd %s(%d) status %d (ctx 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
- efa_com_cmd_str(comp_ctx->cmd_opcode),
- comp_ctx->cmd_opcode, comp_ctx->status,
- comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
+ ibdev_err_ratelimited(
+ aq->efa_dev,
+ "The device didn't send any completion for admin cmd %s(%d) status %d (ctx 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
+ efa_com_cmd_str(comp_ctx->cmd_opcode),
+ comp_ctx->cmd_opcode, comp_ctx->status,
+ comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
err = -ETIME;
@@ -633,10 +642,11 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
cmd->aq_common_descriptor.opcode);
comp_ctx = efa_com_submit_admin_cmd(aq, cmd, cmd_size, comp, comp_size);
if (IS_ERR(comp_ctx)) {
- ibdev_err(aq->efa_dev,
- "Failed to submit command %s (opcode %u) err %ld\n",
- efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
- cmd->aq_common_descriptor.opcode, PTR_ERR(comp_ctx));
+ ibdev_err_ratelimited(
+ aq->efa_dev,
+ "Failed to submit command %s (opcode %u) err %ld\n",
+ efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
+ cmd->aq_common_descriptor.opcode, PTR_ERR(comp_ctx));
up(&aq->avail_cmds);
return PTR_ERR(comp_ctx);
@@ -644,11 +654,12 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
err = efa_com_wait_and_process_admin_cq(comp_ctx, aq);
if (err)
- ibdev_err(aq->efa_dev,
- "Failed to process command %s (opcode %u) comp_status %d err %d\n",
- efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
- cmd->aq_common_descriptor.opcode,
- comp_ctx->comp_status, err);
+ ibdev_err_ratelimited(
+ aq->efa_dev,
+ "Failed to process command %s (opcode %u) comp_status %d err %d\n",
+ efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
+ cmd->aq_common_descriptor.opcode, comp_ctx->comp_status,
+ err);
up(&aq->avail_cmds);
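[Editor's note] Besides converting the error prints to ibdev_err_ratelimited(), the efa_com.c hunks change __efa_com_submit_admin_cmd() to zero the whole admin SQ slot before copying in the (possibly shorter) command, so stale bytes from a previous command are never handed to the device. The core of that change as a standalone sketch, with an illustrative slot size:

    #include <linux/string.h>
    #include <linux/types.h>

    struct aq_entry { u8 raw[64]; };        /* illustrative slot size */

    /* The admin SQ slot is a fixed-size union, commands can be shorter,
     * so clear the slot before copying the command in.
     */
    static void post_cmd(struct aq_entry *slot, const void *cmd, size_t cmd_len)
    {
            memset(slot, 0, sizeof(*slot)); /* no stale bytes from older cmds */
            memcpy(slot, cmd, cmd_len);     /* cmd_len <= sizeof(*slot)       */
    }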
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c
index 62345d8abf3c..e20bd84a1014 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.c
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.c
@@ -44,7 +44,8 @@ int efa_com_create_qp(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)&cmd_completion,
sizeof(cmd_completion));
if (err) {
- ibdev_err(edev->efa_dev, "Failed to create qp [%d]\n", err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to create qp [%d]\n", err);
return err;
}
@@ -82,9 +83,10 @@ int efa_com_modify_qp(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)&resp,
sizeof(resp));
if (err) {
- ibdev_err(edev->efa_dev,
- "Failed to modify qp-%u modify_mask[%#x] [%d]\n",
- cmd.qp_handle, cmd.modify_mask, err);
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Failed to modify qp-%u modify_mask[%#x] [%d]\n",
+ cmd.qp_handle, cmd.modify_mask, err);
return err;
}
@@ -109,8 +111,9 @@ int efa_com_query_qp(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)&resp,
sizeof(resp));
if (err) {
- ibdev_err(edev->efa_dev, "Failed to query qp-%u [%d]\n",
- cmd.qp_handle, err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to query qp-%u [%d]\n",
+ cmd.qp_handle, err);
return err;
}
@@ -139,8 +142,9 @@ int efa_com_destroy_qp(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)&cmd_completion,
sizeof(cmd_completion));
if (err) {
- ibdev_err(edev->efa_dev, "Failed to destroy qp-%u [%d]\n",
- qp_cmd.qp_handle, err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to destroy qp-%u [%d]\n",
+ qp_cmd.qp_handle, err);
return err;
}
@@ -173,7 +177,8 @@ int efa_com_create_cq(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)&cmd_completion,
sizeof(cmd_completion));
if (err) {
- ibdev_err(edev->efa_dev, "Failed to create cq[%d]\n", err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to create cq[%d]\n", err);
return err;
}
@@ -201,8 +206,9 @@ int efa_com_destroy_cq(struct efa_com_dev *edev,
sizeof(destroy_resp));
if (err) {
- ibdev_err(edev->efa_dev, "Failed to destroy CQ-%u [%d]\n",
- params->cq_idx, err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to destroy CQ-%u [%d]\n",
+ params->cq_idx, err);
return err;
}
@@ -224,8 +230,7 @@ int efa_com_register_mr(struct efa_com_dev *edev,
mr_cmd.flags |= params->page_shift &
EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK;
mr_cmd.iova = params->iova;
- mr_cmd.permissions |= params->permissions &
- EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK;
+ mr_cmd.permissions = params->permissions;
if (params->inline_pbl) {
memcpy(mr_cmd.pbl.inline_pbl_array,
@@ -250,7 +255,8 @@ int efa_com_register_mr(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)&cmd_completion,
sizeof(cmd_completion));
if (err) {
- ibdev_err(edev->efa_dev, "Failed to register mr [%d]\n", err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to register mr [%d]\n", err);
return err;
}
@@ -277,9 +283,9 @@ int efa_com_dereg_mr(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)&cmd_completion,
sizeof(cmd_completion));
if (err) {
- ibdev_err(edev->efa_dev,
- "Failed to de-register mr(lkey-%u) [%d]\n",
- mr_cmd.l_key, err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to de-register mr(lkey-%u) [%d]\n",
+ mr_cmd.l_key, err);
return err;
}
@@ -306,8 +312,9 @@ int efa_com_create_ah(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)&cmd_completion,
sizeof(cmd_completion));
if (err) {
- ibdev_err(edev->efa_dev, "Failed to create ah for %pI6 [%d]\n",
- ah_cmd.dest_addr, err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to create ah for %pI6 [%d]\n",
+ ah_cmd.dest_addr, err);
return err;
}
@@ -334,8 +341,9 @@ int efa_com_destroy_ah(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)&cmd_completion,
sizeof(cmd_completion));
if (err) {
- ibdev_err(edev->efa_dev, "Failed to destroy ah-%d pd-%d [%d]\n",
- ah_cmd.ah, ah_cmd.pd, err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to destroy ah-%d pd-%d [%d]\n",
+ ah_cmd.ah, ah_cmd.pd, err);
return err;
}
@@ -367,8 +375,9 @@ static int efa_com_get_feature_ex(struct efa_com_dev *edev,
int err;
if (!efa_com_check_supported_feature_id(edev, feature_id)) {
- ibdev_err(edev->efa_dev, "Feature %d isn't supported\n",
- feature_id);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Feature %d isn't supported\n",
+ feature_id);
return -EOPNOTSUPP;
}
@@ -396,9 +405,10 @@ static int efa_com_get_feature_ex(struct efa_com_dev *edev,
sizeof(*get_resp));
if (err) {
- ibdev_err(edev->efa_dev,
- "Failed to submit get_feature command %d [%d]\n",
- feature_id, err);
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Failed to submit get_feature command %d [%d]\n",
+ feature_id, err);
return err;
}
@@ -412,27 +422,6 @@ static int efa_com_get_feature(struct efa_com_dev *edev,
return efa_com_get_feature_ex(edev, get_resp, feature_id, 0, 0);
}
-int efa_com_get_network_attr(struct efa_com_dev *edev,
- struct efa_com_get_network_attr_result *result)
-{
- struct efa_admin_get_feature_resp resp;
- int err;
-
- err = efa_com_get_feature(edev, &resp,
- EFA_ADMIN_NETWORK_ATTR);
- if (err) {
- ibdev_err(edev->efa_dev,
- "Failed to get network attributes %d\n", err);
- return err;
- }
-
- memcpy(result->addr, resp.u.network_attr.addr,
- sizeof(resp.u.network_attr.addr));
- result->mtu = resp.u.network_attr.mtu;
-
- return 0;
-}
-
int efa_com_get_device_attr(struct efa_com_dev *edev,
struct efa_com_get_device_attr_result *result)
{
@@ -441,8 +430,9 @@ int efa_com_get_device_attr(struct efa_com_dev *edev,
err = efa_com_get_feature(edev, &resp, EFA_ADMIN_DEVICE_ATTR);
if (err) {
- ibdev_err(edev->efa_dev, "Failed to get device attributes %d\n",
- err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to get device attributes %d\n",
+ err);
return err;
}
@@ -454,11 +444,14 @@ int efa_com_get_device_attr(struct efa_com_dev *edev,
result->phys_addr_width = resp.u.device_attr.phys_addr_width;
result->virt_addr_width = resp.u.device_attr.virt_addr_width;
result->db_bar = resp.u.device_attr.db_bar;
+ result->max_rdma_size = resp.u.device_attr.max_rdma_size;
+ result->device_caps = resp.u.device_attr.device_caps;
if (result->admin_api_version < 1) {
- ibdev_err(edev->efa_dev,
- "Failed to get device attr api version [%u < 1]\n",
- result->admin_api_version);
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Failed to get device attr api version [%u < 1]\n",
+ result->admin_api_version);
return -EINVAL;
}
@@ -466,8 +459,9 @@ int efa_com_get_device_attr(struct efa_com_dev *edev,
err = efa_com_get_feature(edev, &resp,
EFA_ADMIN_QUEUE_ATTR);
if (err) {
- ibdev_err(edev->efa_dev,
- "Failed to get network attributes %d\n", err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to get queue attributes %d\n",
+ err);
return err;
}
@@ -485,6 +479,19 @@ int efa_com_get_device_attr(struct efa_com_dev *edev,
result->max_ah = resp.u.queue_attr.max_ah;
result->max_llq_size = resp.u.queue_attr.max_llq_size;
result->sub_cqs_per_cq = resp.u.queue_attr.sub_cqs_per_cq;
+ result->max_wr_rdma_sge = resp.u.queue_attr.max_wr_rdma_sges;
+
+ err = efa_com_get_feature(edev, &resp, EFA_ADMIN_NETWORK_ATTR);
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to get network attributes %d\n",
+ err);
+ return err;
+ }
+
+ memcpy(result->addr, resp.u.network_attr.addr,
+ sizeof(resp.u.network_attr.addr));
+ result->mtu = resp.u.network_attr.mtu;
return 0;
}
@@ -497,7 +504,8 @@ int efa_com_get_hw_hints(struct efa_com_dev *edev,
err = efa_com_get_feature(edev, &resp, EFA_ADMIN_HW_HINTS);
if (err) {
- ibdev_err(edev->efa_dev, "Failed to get hw hints %d\n", err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to get hw hints %d\n", err);
return err;
}
@@ -520,8 +528,9 @@ static int efa_com_set_feature_ex(struct efa_com_dev *edev,
int err;
if (!efa_com_check_supported_feature_id(edev, feature_id)) {
- ibdev_err(edev->efa_dev, "Feature %d isn't supported\n",
- feature_id);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Feature %d isn't supported\n",
+ feature_id);
return -EOPNOTSUPP;
}
@@ -545,9 +554,10 @@ static int efa_com_set_feature_ex(struct efa_com_dev *edev,
sizeof(*set_resp));
if (err) {
- ibdev_err(edev->efa_dev,
- "Failed to submit set_feature command %d error: %d\n",
- feature_id, err);
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Failed to submit set_feature command %d error: %d\n",
+ feature_id, err);
return err;
}
@@ -574,8 +584,9 @@ int efa_com_set_aenq_config(struct efa_com_dev *edev, u32 groups)
err = efa_com_get_feature(edev, &get_resp, EFA_ADMIN_AENQ_CONFIG);
if (err) {
- ibdev_err(edev->efa_dev, "Failed to get aenq attributes: %d\n",
- err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to get aenq attributes: %d\n",
+ err);
return err;
}
@@ -585,9 +596,10 @@ int efa_com_set_aenq_config(struct efa_com_dev *edev, u32 groups)
get_resp.u.aenq.enabled_groups);
if ((get_resp.u.aenq.supported_groups & groups) != groups) {
- ibdev_err(edev->efa_dev,
- "Trying to set unsupported aenq groups[%#x] supported[%#x]\n",
- groups, get_resp.u.aenq.supported_groups);
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Trying to set unsupported aenq groups[%#x] supported[%#x]\n",
+ groups, get_resp.u.aenq.supported_groups);
return -EOPNOTSUPP;
}
@@ -595,8 +607,9 @@ int efa_com_set_aenq_config(struct efa_com_dev *edev, u32 groups)
err = efa_com_set_feature(edev, &set_resp, &cmd,
EFA_ADMIN_AENQ_CONFIG);
if (err) {
- ibdev_err(edev->efa_dev, "Failed to set aenq attributes: %d\n",
- err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to set aenq attributes: %d\n",
+ err);
return err;
}
@@ -619,7 +632,8 @@ int efa_com_alloc_pd(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)&resp,
sizeof(resp));
if (err) {
- ibdev_err(edev->efa_dev, "Failed to allocate pd[%d]\n", err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to allocate pd[%d]\n", err);
return err;
}
@@ -645,8 +659,9 @@ int efa_com_dealloc_pd(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)&resp,
sizeof(resp));
if (err) {
- ibdev_err(edev->efa_dev, "Failed to deallocate pd-%u [%d]\n",
- cmd.pd, err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to deallocate pd-%u [%d]\n",
+ cmd.pd, err);
return err;
}
@@ -669,7 +684,8 @@ int efa_com_alloc_uar(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)&resp,
sizeof(resp));
if (err) {
- ibdev_err(edev->efa_dev, "Failed to allocate uar[%d]\n", err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to allocate uar[%d]\n", err);
return err;
}
@@ -695,10 +711,47 @@ int efa_com_dealloc_uar(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)&resp,
sizeof(resp));
if (err) {
- ibdev_err(edev->efa_dev, "Failed to deallocate uar-%u [%d]\n",
- cmd.uar, err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to deallocate uar-%u [%d]\n",
+ cmd.uar, err);
return err;
}
return 0;
}
+
+int efa_com_get_stats(struct efa_com_dev *edev,
+ struct efa_com_get_stats_params *params,
+ union efa_com_get_stats_result *result)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_admin_aq_get_stats_cmd cmd = {};
+ struct efa_admin_acq_get_stats_resp resp;
+ int err;
+
+ cmd.aq_common_descriptor.opcode = EFA_ADMIN_GET_STATS;
+ cmd.type = params->type;
+ cmd.scope = params->scope;
+ cmd.scope_modifier = params->scope_modifier;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct efa_admin_acq_entry *)&resp,
+ sizeof(resp));
+ if (err) {
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Failed to get stats type-%u scope-%u.%u [%d]\n",
+ cmd.type, cmd.scope, cmd.scope_modifier, err);
+ return err;
+ }
+
+ result->basic_stats.tx_bytes = resp.basic_stats.tx_bytes;
+ result->basic_stats.tx_pkts = resp.basic_stats.tx_pkts;
+ result->basic_stats.rx_bytes = resp.basic_stats.rx_bytes;
+ result->basic_stats.rx_pkts = resp.basic_stats.rx_pkts;
+ result->basic_stats.rx_drops = resp.basic_stats.rx_drops;
+
+ return 0;
+}
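[Editor's note] efa_com_get_stats() is the new admin-command wrapper backing the hw-stats support added elsewhere in this series; the standalone network-attributes query is folded into efa_com_get_device_attr() above. A sketch of how a caller might consume the new command; the enum values EFA_ADMIN_GET_STATS_TYPE_BASIC and EFA_ADMIN_GET_STATS_SCOPE_ALL and the helper name are assumptions about the verbs-layer hook, not code from this patch:

    #include "efa.h"        /* struct efa_dev, efa_com_* declarations */

    static int sketch_read_basic_stats(struct efa_dev *dev, u64 *tx_bytes)
    {
            struct efa_com_get_stats_params params = {
                    .type = EFA_ADMIN_GET_STATS_TYPE_BASIC,   /* assumed enum */
                    .scope = EFA_ADMIN_GET_STATS_SCOPE_ALL,   /* assumed enum */
            };
            union efa_com_get_stats_result result;
            int err;

            err = efa_com_get_stats(&dev->edev, &params, &result);
            if (err)
                    return err;

            *tx_bytes = result.basic_stats.tx_bytes;
            return 0;
    }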
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h
index a1174380462c..31db5a0cbd5b 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.h
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.h
@@ -100,14 +100,11 @@ struct efa_com_destroy_ah_params {
u16 pdn;
};
-struct efa_com_get_network_attr_result {
- u8 addr[EFA_GID_SIZE];
- u32 mtu;
-};
-
struct efa_com_get_device_attr_result {
+ u8 addr[EFA_GID_SIZE];
u64 page_size_cap;
u64 max_mr_pages;
+ u32 mtu;
u32 fw_version;
u32 admin_api_version;
u32 device_version;
@@ -124,9 +121,12 @@ struct efa_com_get_device_attr_result {
u32 max_pd;
u32 max_ah;
u32 max_llq_size;
+ u32 max_rdma_size;
+ u32 device_caps;
u16 sub_cqs_per_cq;
u16 max_sq_sge;
u16 max_rq_sge;
+ u16 max_wr_rdma_sge;
u8 db_bar;
};
@@ -181,12 +181,7 @@ struct efa_com_reg_mr_params {
* address mapping
*/
u8 page_shift;
- /*
- * permissions
- * 0: local_write_enable - Write permissions: value of 1 needed
- * for RQ buffers and for RDMA write:1: reserved1 - remote
- * access flags, etc
- */
+ /* see permissions field of struct efa_admin_reg_mr_cmd */
u8 permissions;
u8 inline_pbl;
u8 indirect;
@@ -225,6 +220,26 @@ struct efa_com_dealloc_uar_params {
u16 uarn;
};
+struct efa_com_get_stats_params {
+ /* see enum efa_admin_get_stats_type */
+ u8 type;
+ /* see enum efa_admin_get_stats_scope */
+ u8 scope;
+ u16 scope_modifier;
+};
+
+struct efa_com_basic_stats {
+ u64 tx_bytes;
+ u64 tx_pkts;
+ u64 rx_bytes;
+ u64 rx_pkts;
+ u64 rx_drops;
+};
+
+union efa_com_get_stats_result {
+ struct efa_com_basic_stats basic_stats;
+};
+
void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low);
int efa_com_create_qp(struct efa_com_dev *edev,
struct efa_com_create_qp_params *params,
@@ -251,8 +266,6 @@ int efa_com_create_ah(struct efa_com_dev *edev,
struct efa_com_create_ah_result *result);
int efa_com_destroy_ah(struct efa_com_dev *edev,
struct efa_com_destroy_ah_params *params);
-int efa_com_get_network_attr(struct efa_com_dev *edev,
- struct efa_com_get_network_attr_result *result);
int efa_com_get_device_attr(struct efa_com_dev *edev,
struct efa_com_get_device_attr_result *result);
int efa_com_get_hw_hints(struct efa_com_dev *edev,
@@ -266,5 +279,8 @@ int efa_com_alloc_uar(struct efa_com_dev *edev,
struct efa_com_alloc_uar_result *result);
int efa_com_dealloc_uar(struct efa_com_dev *edev,
struct efa_com_dealloc_uar_params *params);
+int efa_com_get_stats(struct efa_com_dev *edev,
+ struct efa_com_get_stats_params *params,
+ union efa_com_get_stats_result *result);
#endif /* _EFA_COM_CMD_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
index dd1c6d49466f..faf3ff1bca2a 100644
--- a/drivers/infiniband/hw/efa/efa_main.c
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -30,15 +30,6 @@ MODULE_DEVICE_TABLE(pci, efa_pci_tbl);
(BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
-static void efa_update_network_attr(struct efa_dev *dev,
- struct efa_com_get_network_attr_result *network_attr)
-{
- memcpy(dev->addr, network_attr->addr, sizeof(network_attr->addr));
- dev->mtu = network_attr->mtu;
-
- dev_dbg(&dev->pdev->dev, "Full address %pI6\n", dev->addr);
-}
-
/* This handler will called for unknown event group or unimplemented handlers */
static void unimplemented_aenq_handler(void *data,
struct efa_admin_aenq_entry *aenq_e)
@@ -201,6 +192,7 @@ static const struct ib_device_ops efa_dev_ops = {
.driver_id = RDMA_DRIVER_EFA,
.uverbs_abi_ver = EFA_UVERBS_ABI_VERSION,
+ .alloc_hw_stats = efa_alloc_hw_stats,
.alloc_pd = efa_alloc_pd,
.alloc_ucontext = efa_alloc_ucontext,
.create_ah = efa_create_ah,
@@ -212,9 +204,11 @@ static const struct ib_device_ops efa_dev_ops = {
.destroy_ah = efa_destroy_ah,
.destroy_cq = efa_destroy_cq,
.destroy_qp = efa_destroy_qp,
+ .get_hw_stats = efa_get_hw_stats,
.get_link_layer = efa_port_link_layer,
.get_port_immutable = efa_get_port_immutable,
.mmap = efa_mmap,
+ .mmap_free = efa_mmap_free,
.modify_qp = efa_modify_qp,
.query_device = efa_query_device,
.query_gid = efa_query_gid,
@@ -231,7 +225,6 @@ static const struct ib_device_ops efa_dev_ops = {
static int efa_ib_device_add(struct efa_dev *dev)
{
- struct efa_com_get_network_attr_result network_attr;
struct efa_com_get_hw_hints_result hw_hints;
struct pci_dev *pdev = dev->pdev;
int err;
@@ -247,12 +240,6 @@ static int efa_ib_device_add(struct efa_dev *dev)
if (err)
return err;
- err = efa_com_get_network_attr(&dev->edev, &network_attr);
- if (err)
- goto err_release_doorbell_bar;
-
- efa_update_network_attr(dev, &network_attr);
-
err = efa_com_get_hw_hints(&dev->edev, &hw_hints);
if (err)
goto err_release_doorbell_bar;
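[Editor's note] The ops table gains alloc_hw_stats/get_hw_stats (backed by the counter list defined via EFA_DEFINE_STATS in efa_verbs.c later in this diff) plus mmap_free, and the standalone network-attribute query goes away now that efa_com_get_device_attr() returns the address and MTU. A hedged sketch of how the two stats hooks typically pair up; the counter names and the body of get_hw_stats are placeholders, not the driver's exact implementation:

    #include <linux/kernel.h>
    #include <rdma/ib_verbs.h>

    /* Illustrative counter list; the real names come from
     * EFA_DEFINE_STATS in efa_verbs.c.
     */
    static const char * const sketch_stats_names[] = {
            "tx_bytes", "rx_bytes", "rx_drops",
    };

    static struct rdma_hw_stats *sketch_alloc_hw_stats(struct ib_device *ibdev,
                                                       u8 port_num)
    {
            /* One rdma_hw_stats object per port, named counters, default lifespan. */
            return rdma_alloc_hw_stats_struct(sketch_stats_names,
                                              ARRAY_SIZE(sketch_stats_names),
                                              RDMA_HW_STATS_DEFAULT_LIFESPAN);
    }

    static int sketch_get_hw_stats(struct ib_device *ibdev,
                                   struct rdma_hw_stats *stats,
                                   u8 port_num, int index)
    {
            /* Fill stats->value[0..N-1] from the device (e.g. via the new
             * efa_com_get_stats() admin command) and return the count.
             */
            return ARRAY_SIZE(sketch_stats_names);
    }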
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index df77bc312a25..ec5545870554 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -13,10 +13,6 @@
#include "efa.h"
-#define EFA_MMAP_FLAG_SHIFT 56
-#define EFA_MMAP_PAGE_MASK GENMASK(EFA_MMAP_FLAG_SHIFT - 1, 0)
-#define EFA_MMAP_INVALID U64_MAX
-
enum {
EFA_MMAP_DMA_PAGE = 0,
EFA_MMAP_IO_WC,
@@ -27,19 +23,38 @@ enum {
(BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
-struct efa_mmap_entry {
- void *obj;
+struct efa_user_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
u64 address;
- u64 length;
- u32 mmap_page;
u8 mmap_flag;
};
-static inline u64 get_mmap_key(const struct efa_mmap_entry *efa)
-{
- return ((u64)efa->mmap_flag << EFA_MMAP_FLAG_SHIFT) |
- ((u64)efa->mmap_page << PAGE_SHIFT);
-}
+#define EFA_DEFINE_STATS(op) \
+ op(EFA_TX_BYTES, "tx_bytes") \
+ op(EFA_TX_PKTS, "tx_pkts") \
+ op(EFA_RX_BYTES, "rx_bytes") \
+ op(EFA_RX_PKTS, "rx_pkts") \
+ op(EFA_RX_DROPS, "rx_drops") \
+ op(EFA_SUBMITTED_CMDS, "submitted_cmds") \
+ op(EFA_COMPLETED_CMDS, "completed_cmds") \
+ op(EFA_NO_COMPLETION_CMDS, "no_completion_cmds") \
+ op(EFA_KEEP_ALIVE_RCVD, "keep_alive_rcvd") \
+ op(EFA_ALLOC_PD_ERR, "alloc_pd_err") \
+ op(EFA_CREATE_QP_ERR, "create_qp_err") \
+ op(EFA_REG_MR_ERR, "reg_mr_err") \
+ op(EFA_ALLOC_UCONTEXT_ERR, "alloc_ucontext_err") \
+ op(EFA_CREATE_AH_ERR, "create_ah_err")
+
+#define EFA_STATS_ENUM(ename, name) ename,
+#define EFA_STATS_STR(ename, name) [ename] = name,
+
+enum efa_hw_stats {
+ EFA_DEFINE_STATS(EFA_STATS_ENUM)
+};
+
+static const char *const efa_stats_names[] = {
+ EFA_DEFINE_STATS(EFA_STATS_STR)
+};
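/*
 * Illustrative sketch, not part of the patch: EFA_DEFINE_STATS above is an
 * "X-macro". Feeding the same list through different per-entry macros keeps
 * the enum and the name table in sync by construction, so a new counter only
 * has to be added in one place. A minimal standalone example of the pattern,
 * using hypothetical names, looks like this:
 */

#define DEFINE_COLORS(op) \
	op(COLOR_RED, "red") \
	op(COLOR_BLUE, "blue")

#define COLOR_ENUM(ename, name) ename,
#define COLOR_STR(ename, name) [ename] = name,

enum color {
	DEFINE_COLORS(COLOR_ENUM)
};

static const char *const color_names[] = {
	DEFINE_COLORS(COLOR_STR)
};

/*
 * After preprocessing this is simply enum color { COLOR_RED, COLOR_BLUE }
 * and color_names[] = { [COLOR_RED] = "red", [COLOR_BLUE] = "blue" }.
 */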
#define EFA_CHUNK_PAYLOAD_SHIFT 12
#define EFA_CHUNK_PAYLOAD_SIZE BIT(EFA_CHUNK_PAYLOAD_SHIFT)
@@ -55,8 +70,6 @@ static inline u64 get_mmap_key(const struct efa_mmap_entry *efa)
#define EFA_CHUNK_USED_SIZE \
((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE)
-#define EFA_SUPPORTED_ACCESS_FLAGS IB_ACCESS_LOCAL_WRITE
-
struct pbl_chunk {
dma_addr_t dma_addr;
u64 *buf;
@@ -120,8 +133,19 @@ static inline struct efa_ah *to_eah(struct ib_ah *ibah)
return container_of(ibah, struct efa_ah, ibah);
}
+static inline struct efa_user_mmap_entry *
+to_emmap(struct rdma_user_mmap_entry *rdma_entry)
+{
+ return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry);
+}
+
+static inline bool is_rdma_read_cap(struct efa_dev *dev)
+{
+ return dev->dev_attr.device_caps & EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK;
+}
+
#define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \
- sizeof(((typeof(x) *)0)->fld) <= (sz))
+ sizeof_field(typeof(x), fld) <= (sz))
#define is_reserved_cleared(reserved) \
!memchr_inv(reserved, 0, sizeof(reserved))
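/*
 * Illustrative note, not part of the patch: field_avail() is what lets the
 * driver accept older userspace that passes a shorter command/response
 * buffer. With a hypothetical structure and variable
 *
 *	struct example_resp {
 *		__u32 a;
 *		__u32 b;	// member added in a newer ABI revision
 *	} resp = {};
 *
 *	if (field_avail(resp, b, udata->outlen))
 *		resp.b = value;	// only filled when the buffer can hold it
 *
 * the check is true only when the user buffer is large enough to receive
 * member 'b'. sizeof_field(T, m) expands to sizeof(((T *)0)->m), so the
 * switch away from the open-coded form is a cleanup with no behavioural
 * change.
 */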
@@ -145,106 +169,6 @@ static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr,
return addr;
}
-/*
- * This is only called when the ucontext is destroyed and there can be no
- * concurrent query via mmap or allocate on the xarray, thus we can be sure no
- * other thread is using the entry pointer. We also know that all the BAR
- * pages have either been zap'd or munmaped at this point. Normal pages are
- * refcounted and will be freed at the proper time.
- */
-static void mmap_entries_remove_free(struct efa_dev *dev,
- struct efa_ucontext *ucontext)
-{
- struct efa_mmap_entry *entry;
- unsigned long mmap_page;
-
- xa_for_each(&ucontext->mmap_xa, mmap_page, entry) {
- xa_erase(&ucontext->mmap_xa, mmap_page);
-
- ibdev_dbg(
- &dev->ibdev,
- "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
- entry->obj, get_mmap_key(entry), entry->address,
- entry->length);
- if (entry->mmap_flag == EFA_MMAP_DMA_PAGE)
- /* DMA mapping is already gone, now free the pages */
- free_pages_exact(phys_to_virt(entry->address),
- entry->length);
- kfree(entry);
- }
-}
-
-static struct efa_mmap_entry *mmap_entry_get(struct efa_dev *dev,
- struct efa_ucontext *ucontext,
- u64 key, u64 len)
-{
- struct efa_mmap_entry *entry;
- u64 mmap_page;
-
- mmap_page = (key & EFA_MMAP_PAGE_MASK) >> PAGE_SHIFT;
- if (mmap_page > U32_MAX)
- return NULL;
-
- entry = xa_load(&ucontext->mmap_xa, mmap_page);
- if (!entry || get_mmap_key(entry) != key || entry->length != len)
- return NULL;
-
- ibdev_dbg(&dev->ibdev,
- "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
- entry->obj, key, entry->address, entry->length);
-
- return entry;
-}
-
-/*
- * Note this locking scheme cannot support removal of entries, except during
- * ucontext destruction when the core code guarantees no concurrency.
- */
-static u64 mmap_entry_insert(struct efa_dev *dev, struct efa_ucontext *ucontext,
- void *obj, u64 address, u64 length, u8 mmap_flag)
-{
- struct efa_mmap_entry *entry;
- u32 next_mmap_page;
- int err;
-
- entry = kmalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry)
- return EFA_MMAP_INVALID;
-
- entry->obj = obj;
- entry->address = address;
- entry->length = length;
- entry->mmap_flag = mmap_flag;
-
- xa_lock(&ucontext->mmap_xa);
- if (check_add_overflow(ucontext->mmap_xa_page,
- (u32)(length >> PAGE_SHIFT),
- &next_mmap_page))
- goto err_unlock;
-
- entry->mmap_page = ucontext->mmap_xa_page;
- ucontext->mmap_xa_page = next_mmap_page;
- err = __xa_insert(&ucontext->mmap_xa, entry->mmap_page, entry,
- GFP_KERNEL);
- if (err)
- goto err_unlock;
-
- xa_unlock(&ucontext->mmap_xa);
-
- ibdev_dbg(
- &dev->ibdev,
- "mmap: obj[0x%p] addr[%#llx], len[%#llx], key[%#llx] inserted\n",
- entry->obj, entry->address, entry->length, get_mmap_key(entry));
-
- return get_mmap_key(entry);
-
-err_unlock:
- xa_unlock(&ucontext->mmap_xa);
- kfree(entry);
- return EFA_MMAP_INVALID;
-
-}
-
int efa_query_device(struct ib_device *ibdev,
struct ib_device_attr *props,
struct ib_udata *udata)
@@ -279,12 +203,17 @@ int efa_query_device(struct ib_device *ibdev,
dev_attr->max_rq_depth);
props->max_send_sge = dev_attr->max_sq_sge;
props->max_recv_sge = dev_attr->max_rq_sge;
+ props->max_sge_rd = dev_attr->max_wr_rdma_sge;
if (udata && udata->outlen) {
resp.max_sq_sge = dev_attr->max_sq_sge;
resp.max_rq_sge = dev_attr->max_rq_sge;
resp.max_sq_wr = dev_attr->max_sq_depth;
resp.max_rq_wr = dev_attr->max_rq_depth;
+ resp.max_rdma_size = dev_attr->max_rdma_size;
+
+ if (is_rdma_read_cap(dev))
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ;
err = ib_copy_to_udata(udata, &resp,
min(sizeof(resp), udata->outlen));
@@ -306,14 +235,14 @@ int efa_query_port(struct ib_device *ibdev, u8 port,
props->lmc = 1;
props->state = IB_PORT_ACTIVE;
- props->phys_state = 5;
+ props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
props->gid_tbl_len = 1;
props->pkey_tbl_len = 1;
props->active_speed = IB_SPEED_EDR;
props->active_width = IB_WIDTH_4X;
- props->max_mtu = ib_mtu_int_to_enum(dev->mtu);
- props->active_mtu = ib_mtu_int_to_enum(dev->mtu);
- props->max_msg_sz = dev->mtu;
+ props->max_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
+ props->active_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
+ props->max_msg_sz = dev->dev_attr.mtu;
props->max_vl_num = 1;
return 0;
@@ -374,7 +303,7 @@ int efa_query_gid(struct ib_device *ibdev, u8 port, int index,
{
struct efa_dev *dev = to_edev(ibdev);
- memcpy(gid->raw, dev->addr, sizeof(dev->addr));
+ memcpy(gid->raw, dev->dev_attr.addr, sizeof(dev->dev_attr.addr));
return 0;
}
@@ -458,6 +387,14 @@ static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle)
return efa_com_destroy_qp(&dev->edev, &params);
}
+static void efa_qp_user_mmap_entries_remove(struct efa_qp *qp)
+{
+ rdma_user_mmap_entry_remove(qp->rq_mmap_entry);
+ rdma_user_mmap_entry_remove(qp->rq_db_mmap_entry);
+ rdma_user_mmap_entry_remove(qp->llq_desc_mmap_entry);
+ rdma_user_mmap_entry_remove(qp->sq_db_mmap_entry);
+}
+
int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
struct efa_dev *dev = to_edev(ibqp->pd->device);
@@ -478,61 +415,101 @@ int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
DMA_TO_DEVICE);
}
+ efa_qp_user_mmap_entries_remove(qp);
kfree(qp);
return 0;
}
+static struct rdma_user_mmap_entry*
+efa_user_mmap_entry_insert(struct ib_ucontext *ucontext,
+ u64 address, size_t length,
+ u8 mmap_flag, u64 *offset)
+{
+ struct efa_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ int err;
+
+ if (!entry)
+ return NULL;
+
+ entry->address = address;
+ entry->mmap_flag = mmap_flag;
+
+ err = rdma_user_mmap_entry_insert(ucontext, &entry->rdma_entry,
+ length);
+ if (err) {
+ kfree(entry);
+ return NULL;
+ }
+ *offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
+
+ return &entry->rdma_entry;
+}
+
static int qp_mmap_entries_setup(struct efa_qp *qp,
struct efa_dev *dev,
struct efa_ucontext *ucontext,
struct efa_com_create_qp_params *params,
struct efa_ibv_create_qp_resp *resp)
{
- /*
- * Once an entry is inserted it might be mmapped, hence cannot be
- * cleaned up until dealloc_ucontext.
- */
- resp->sq_db_mmap_key =
- mmap_entry_insert(dev, ucontext, qp,
- dev->db_bar_addr + resp->sq_db_offset,
- PAGE_SIZE, EFA_MMAP_IO_NC);
- if (resp->sq_db_mmap_key == EFA_MMAP_INVALID)
+ size_t length;
+ u64 address;
+
+ address = dev->db_bar_addr + resp->sq_db_offset;
+ qp->sq_db_mmap_entry =
+ efa_user_mmap_entry_insert(&ucontext->ibucontext,
+ address,
+ PAGE_SIZE, EFA_MMAP_IO_NC,
+ &resp->sq_db_mmap_key);
+ if (!qp->sq_db_mmap_entry)
return -ENOMEM;
resp->sq_db_offset &= ~PAGE_MASK;
- resp->llq_desc_mmap_key =
- mmap_entry_insert(dev, ucontext, qp,
- dev->mem_bar_addr + resp->llq_desc_offset,
- PAGE_ALIGN(params->sq_ring_size_in_bytes +
- (resp->llq_desc_offset & ~PAGE_MASK)),
- EFA_MMAP_IO_WC);
- if (resp->llq_desc_mmap_key == EFA_MMAP_INVALID)
- return -ENOMEM;
+ address = dev->mem_bar_addr + resp->llq_desc_offset;
+ length = PAGE_ALIGN(params->sq_ring_size_in_bytes +
+ (resp->llq_desc_offset & ~PAGE_MASK));
+
+ qp->llq_desc_mmap_entry =
+ efa_user_mmap_entry_insert(&ucontext->ibucontext,
+ address, length,
+ EFA_MMAP_IO_WC,
+ &resp->llq_desc_mmap_key);
+ if (!qp->llq_desc_mmap_entry)
+ goto err_remove_mmap;
resp->llq_desc_offset &= ~PAGE_MASK;
if (qp->rq_size) {
- resp->rq_db_mmap_key =
- mmap_entry_insert(dev, ucontext, qp,
- dev->db_bar_addr + resp->rq_db_offset,
- PAGE_SIZE, EFA_MMAP_IO_NC);
- if (resp->rq_db_mmap_key == EFA_MMAP_INVALID)
- return -ENOMEM;
+ address = dev->db_bar_addr + resp->rq_db_offset;
+
+ qp->rq_db_mmap_entry =
+ efa_user_mmap_entry_insert(&ucontext->ibucontext,
+ address, PAGE_SIZE,
+ EFA_MMAP_IO_NC,
+ &resp->rq_db_mmap_key);
+ if (!qp->rq_db_mmap_entry)
+ goto err_remove_mmap;
resp->rq_db_offset &= ~PAGE_MASK;
- resp->rq_mmap_key =
- mmap_entry_insert(dev, ucontext, qp,
- virt_to_phys(qp->rq_cpu_addr),
- qp->rq_size, EFA_MMAP_DMA_PAGE);
- if (resp->rq_mmap_key == EFA_MMAP_INVALID)
- return -ENOMEM;
+ address = virt_to_phys(qp->rq_cpu_addr);
+ qp->rq_mmap_entry =
+ efa_user_mmap_entry_insert(&ucontext->ibucontext,
+ address, qp->rq_size,
+ EFA_MMAP_DMA_PAGE,
+ &resp->rq_mmap_key);
+ if (!qp->rq_mmap_entry)
+ goto err_remove_mmap;
resp->rq_mmap_size = qp->rq_size;
}
return 0;
+
+err_remove_mmap:
+ efa_qp_user_mmap_entries_remove(qp);
+
+ return -ENOMEM;
}
static int efa_qp_validate_cap(struct efa_dev *dev,
@@ -607,7 +584,6 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
struct efa_dev *dev = to_edev(ibpd->device);
struct efa_ibv_create_qp_resp resp = {};
struct efa_ibv_create_qp cmd = {};
- bool rq_entry_inserted = false;
struct efa_ucontext *ucontext;
struct efa_qp *qp;
int err;
@@ -715,7 +691,6 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
if (err)
goto err_destroy_qp;
- rq_entry_inserted = true;
qp->qp_handle = create_qp_resp.qp_handle;
qp->ibqp.qp_num = create_qp_resp.qp_num;
qp->ibqp.qp_type = init_attr->qp_type;
@@ -732,7 +707,7 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
ibdev_dbg(&dev->ibdev,
"Failed to copy udata for qp[%u]\n",
create_qp_resp.qp_num);
- goto err_destroy_qp;
+ goto err_remove_mmap_entries;
}
}
@@ -740,13 +715,16 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
return &qp->ibqp;
+err_remove_mmap_entries:
+ efa_qp_user_mmap_entries_remove(qp);
err_destroy_qp:
efa_destroy_qp_handle(dev, create_qp_resp.qp_handle);
err_free_mapped:
if (qp->rq_size) {
dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size,
DMA_TO_DEVICE);
- if (!rq_entry_inserted)
+
+ if (!qp->rq_mmap_entry)
free_pages_exact(qp->rq_cpu_addr, qp->rq_size);
}
err_free_qp:
@@ -870,16 +848,18 @@ void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
efa_destroy_cq_idx(dev, cq->cq_idx);
dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
DMA_FROM_DEVICE);
+ rdma_user_mmap_entry_remove(cq->mmap_entry);
}
static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
struct efa_ibv_create_cq_resp *resp)
{
resp->q_mmap_size = cq->size;
- resp->q_mmap_key = mmap_entry_insert(dev, cq->ucontext, cq,
- virt_to_phys(cq->cpu_addr),
- cq->size, EFA_MMAP_DMA_PAGE);
- if (resp->q_mmap_key == EFA_MMAP_INVALID)
+ cq->mmap_entry = efa_user_mmap_entry_insert(&cq->ucontext->ibucontext,
+ virt_to_phys(cq->cpu_addr),
+ cq->size, EFA_MMAP_DMA_PAGE,
+ &resp->q_mmap_key);
+ if (!cq->mmap_entry)
return -ENOMEM;
return 0;
@@ -897,7 +877,6 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct efa_dev *dev = to_edev(ibdev);
struct efa_ibv_create_cq cmd = {};
struct efa_cq *cq = to_ecq(ibcq);
- bool cq_entry_inserted = false;
int entries = attr->cqe;
int err;
@@ -986,15 +965,13 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
goto err_destroy_cq;
}
- cq_entry_inserted = true;
-
if (udata->outlen) {
err = ib_copy_to_udata(udata, &resp,
min(sizeof(resp), udata->outlen));
if (err) {
ibdev_dbg(ibdev,
"Failed to copy udata for create_cq\n");
- goto err_destroy_cq;
+ goto err_remove_mmap;
}
}
@@ -1003,13 +980,16 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
return 0;
+err_remove_mmap:
+ rdma_user_mmap_entry_remove(cq->mmap_entry);
err_destroy_cq:
efa_destroy_cq_idx(dev, cq->cq_idx);
err_free_mapped:
dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
DMA_FROM_DEVICE);
- if (!cq_entry_inserted)
+ if (!cq->mmap_entry)
free_pages_exact(cq->cpu_addr, cq->size);
+
err_out:
atomic64_inc(&dev->stats.sw_stats.create_cq_err);
return err;
@@ -1369,12 +1349,13 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
struct efa_com_reg_mr_params params = {};
struct efa_com_reg_mr_result result = {};
struct pbl_context pbl;
+ int supp_access_flags;
unsigned int pg_sz;
struct efa_mr *mr;
int inline_size;
int err;
- if (udata->inlen &&
+ if (udata && udata->inlen &&
!ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) {
ibdev_dbg(&dev->ibdev,
"Incompatible ABI params, udata not cleared\n");
@@ -1382,10 +1363,15 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
goto err_out;
}
- if (access_flags & ~EFA_SUPPORTED_ACCESS_FLAGS) {
+ supp_access_flags =
+ IB_ACCESS_LOCAL_WRITE |
+ (is_rdma_read_cap(dev) ? IB_ACCESS_REMOTE_READ : 0);
+
+ access_flags &= ~IB_ACCESS_OPTIONAL;
+ if (access_flags & ~supp_access_flags) {
ibdev_dbg(&dev->ibdev,
"Unsupported access flags[%#x], supported[%#x]\n",
- access_flags, EFA_SUPPORTED_ACCESS_FLAGS);
+ access_flags, supp_access_flags);
err = -EOPNOTSUPP;
goto err_out;
}
@@ -1396,7 +1382,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
goto err_out;
}
- mr->umem = ib_umem_get(udata, start, length, access_flags, 0);
+ mr->umem = ib_umem_get(ibpd->device, start, length, access_flags);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
ibdev_dbg(&dev->ibdev,
@@ -1407,7 +1393,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
params.pd = to_epd(ibpd)->pdn;
params.iova = virt_addr;
params.mr_length_in_bytes = length;
- params.permissions = access_flags & 0x1;
+ params.permissions = access_flags;
pg_sz = ib_umem_find_best_pgsz(mr->umem,
dev->dev_attr.page_size_cap,
@@ -1473,14 +1459,12 @@ int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey);
- if (mr->umem) {
- params.l_key = mr->ibmr.lkey;
- err = efa_com_dereg_mr(&dev->edev, &params);
- if (err)
- return err;
- }
- ib_umem_release(mr->umem);
+ params.l_key = mr->ibmr.lkey;
+ err = efa_com_dereg_mr(&dev->edev, &params);
+ if (err)
+ return err;
+ ib_umem_release(mr->umem);
kfree(mr);
return 0;
@@ -1531,7 +1515,6 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
goto err_out;
ucontext->uarn = result.uarn;
- xa_init(&ucontext->mmap_xa);
resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE;
resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH;
@@ -1560,38 +1543,56 @@ void efa_dealloc_ucontext(struct ib_ucontext *ibucontext)
struct efa_ucontext *ucontext = to_eucontext(ibucontext);
struct efa_dev *dev = to_edev(ibucontext->device);
- mmap_entries_remove_free(dev, ucontext);
efa_dealloc_uar(dev, ucontext->uarn);
}
+void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
+{
+ struct efa_user_mmap_entry *entry = to_emmap(rdma_entry);
+
+ /* DMA mapping is already gone, now free the pages */
+ if (entry->mmap_flag == EFA_MMAP_DMA_PAGE)
+ free_pages_exact(phys_to_virt(entry->address),
+ entry->rdma_entry.npages * PAGE_SIZE);
+ kfree(entry);
+}
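/*
 * Illustrative note, not part of the patch: with the rdma_user_mmap_entry
 * framework the mmap-offset bookkeeping moves into the RDMA core. The
 * driver embeds a struct rdma_user_mmap_entry inside its own entry and
 * recovers it with container_of() via to_emmap(); the core hands out the
 * offset at insert time, looks the entry up again in efa_mmap() through
 * rdma_user_mmap_entry_get(), and only invokes this .mmap_free hook once
 * the entry has been removed and every reference (including live VMAs) is
 * gone. That is why the DMA pages can be freed here rather than being held
 * until dealloc_ucontext as the old xarray scheme required.
 */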
+
static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
- struct vm_area_struct *vma, u64 key, u64 length)
+ struct vm_area_struct *vma)
{
- struct efa_mmap_entry *entry;
+ struct rdma_user_mmap_entry *rdma_entry;
+ struct efa_user_mmap_entry *entry;
unsigned long va;
+ int err = 0;
u64 pfn;
- int err;
- entry = mmap_entry_get(dev, ucontext, key, length);
- if (!entry) {
- ibdev_dbg(&dev->ibdev, "key[%#llx] does not have valid entry\n",
- key);
+ rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma);
+ if (!rdma_entry) {
+ ibdev_dbg(&dev->ibdev,
+ "pgoff[%#lx] does not have valid entry\n",
+ vma->vm_pgoff);
return -EINVAL;
}
+ entry = to_emmap(rdma_entry);
ibdev_dbg(&dev->ibdev,
- "Mapping address[%#llx], length[%#llx], mmap_flag[%d]\n",
- entry->address, length, entry->mmap_flag);
+ "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
+ entry->address, rdma_entry->npages * PAGE_SIZE,
+ entry->mmap_flag);
pfn = entry->address >> PAGE_SHIFT;
switch (entry->mmap_flag) {
case EFA_MMAP_IO_NC:
- err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length,
- pgprot_noncached(vma->vm_page_prot));
+ err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
+ entry->rdma_entry.npages * PAGE_SIZE,
+ pgprot_noncached(vma->vm_page_prot),
+ rdma_entry);
break;
case EFA_MMAP_IO_WC:
- err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length,
- pgprot_writecombine(vma->vm_page_prot));
+ err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
+ entry->rdma_entry.npages * PAGE_SIZE,
+ pgprot_writecombine(vma->vm_page_prot),
+ rdma_entry);
break;
case EFA_MMAP_DMA_PAGE:
for (va = vma->vm_start; va < vma->vm_end;
@@ -1605,15 +1606,15 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
err = -EINVAL;
}
- if (err) {
+ if (err)
ibdev_dbg(
&dev->ibdev,
- "Couldn't mmap address[%#llx] length[%#llx] mmap_flag[%d] err[%d]\n",
- entry->address, length, entry->mmap_flag, err);
- return err;
- }
+ "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
+ entry->address, rdma_entry->npages * PAGE_SIZE,
+ entry->mmap_flag, err);
- return 0;
+ rdma_user_mmap_entry_put(rdma_entry);
+ return err;
}
int efa_mmap(struct ib_ucontext *ibucontext,
@@ -1621,26 +1622,13 @@ int efa_mmap(struct ib_ucontext *ibucontext,
{
struct efa_ucontext *ucontext = to_eucontext(ibucontext);
struct efa_dev *dev = to_edev(ibucontext->device);
- u64 length = vma->vm_end - vma->vm_start;
- u64 key = vma->vm_pgoff << PAGE_SHIFT;
+ size_t length = vma->vm_end - vma->vm_start;
ibdev_dbg(&dev->ibdev,
- "start %#lx, end %#lx, length = %#llx, key = %#llx\n",
- vma->vm_start, vma->vm_end, length, key);
-
- if (length % PAGE_SIZE != 0 || !(vma->vm_flags & VM_SHARED)) {
- ibdev_dbg(&dev->ibdev,
- "length[%#llx] is not page size aligned[%#lx] or VM_SHARED is not set [%#lx]\n",
- length, PAGE_SIZE, vma->vm_flags);
- return -EINVAL;
- }
-
- if (vma->vm_flags & VM_EXEC) {
- ibdev_dbg(&dev->ibdev, "Mapping executable pages is not permitted\n");
- return -EPERM;
- }
+ "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
+ vma->vm_start, vma->vm_end, length, vma->vm_pgoff);
- return __efa_mmap(dev, ucontext, vma, key, length);
+ return __efa_mmap(dev, ucontext, vma);
}
static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah)
@@ -1727,6 +1715,54 @@ void efa_destroy_ah(struct ib_ah *ibah, u32 flags)
efa_ah_destroy(dev, ah);
}
+struct rdma_hw_stats *efa_alloc_hw_stats(struct ib_device *ibdev, u8 port_num)
+{
+ return rdma_alloc_hw_stats_struct(efa_stats_names,
+ ARRAY_SIZE(efa_stats_names),
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u8 port_num, int index)
+{
+ struct efa_com_get_stats_params params = {};
+ union efa_com_get_stats_result result;
+ struct efa_dev *dev = to_edev(ibdev);
+ struct efa_com_basic_stats *bs;
+ struct efa_com_stats_admin *as;
+ struct efa_stats *s;
+ int err;
+
+ params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC;
+ params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL;
+
+ err = efa_com_get_stats(&dev->edev, &params, &result);
+ if (err)
+ return err;
+
+ bs = &result.basic_stats;
+ stats->value[EFA_TX_BYTES] = bs->tx_bytes;
+ stats->value[EFA_TX_PKTS] = bs->tx_pkts;
+ stats->value[EFA_RX_BYTES] = bs->rx_bytes;
+ stats->value[EFA_RX_PKTS] = bs->rx_pkts;
+ stats->value[EFA_RX_DROPS] = bs->rx_drops;
+
+ as = &dev->edev.aq.stats;
+ stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd);
+ stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd);
+ stats->value[EFA_NO_COMPLETION_CMDS] = atomic64_read(&as->no_completion);
+
+ s = &dev->stats;
+ stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd);
+ stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->sw_stats.alloc_pd_err);
+ stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->sw_stats.create_qp_err);
+ stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->sw_stats.reg_mr_err);
+ stats->value[EFA_ALLOC_UCONTEXT_ERR] = atomic64_read(&s->sw_stats.alloc_ucontext_err);
+ stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->sw_stats.create_ah_err);
+
+ return ARRAY_SIZE(efa_stats_names);
+}
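/*
 * Illustrative note, not part of the patch: .alloc_hw_stats and
 * .get_hw_stats are the RDMA core hooks behind the per-port hw_counters
 * exposed in sysfs. The core calls efa_alloc_hw_stats() once to size the
 * counter array from efa_stats_names and then polls efa_get_hw_stats(),
 * rate-limited by the lifespan passed to rdma_alloc_hw_stats_struct().
 * Adding a counter is a two-step change; for a hypothetical "rx_bad_csum"
 * device statistic it would look roughly like:
 *
 *	op(EFA_RX_BAD_CSUM, "rx_bad_csum")		// extend EFA_DEFINE_STATS
 *	stats->value[EFA_RX_BAD_CSUM] = bs->rx_bad_csum;	// in efa_get_hw_stats()
 *
 * Returning ARRAY_SIZE(efa_stats_names) tells the core how many slots were
 * populated.
 */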
+
enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
u8 port_num)
{
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index c142b23bb401..1aeea5d65c01 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -479,6 +479,8 @@ static int _dev_comp_vect_mappings_create(struct hfi1_devdata *dd,
rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), i, cpu);
}
+ free_cpumask_var(available_cpus);
+ free_cpumask_var(non_intr_cpus);
return 0;
fail:
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 67052dc3100c..e0b1238d31df 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1685,6 +1685,14 @@ static u64 access_sw_pio_drain(const struct cntr_entry *entry,
return dd->verbs_dev.n_piodrain;
}
+static u64 access_sw_ctx0_seq_drop(const struct cntr_entry *entry,
+ void *context, int vl, int mode, u64 data)
+{
+ struct hfi1_devdata *dd = context;
+
+ return dd->ctx0_seq_drop;
+}
+
static u64 access_sw_vtx_wait(const struct cntr_entry *entry,
void *context, int vl, int mode, u64 data)
{
@@ -4101,10 +4109,12 @@ def_access_ibp_counter(rc_dupreq);
def_access_ibp_counter(rdma_seq);
def_access_ibp_counter(unaligned);
def_access_ibp_counter(seq_naks);
+def_access_ibp_counter(rc_crwaits);
static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
[C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH),
[C_RX_LEN_ERR] = RXE32_DEV_CNTR_ELEM(RxLenErr, RCV_LENGTH_ERR_CNT, CNTR_SYNTH),
+[C_RX_SHORT_ERR] = RXE32_DEV_CNTR_ELEM(RxShrErr, RCV_SHORT_ERR_CNT, CNTR_SYNTH),
[C_RX_ICRC_ERR] = RXE32_DEV_CNTR_ELEM(RxICrcErr, RCV_ICRC_ERR_CNT, CNTR_SYNTH),
[C_RX_EBP] = RXE32_DEV_CNTR_ELEM(RxEbpCnt, RCV_EBP_CNT, CNTR_SYNTH),
[C_RX_TID_FULL] = RXE32_DEV_CNTR_ELEM(RxTIDFullEr, RCV_TID_FULL_ERR_CNT,
@@ -4248,6 +4258,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
access_sw_cpu_intr),
[C_SW_CPU_RCV_LIM] = CNTR_ELEM("RcvLimit", 0, 0, CNTR_NORMAL,
access_sw_cpu_rcv_limit),
+[C_SW_CTX0_SEQ_DROP] = CNTR_ELEM("SeqDrop0", 0, 0, CNTR_NORMAL,
+ access_sw_ctx0_seq_drop),
[C_SW_VTX_WAIT] = CNTR_ELEM("vTxWait", 0, 0, CNTR_NORMAL,
access_sw_vtx_wait),
[C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL,
@@ -5119,6 +5131,7 @@ static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = {
[C_SW_IBP_RDMA_SEQ] = SW_IBP_CNTR(RdmaSeq, rdma_seq),
[C_SW_IBP_UNALIGNED] = SW_IBP_CNTR(Unaligned, unaligned),
[C_SW_IBP_SEQ_NAK] = SW_IBP_CNTR(SeqNak, seq_naks),
+[C_SW_IBP_RC_CRWAITS] = SW_IBP_CNTR(RcCrWait, rc_crwaits),
[C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL,
access_sw_cpu_rc_acks),
[C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL,
@@ -6860,7 +6873,7 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
}
rcvmask = HFI1_RCVCTRL_CTXT_ENB;
/* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */
- rcvmask |= rcd->rcvhdrtail_kvaddr ?
+ rcvmask |= hfi1_rcvhdrtail_kvaddr(rcd) ?
HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
hfi1_rcvctrl(dd, rcvmask, rcd);
hfi1_rcd_put(rcd);
@@ -8392,20 +8405,62 @@ void force_recv_intr(struct hfi1_ctxtdata *rcd)
static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
{
u32 tail;
- int present;
- if (!rcd->rcvhdrtail_kvaddr)
- present = (rcd->seq_cnt ==
- rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
- else /* is RDMA rtail */
- present = (rcd->head != get_rcvhdrtail(rcd));
-
- if (present)
+ if (hfi1_packet_present(rcd))
return 1;
/* fall back to a CSR read, correct independent of DMA_RTAIL */
tail = (u32)read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
- return rcd->head != tail;
+ return hfi1_rcd_head(rcd) != tail;
+}
+
+/**
+ * receive_interrupt_common() - Common code for receive context interrupt
+ * handlers: update traces, increment the kernel IRQ counter and set up
+ * ASPM when needed.
+ */
+static void receive_interrupt_common(struct hfi1_ctxtdata *rcd)
+{
+ struct hfi1_devdata *dd = rcd->dd;
+
+ trace_hfi1_receive_interrupt(dd, rcd);
+ this_cpu_inc(*dd->int_counter);
+ aspm_ctx_disable(rcd);
+}
+
+/**
+ * __hfi1_rcd_eoi_intr() - Make HW issue a receive interrupt
+ * when there are packets present in the queue. When calling
+ * with interrupts enabled, please use hfi1_rcd_eoi_intr().
+ *
+ * @rcd: valid receive context
+ */
+static void __hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd)
+{
+ clear_recv_intr(rcd);
+ if (check_packet_present(rcd))
+ force_recv_intr(rcd);
+}
+
+/**
+ * hfi1_rcd_eoi_intr() - End of Interrupt processing action
+ *
+ * @rcd: Ptr to hfi1_ctxtdata of receive context
+ *
+ * Hold IRQs so we can safely clear the interrupt and
+ * recheck for a packet that may have arrived after the previous
+ * check and the interrupt clear. If a packet arrived, force another
+ * interrupt. This routine can be called at the end of receive packet
+ * processing in interrupt service routines, interrupt service threads
+ * and softirqs.
+ */
+static void hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __hfi1_rcd_eoi_intr(rcd);
+ local_irq_restore(flags);
}
/*
@@ -8419,13 +8474,9 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
irqreturn_t receive_context_interrupt(int irq, void *data)
{
struct hfi1_ctxtdata *rcd = data;
- struct hfi1_devdata *dd = rcd->dd;
int disposition;
- int present;
- trace_hfi1_receive_interrupt(dd, rcd);
- this_cpu_inc(*dd->int_counter);
- aspm_ctx_disable(rcd);
+ receive_interrupt_common(rcd);
/* receive interrupt remains blocked while processing packets */
disposition = rcd->do_interrupt(rcd, 0);
@@ -8438,17 +8489,7 @@ irqreturn_t receive_context_interrupt(int irq, void *data)
if (disposition == RCV_PKT_LIMIT)
return IRQ_WAKE_THREAD;
- /*
- * The packet processor detected no more packets. Clear the receive
- * interrupt and recheck for a packet that may have arrived
- * after the previous check and interrupt clear. If a packet arrived,
- * force another interrupt.
- */
- clear_recv_intr(rcd);
- present = check_packet_present(rcd);
- if (present)
- force_recv_intr(rcd);
-
+ __hfi1_rcd_eoi_intr(rcd);
return IRQ_HANDLED;
}
@@ -8459,24 +8500,11 @@ irqreturn_t receive_context_interrupt(int irq, void *data)
irqreturn_t receive_context_thread(int irq, void *data)
{
struct hfi1_ctxtdata *rcd = data;
- int present;
/* receive interrupt is still blocked from the IRQ handler */
(void)rcd->do_interrupt(rcd, 1);
- /*
- * The packet processor will only return if it detected no more
- * packets. Hold IRQs here so we can safely clear the interrupt and
- * recheck for a packet that may have arrived after the previous
- * check and the interrupt clear. If a packet arrived, force another
- * interrupt.
- */
- local_irq_disable();
- clear_recv_intr(rcd);
- present = check_packet_present(rcd);
- if (present)
- force_recv_intr(rcd);
- local_irq_enable();
+ hfi1_rcd_eoi_intr(rcd);
return IRQ_HANDLED;
}
@@ -10047,7 +10075,7 @@ u32 lrh_max_header_bytes(struct hfi1_devdata *dd)
* the first kernel context would have been allocated by now so
* we are guaranteed a valid value.
*/
- return (dd->rcd[0]->rcvhdrqentsize - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2;
+ return (get_hdrqentsize(dd->rcd[0]) - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2;
}
/*
@@ -10092,7 +10120,7 @@ static void set_send_length(struct hfi1_pportdata *ppd)
thres = min(sc_percent_to_threshold(dd->vld[i].sc, 50),
sc_mtu_to_threshold(dd->vld[i].sc,
dd->vld[i].mtu,
- dd->rcd[0]->rcvhdrqentsize));
+ get_hdrqentsize(dd->rcd[0])));
for (j = 0; j < INIT_SC_PER_VL; j++)
sc_set_cr_threshold(
pio_select_send_context_vl(dd, j, i),
@@ -11819,7 +11847,7 @@ u32 hdrqempty(struct hfi1_ctxtdata *rcd)
head = (read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD)
& RCV_HDR_HEAD_HEAD_SMASK) >> RCV_HDR_HEAD_HEAD_SHIFT;
- if (rcd->rcvhdrtail_kvaddr)
+ if (hfi1_rcvhdrtail_kvaddr(rcd))
tail = get_rcvhdrtail(rcd);
else
tail = read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
@@ -11863,6 +11891,84 @@ static u32 encoded_size(u32 size)
return 0x1; /* if invalid, go with the minimum size */
}
+/**
+ * encode_rcv_header_entry_size - return the chip-specific encoding for size
+ * @size: size in dwords
+ *
+ * Convert a receive header entry size to the encoding used in the CSR.
+ *
+ * Return zero if the given size is invalid, otherwise the encoding.
+ */
+u8 encode_rcv_header_entry_size(u8 size)
+{
+ /* there are only 3 valid receive header entry sizes */
+ if (size == 2)
+ return 1;
+ if (size == 16)
+ return 2;
+ if (size == 32)
+ return 4;
+ return 0; /* invalid */
+}
+
+/**
+ * hfi1_validate_rcvhdrcnt - validate hdrcnt
+ * @dd: the device data
+ * @thecnt: the header count
+ */
+int hfi1_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt)
+{
+ if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
+ dd_dev_err(dd, "Receive header queue count too small\n");
+ return -EINVAL;
+ }
+
+ if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
+ dd_dev_err(dd,
+ "Receive header queue count cannot be greater than %u\n",
+ HFI1_MAX_HDRQ_EGRBUF_CNT);
+ return -EINVAL;
+ }
+
+ if (thecnt % HDRQ_INCREMENT) {
+ dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n",
+ thecnt, HDRQ_INCREMENT);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * set_hdrq_regs - set header queue registers for context
+ * @dd: the device data
+ * @ctxt: the context
+ * @entsize: the dword entry size
+ * @hdrcnt: the number of header entries
+ */
+void set_hdrq_regs(struct hfi1_devdata *dd, u8 ctxt, u8 entsize, u16 hdrcnt)
+{
+ u64 reg;
+
+ reg = (((u64)hdrcnt >> HDRQ_SIZE_SHIFT) & RCV_HDR_CNT_CNT_MASK) <<
+ RCV_HDR_CNT_CNT_SHIFT;
+ write_kctxt_csr(dd, ctxt, RCV_HDR_CNT, reg);
+ reg = ((u64)encode_rcv_header_entry_size(entsize) &
+ RCV_HDR_ENT_SIZE_ENT_SIZE_MASK) <<
+ RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT;
+ write_kctxt_csr(dd, ctxt, RCV_HDR_ENT_SIZE, reg);
+ reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK) <<
+ RCV_HDR_SIZE_HDR_SIZE_SHIFT;
+ write_kctxt_csr(dd, ctxt, RCV_HDR_SIZE, reg);
+
+ /*
+ * Program dummy tail address for every receive context
+ * before enabling any receive context
+ */
+ write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
+ dd->rcvhdrtail_dummy_dma);
+}
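/*
 * Illustrative note, not part of the patch: each write above uses the same
 * shift-and-mask idiom for placing a value into a CSR field,
 *
 *	reg = ((u64)value & FIELD_MASK) << FIELD_SHIFT;
 *
 * e.g. a hypothetical 4-bit field at bit 8 holding the value 5 becomes
 * ((5 & 0xf) << 8) == 0x500. RCV_HDR_CNT is programmed in units of
 * HDRQ_INCREMENT (the count is shifted right by HDRQ_SIZE_SHIFT before
 * masking), which appears to be why hfi1_validate_rcvhdrcnt() insists the
 * count be a multiple of HDRQ_INCREMENT.
 */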
+
void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
struct hfi1_ctxtdata *rcd)
{
@@ -11884,13 +11990,13 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
/* reset the tail and hdr addresses, and sequence count */
write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
rcd->rcvhdrq_dma);
- if (rcd->rcvhdrtail_kvaddr)
+ if (hfi1_rcvhdrtail_kvaddr(rcd))
write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
rcd->rcvhdrqtailaddr_dma);
- rcd->seq_cnt = 1;
+ hfi1_set_seq_cnt(rcd, 1);
/* reset the cached receive header queue head value */
- rcd->head = 0;
+ hfi1_set_rcd_head(rcd, 0);
/*
* Zero the receive header queue so we don't get false
@@ -11970,7 +12076,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
IS_RCVAVAIL_START + rcd->ctxt, false);
rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
}
- if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr)
+ if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && hfi1_rcvhdrtail_kvaddr(rcd))
rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
/* See comment on RcvCtxtCtrl.TailUpd above */
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index b76cf81f927f..725509261016 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -358,6 +358,8 @@
#define MAX_EAGER_BUFFER (256 * 1024)
#define MAX_EAGER_BUFFER_TOTAL (64 * (1 << 20)) /* max per ctxt 64MB */
#define MAX_EXPECTED_BUFFER (2048 * 1024)
+#define HFI1_MIN_HDRQ_EGRBUF_CNT 32
+#define HFI1_MAX_HDRQ_EGRBUF_CNT 16352
/*
* Receive expected base and count and eager base and count increment -
@@ -699,6 +701,10 @@ static inline u32 chip_rcv_array_count(struct hfi1_devdata *dd)
return read_csr(dd, RCV_ARRAY_CNT);
}
+u8 encode_rcv_header_entry_size(u8 size);
+int hfi1_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt);
+void set_hdrq_regs(struct hfi1_devdata *dd, u8 ctxt, u8 entsize, u16 hdrcnt);
+
u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl,
u32 dw_len);
@@ -859,6 +865,7 @@ static inline int idx_from_vl(int vl)
enum {
C_RCV_OVF = 0,
C_RX_LEN_ERR,
+ C_RX_SHORT_ERR,
C_RX_ICRC_ERR,
C_RX_EBP,
C_RX_TID_FULL,
@@ -926,6 +933,7 @@ enum {
C_DC_PG_STS_TX_MBE_CNT,
C_SW_CPU_INTR,
C_SW_CPU_RCV_LIM,
+ C_SW_CTX0_SEQ_DROP,
C_SW_VTX_WAIT,
C_SW_PIO_WAIT,
C_SW_PIO_DRAIN,
@@ -1245,6 +1253,7 @@ enum {
C_SW_IBP_RDMA_SEQ,
C_SW_IBP_UNALIGNED,
C_SW_IBP_SEQ_NAK,
+ C_SW_IBP_RC_CRWAITS,
C_SW_CPU_RC_ACKS,
C_SW_CPU_RC_QACKS,
C_SW_CPU_RC_DELAYED_COMP,
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index ab3589d17aee..fb3ec9bff7a2 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -381,6 +381,7 @@
#define DC_LCB_STS_LINK_TRANSFER_ACTIVE (DC_LCB_CSRS + 0x000000000468)
#define DC_LCB_STS_ROUND_TRIP_LTP_CNT (DC_LCB_CSRS + 0x0000000004B0)
#define RCV_LENGTH_ERR_CNT 0
+#define RCV_SHORT_ERR_CNT 2
#define RCV_ICRC_ERR_CNT 6
#define RCV_EBP_CNT 9
#define RCV_BUF_OVFL_CNT 10
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index d47da7b0438f..40a1ff0c8a8e 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -323,6 +323,9 @@ struct diag_pkt {
/* RHF receive type error - bypass packet errors */
#define RHF_RTE_BYPASS_NO_ERR 0x0
+/* MAX RcvSEQ */
+#define RHF_MAX_SEQ 13
+
/* IB - LRH header constants */
#define HFI1_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */
#define HFI1_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index d268bf9c42ee..4633a0ce1a8c 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -379,7 +379,7 @@ static void *_rcds_seq_next(struct seq_file *s, void *v, loff_t *pos)
struct hfi1_devdata *dd = dd_from_dev(ibd);
++*pos;
- if (!dd->rcd || *pos >= dd->n_krcv_queues)
+ if (!dd->rcd || *pos >= dd->num_rcv_contexts)
return NULL;
return pos;
}
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 01aa1f132f55..049d15befe58 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -411,14 +411,14 @@ drop:
static inline void init_packet(struct hfi1_ctxtdata *rcd,
struct hfi1_packet *packet)
{
- packet->rsize = rcd->rcvhdrqentsize; /* words */
- packet->maxcnt = rcd->rcvhdrq_cnt * packet->rsize; /* words */
+ packet->rsize = get_hdrqentsize(rcd); /* words */
+ packet->maxcnt = get_hdrq_cnt(rcd) * packet->rsize; /* words */
packet->rcd = rcd;
packet->updegr = 0;
packet->etail = -1;
packet->rhf_addr = get_rhf_addr(rcd);
packet->rhf = rhf_to_cpu(packet->rhf_addr);
- packet->rhqoff = rcd->head;
+ packet->rhqoff = hfi1_rcd_head(rcd);
packet->numpkt = 0;
}
@@ -551,22 +551,22 @@ static inline void init_ps_mdata(struct ps_mdata *mdata,
mdata->maxcnt = packet->maxcnt;
mdata->ps_head = packet->rhqoff;
- if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
+ if (get_dma_rtail_setting(rcd)) {
mdata->ps_tail = get_rcvhdrtail(rcd);
if (rcd->ctxt == HFI1_CTRL_CTXT)
- mdata->ps_seq = rcd->seq_cnt;
+ mdata->ps_seq = hfi1_seq_cnt(rcd);
else
mdata->ps_seq = 0; /* not used with DMA_RTAIL */
} else {
mdata->ps_tail = 0; /* used only with DMA_RTAIL*/
- mdata->ps_seq = rcd->seq_cnt;
+ mdata->ps_seq = hfi1_seq_cnt(rcd);
}
}
static inline int ps_done(struct ps_mdata *mdata, u64 rhf,
struct hfi1_ctxtdata *rcd)
{
- if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
+ if (get_dma_rtail_setting(rcd))
return mdata->ps_head == mdata->ps_tail;
return mdata->ps_seq != rhf_rcv_seq(rhf);
}
@@ -592,11 +592,9 @@ static inline void update_ps_mdata(struct ps_mdata *mdata,
mdata->ps_head = 0;
/* Control context must do seq counting */
- if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ||
- (rcd->ctxt == HFI1_CTRL_CTXT)) {
- if (++mdata->ps_seq > 13)
- mdata->ps_seq = 1;
- }
+ if (!get_dma_rtail_setting(rcd) ||
+ rcd->ctxt == HFI1_CTRL_CTXT)
+ mdata->ps_seq = hfi1_seq_incr_wrap(mdata->ps_seq);
}
/*
@@ -734,6 +732,7 @@ static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
{
int ret;
+ packet->rcd->dd->ctx0_seq_drop++;
/* Set up for the next packet */
packet->rhqoff += packet->rsize;
if (packet->rhqoff >= packet->maxcnt)
@@ -769,7 +768,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
* The +2 is the size of the RHF.
*/
prefetch_range(packet->ebuf,
- packet->tlen - ((packet->rcd->rcvhdrqentsize -
+ packet->tlen - ((get_hdrqentsize(packet->rcd) -
(rhf_hdrq_offset(packet->rhf)
+ 2)) * 4));
}
@@ -823,7 +822,7 @@ static inline void finish_packet(struct hfi1_packet *packet)
* The only thing we need to do is a final update and call for an
* interrupt
*/
- update_usrhead(packet->rcd, packet->rcd->head, packet->updegr,
+ update_usrhead(packet->rcd, hfi1_rcd_head(packet->rcd), packet->updegr,
packet->etail, rcv_intr_dynamic, packet->numpkt);
}
@@ -832,13 +831,11 @@ static inline void finish_packet(struct hfi1_packet *packet)
*/
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
{
- u32 seq;
int last = RCV_PKT_OK;
struct hfi1_packet packet;
init_packet(rcd, &packet);
- seq = rhf_rcv_seq(packet.rhf);
- if (seq != rcd->seq_cnt) {
+ if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf))) {
last = RCV_PKT_DONE;
goto bail;
}
@@ -847,15 +844,12 @@ int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
while (last == RCV_PKT_OK) {
last = process_rcv_packet(&packet, thread);
- seq = rhf_rcv_seq(packet.rhf);
- if (++rcd->seq_cnt > 13)
- rcd->seq_cnt = 1;
- if (seq != rcd->seq_cnt)
+ if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
last = RCV_PKT_DONE;
process_rcv_update(last, &packet);
}
process_rcv_qp_work(&packet);
- rcd->head = packet.rhqoff;
+ hfi1_set_rcd_head(rcd, packet.rhqoff);
bail:
finish_packet(&packet);
return last;
@@ -884,15 +878,14 @@ int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread)
process_rcv_update(last, &packet);
}
process_rcv_qp_work(&packet);
- rcd->head = packet.rhqoff;
+ hfi1_set_rcd_head(rcd, packet.rhqoff);
bail:
finish_packet(&packet);
return last;
}
-static inline void set_nodma_rtail(struct hfi1_devdata *dd, u16 ctxt)
+static void set_all_fastpath(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
{
- struct hfi1_ctxtdata *rcd;
u16 i;
/*
@@ -900,50 +893,17 @@ static inline void set_nodma_rtail(struct hfi1_devdata *dd, u16 ctxt)
* interrupt handler only for that context. Otherwise, switch
* interrupt handler for all statically allocated kernel contexts.
*/
- if (ctxt >= dd->first_dyn_alloc_ctxt) {
- rcd = hfi1_rcd_get_by_index_safe(dd, ctxt);
- if (rcd) {
- rcd->do_interrupt =
- &handle_receive_interrupt_nodma_rtail;
- hfi1_rcd_put(rcd);
- }
- return;
- }
-
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) {
- rcd = hfi1_rcd_get_by_index(dd, i);
- if (rcd)
- rcd->do_interrupt =
- &handle_receive_interrupt_nodma_rtail;
+ if (rcd->ctxt >= dd->first_dyn_alloc_ctxt && !rcd->is_vnic) {
+ hfi1_rcd_get(rcd);
+ hfi1_set_fast(rcd);
hfi1_rcd_put(rcd);
- }
-}
-
-static inline void set_dma_rtail(struct hfi1_devdata *dd, u16 ctxt)
-{
- struct hfi1_ctxtdata *rcd;
- u16 i;
-
- /*
- * For dynamically allocated kernel contexts (like vnic) switch
- * interrupt handler only for that context. Otherwise, switch
- * interrupt handler for all statically allocated kernel contexts.
- */
- if (ctxt >= dd->first_dyn_alloc_ctxt) {
- rcd = hfi1_rcd_get_by_index_safe(dd, ctxt);
- if (rcd) {
- rcd->do_interrupt =
- &handle_receive_interrupt_dma_rtail;
- hfi1_rcd_put(rcd);
- }
return;
}
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) {
+ for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) {
rcd = hfi1_rcd_get_by_index(dd, i);
- if (rcd)
- rcd->do_interrupt =
- &handle_receive_interrupt_dma_rtail;
+ if (rcd && (i < dd->first_dyn_alloc_ctxt || rcd->is_vnic))
+ hfi1_set_fast(rcd);
hfi1_rcd_put(rcd);
}
}
@@ -959,17 +919,14 @@ void set_all_slowpath(struct hfi1_devdata *dd)
if (!rcd)
continue;
if (i < dd->first_dyn_alloc_ctxt || rcd->is_vnic)
- rcd->do_interrupt = &handle_receive_interrupt;
+ rcd->do_interrupt = rcd->slow_handler;
hfi1_rcd_put(rcd);
}
}
-static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
- struct hfi1_packet *packet,
- struct hfi1_devdata *dd)
+static bool __set_armed_to_active(struct hfi1_packet *packet)
{
- struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
u8 etype = rhf_rcv_type(packet->rhf);
u8 sc = SC15_PACKET;
@@ -984,19 +941,34 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
sc = hfi1_16B_get_sc(hdr);
}
if (sc != SC15_PACKET) {
- int hwstate = driver_lstate(rcd->ppd);
+ int hwstate = driver_lstate(packet->rcd->ppd);
+ struct work_struct *lsaw =
+ &packet->rcd->ppd->linkstate_active_work;
if (hwstate != IB_PORT_ACTIVE) {
- dd_dev_info(dd,
+ dd_dev_info(packet->rcd->dd,
"Unexpected link state %s\n",
opa_lstate_name(hwstate));
- return 0;
+ return false;
}
- queue_work(rcd->ppd->link_wq, lsaw);
- return 1;
+ queue_work(packet->rcd->ppd->link_wq, lsaw);
+ return true;
}
- return 0;
+ return false;
+}
+
+/**
+ * set_armed_to_active - the fast path for armed to active
+ * @packet: the packet structure
+ *
+ * Return true if packet processing needs to bail.
+ */
+static bool set_armed_to_active(struct hfi1_packet *packet)
+{
+ if (likely(packet->rcd->ppd->host_link_state != HLS_UP_ARMED))
+ return false;
+ return __set_armed_to_active(packet);
}
/*
@@ -1019,10 +991,8 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
init_packet(rcd, &packet);
- if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
- u32 seq = rhf_rcv_seq(packet.rhf);
-
- if (seq != rcd->seq_cnt) {
+ if (!get_dma_rtail_setting(rcd)) {
+ if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf))) {
last = RCV_PKT_DONE;
goto bail;
}
@@ -1039,22 +1009,15 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
* Control context can potentially receive an invalid
* rhf. Drop such packets.
*/
- if (rcd->ctxt == HFI1_CTRL_CTXT) {
- u32 seq = rhf_rcv_seq(packet.rhf);
-
- if (seq != rcd->seq_cnt)
+ if (rcd->ctxt == HFI1_CTRL_CTXT)
+ if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf)))
skip_pkt = 1;
- }
}
prescan_rxq(rcd, &packet);
while (last == RCV_PKT_OK) {
- if (unlikely(dd->do_drop &&
- atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) ==
- DROP_PACKET_ON)) {
- dd->do_drop = 0;
-
+ if (hfi1_need_drop(dd)) {
/* On to the next packet */
packet.rhqoff += packet.rsize;
packet.rhf_addr = (__le32 *)rcd->rcvhdrq +
@@ -1066,26 +1029,14 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
last = skip_rcv_packet(&packet, thread);
skip_pkt = 0;
} else {
- /* Auto activate link on non-SC15 packet receive */
- if (unlikely(rcd->ppd->host_link_state ==
- HLS_UP_ARMED) &&
- set_armed_to_active(rcd, &packet, dd))
+ if (set_armed_to_active(&packet))
goto bail;
last = process_rcv_packet(&packet, thread);
}
- if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
- u32 seq = rhf_rcv_seq(packet.rhf);
-
- if (++rcd->seq_cnt > 13)
- rcd->seq_cnt = 1;
- if (seq != rcd->seq_cnt)
+ if (!get_dma_rtail_setting(rcd)) {
+ if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
last = RCV_PKT_DONE;
- if (needset) {
- dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n");
- set_nodma_rtail(dd, rcd->ctxt);
- needset = 0;
- }
} else {
if (packet.rhqoff == hdrqtail)
last = RCV_PKT_DONE;
@@ -1094,27 +1045,24 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
* rhf. Drop such packets.
*/
if (rcd->ctxt == HFI1_CTRL_CTXT) {
- u32 seq = rhf_rcv_seq(packet.rhf);
+ bool lseq;
- if (++rcd->seq_cnt > 13)
- rcd->seq_cnt = 1;
- if (!last && (seq != rcd->seq_cnt))
+ lseq = hfi1_seq_incr(rcd,
+ rhf_rcv_seq(packet.rhf));
+ if (!last && lseq)
skip_pkt = 1;
}
-
- if (needset) {
- dd_dev_info(dd,
- "Switching to DMA_RTAIL\n");
- set_dma_rtail(dd, rcd->ctxt);
- needset = 0;
- }
}
+ if (needset) {
+ needset = false;
+ set_all_fastpath(dd, rcd);
+ }
process_rcv_update(last, &packet);
}
process_rcv_qp_work(&packet);
- rcd->head = packet.rhqoff;
+ hfi1_set_rcd_head(rcd, packet.rhqoff);
bail:
/*
@@ -1606,23 +1554,22 @@ void handle_eflags(struct hfi1_packet *packet)
* The following functions are called by the interrupt handler. They are type
* specific handlers for each packet type.
*/
-static int process_receive_ib(struct hfi1_packet *packet)
+static void process_receive_ib(struct hfi1_packet *packet)
{
if (hfi1_setup_9B_packet(packet))
- return RHF_RCV_CONTINUE;
+ return;
if (unlikely(hfi1_dbg_should_fault_rx(packet)))
- return RHF_RCV_CONTINUE;
+ return;
trace_hfi1_rcvhdr(packet);
if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
- return RHF_RCV_CONTINUE;
+ return;
}
hfi1_ib_rcv(packet);
- return RHF_RCV_CONTINUE;
}
static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet)
@@ -1638,23 +1585,23 @@ static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet)
return false;
}
-static int process_receive_bypass(struct hfi1_packet *packet)
+static void process_receive_bypass(struct hfi1_packet *packet)
{
struct hfi1_devdata *dd = packet->rcd->dd;
if (hfi1_is_vnic_packet(packet)) {
hfi1_vnic_bypass_rcv(packet);
- return RHF_RCV_CONTINUE;
+ return;
}
if (hfi1_setup_bypass_packet(packet))
- return RHF_RCV_CONTINUE;
+ return;
trace_hfi1_rcvhdr(packet);
if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
- return RHF_RCV_CONTINUE;
+ return;
}
if (hfi1_16B_get_l2(packet->hdr) == 0x2) {
@@ -1677,17 +1624,16 @@ static int process_receive_bypass(struct hfi1_packet *packet)
(OPA_EI_STATUS_SMASK | BAD_L2_ERR);
}
}
- return RHF_RCV_CONTINUE;
}
-static int process_receive_error(struct hfi1_packet *packet)
+static void process_receive_error(struct hfi1_packet *packet)
{
/* KHdrHCRCErr -- KDETH packet with a bad HCRC */
if (unlikely(
hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
(rhf_rcv_type_err(packet->rhf) == RHF_RCV_TYPE_ERROR ||
packet->rhf & RHF_DC_ERR)))
- return RHF_RCV_CONTINUE;
+ return;
hfi1_setup_ib_header(packet);
handle_eflags(packet);
@@ -1695,32 +1641,29 @@ static int process_receive_error(struct hfi1_packet *packet)
if (unlikely(rhf_err_flags(packet->rhf)))
dd_dev_err(packet->rcd->dd,
"Unhandled error packet received. Dropping.\n");
-
- return RHF_RCV_CONTINUE;
}
-static int kdeth_process_expected(struct hfi1_packet *packet)
+static void kdeth_process_expected(struct hfi1_packet *packet)
{
hfi1_setup_9B_packet(packet);
if (unlikely(hfi1_dbg_should_fault_rx(packet)))
- return RHF_RCV_CONTINUE;
+ return;
if (unlikely(rhf_err_flags(packet->rhf))) {
struct hfi1_ctxtdata *rcd = packet->rcd;
if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet))
- return RHF_RCV_CONTINUE;
+ return;
}
hfi1_kdeth_expected_rcv(packet);
- return RHF_RCV_CONTINUE;
}
-static int kdeth_process_eager(struct hfi1_packet *packet)
+static void kdeth_process_eager(struct hfi1_packet *packet)
{
hfi1_setup_9B_packet(packet);
if (unlikely(hfi1_dbg_should_fault_rx(packet)))
- return RHF_RCV_CONTINUE;
+ return;
trace_hfi1_rcvhdr(packet);
if (unlikely(rhf_err_flags(packet->rhf))) {
@@ -1728,37 +1671,41 @@ static int kdeth_process_eager(struct hfi1_packet *packet)
show_eflags_errs(packet);
if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet))
- return RHF_RCV_CONTINUE;
+ return;
}
hfi1_kdeth_eager_rcv(packet);
- return RHF_RCV_CONTINUE;
}
-static int process_receive_invalid(struct hfi1_packet *packet)
+static void process_receive_invalid(struct hfi1_packet *packet)
{
dd_dev_err(packet->rcd->dd, "Invalid packet type %d. Dropping\n",
rhf_rcv_type(packet->rhf));
- return RHF_RCV_CONTINUE;
}
+#define HFI1_RCVHDR_DUMP_MAX 5
+
void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd)
{
struct hfi1_packet packet;
struct ps_mdata mdata;
+ int i;
- seq_printf(s, "Rcd %u: RcvHdr cnt %u entsize %u %s head %llu tail %llu\n",
- rcd->ctxt, rcd->rcvhdrq_cnt, rcd->rcvhdrqentsize,
- HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ?
+ seq_printf(s, "Rcd %u: RcvHdr cnt %u entsize %u %s ctrl 0x%08llx status 0x%08llx, head %llu tail %llu sw head %u\n",
+ rcd->ctxt, get_hdrq_cnt(rcd), get_hdrqentsize(rcd),
+ get_dma_rtail_setting(rcd) ?
"dma_rtail" : "nodma_rtail",
+ read_kctxt_csr(rcd->dd, rcd->ctxt, RCV_CTXT_CTRL),
+ read_kctxt_csr(rcd->dd, rcd->ctxt, RCV_CTXT_STATUS),
read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD) &
RCV_HDR_HEAD_HEAD_MASK,
- read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL));
+ read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL),
+ rcd->head);
init_packet(rcd, &packet);
init_ps_mdata(&mdata, &packet);
- while (1) {
+ for (i = 0; i < HFI1_RCVHDR_DUMP_MAX; i++) {
__le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
rcd->rhf_offset;
struct ib_header *hdr;
diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c
index 93613e5def9b..986c12153e62 100644
--- a/drivers/infiniband/hw/hfi1/fault.c
+++ b/drivers/infiniband/hw/hfi1/fault.c
@@ -141,12 +141,14 @@ static ssize_t fault_opcodes_write(struct file *file, const char __user *buf,
if (!data)
return -ENOMEM;
copy = min(len, datalen - 1);
- if (copy_from_user(data, buf, copy))
- return -EFAULT;
+ if (copy_from_user(data, buf, copy)) {
+ ret = -EFAULT;
+ goto free_data;
+ }
ret = debugfs_file_get(file->f_path.dentry);
if (unlikely(ret))
- return ret;
+ goto free_data;
ptr = data;
token = ptr;
for (ptr = data; *ptr; ptr = end + 1, token = ptr) {
@@ -195,6 +197,7 @@ static ssize_t fault_opcodes_write(struct file *file, const char __user *buf,
ret = len;
debugfs_file_put(file->f_path.dentry);
+free_data:
kfree(data);
return ret;
}
@@ -214,7 +217,7 @@ static ssize_t fault_opcodes_read(struct file *file, char __user *buf,
return -ENOMEM;
ret = debugfs_file_get(file->f_path.dentry);
if (unlikely(ret))
- return ret;
+ goto free_data;
bit = find_first_bit(fault->opcodes, bitsize);
while (bit < bitsize) {
zero = find_next_zero_bit(fault->opcodes, bitsize, bit);
@@ -232,6 +235,7 @@ static ssize_t fault_opcodes_read(struct file *file, char __user *buf,
data[size - 1] = '\n';
data[size] = '\0';
ret = simple_read_from_buffer(buf, len, pos, data, size);
+free_data:
kfree(data);
return ret;
}
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index f9a7e9d29c8b..259115886d35 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -200,23 +200,24 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
fd = kzalloc(sizeof(*fd), GFP_KERNEL);
- if (fd) {
- fd->rec_cpu_num = -1; /* no cpu affinity by default */
- fd->mm = current->mm;
- mmgrab(fd->mm);
- fd->dd = dd;
- kobject_get(&fd->dd->kobj);
- fp->private_data = fd;
- } else {
- fp->private_data = NULL;
-
- if (atomic_dec_and_test(&dd->user_refcount))
- complete(&dd->user_comp);
-
- return -ENOMEM;
- }
-
+ if (!fd || init_srcu_struct(&fd->pq_srcu))
+ goto nomem;
+ spin_lock_init(&fd->pq_rcu_lock);
+ spin_lock_init(&fd->tid_lock);
+ spin_lock_init(&fd->invalid_lock);
+ fd->rec_cpu_num = -1; /* no cpu affinity by default */
+ fd->mm = current->mm;
+ mmgrab(fd->mm);
+ fd->dd = dd;
+ kobject_get(&fd->dd->kobj);
+ fp->private_data = fd;
return 0;
+nomem:
+ kfree(fd);
+ fp->private_data = NULL;
+ if (atomic_dec_and_test(&dd->user_refcount))
+ complete(&dd->user_comp);
+ return -ENOMEM;
}
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
@@ -301,21 +302,30 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
{
struct hfi1_filedata *fd = kiocb->ki_filp->private_data;
- struct hfi1_user_sdma_pkt_q *pq = fd->pq;
+ struct hfi1_user_sdma_pkt_q *pq;
struct hfi1_user_sdma_comp_q *cq = fd->cq;
int done = 0, reqs = 0;
unsigned long dim = from->nr_segs;
+ int idx;
- if (!cq || !pq)
+ idx = srcu_read_lock(&fd->pq_srcu);
+ pq = srcu_dereference(fd->pq, &fd->pq_srcu);
+ if (!cq || !pq) {
+ srcu_read_unlock(&fd->pq_srcu, idx);
return -EIO;
+ }
- if (!iter_is_iovec(from) || !dim)
+ if (!iter_is_iovec(from) || !dim) {
+ srcu_read_unlock(&fd->pq_srcu, idx);
return -EINVAL;
+ }
trace_hfi1_sdma_request(fd->dd, fd->uctxt->ctxt, fd->subctxt, dim);
- if (atomic_read(&pq->n_reqs) == pq->n_max_reqs)
+ if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) {
+ srcu_read_unlock(&fd->pq_srcu, idx);
return -ENOSPC;
+ }
while (dim) {
int ret;
@@ -333,6 +343,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
reqs++;
}
+ srcu_read_unlock(&fd->pq_srcu, idx);
return reqs;
}
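/*
 * Illustrative sketch, not part of the patch: fd->pq is now published under
 * SRCU, so readers follow the usual sleepable-RCU pattern shown above,
 * while the teardown path (not in this hunk) would pair it with something
 * like the following; use() is a hypothetical placeholder:
 *
 *	// reader
 *	idx = srcu_read_lock(&fd->pq_srcu);
 *	pq = srcu_dereference(fd->pq, &fd->pq_srcu);
 *	if (pq)
 *		use(pq);		// may sleep, unlike classic RCU
 *	srcu_read_unlock(&fd->pq_srcu, idx);
 *
 *	// updater
 *	spin_lock(&fd->pq_rcu_lock);
 *	rcu_assign_pointer(fd->pq, NULL);
 *	spin_unlock(&fd->pq_rcu_lock);
 *	synchronize_srcu(&fd->pq_srcu);	// wait out readers, then free pq
 *
 * SRCU is what allows hfi1_write_iter() to block while holding the
 * read-side lock, which plain RCU would not permit.
 */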
@@ -505,12 +516,12 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
ret = -EINVAL;
goto done;
}
- if ((flags & VM_WRITE) || !uctxt->rcvhdrtail_kvaddr) {
+ if ((flags & VM_WRITE) || !hfi1_rcvhdrtail_kvaddr(uctxt)) {
ret = -EPERM;
goto done;
}
memlen = PAGE_SIZE;
- memvirt = (void *)uctxt->rcvhdrtail_kvaddr;
+ memvirt = (void *)hfi1_rcvhdrtail_kvaddr(uctxt);
flags &= ~VM_MAYWRITE;
break;
case SUBCTXT_UREGS:
@@ -707,6 +718,7 @@ done:
if (atomic_dec_and_test(&dd->user_refcount))
complete(&dd->user_comp);
+ cleanup_srcu_struct(&fdata->pq_srcu);
kfree(fdata);
return 0;
}
@@ -1090,7 +1102,7 @@ static void user_init(struct hfi1_ctxtdata *uctxt)
* don't have to wait to be sure the DMA update has happened
* (chip resets head/tail to 0 on transition to enable).
*/
- if (uctxt->rcvhdrtail_kvaddr)
+ if (hfi1_rcvhdrtail_kvaddr(uctxt))
clear_rcvhdrtail(uctxt);
/* Setup J_KEY before enabling the context */
@@ -1138,7 +1150,7 @@ static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
HFI1_CAP_UGET_MASK(uctxt->flags, MASK) |
HFI1_CAP_KGET_MASK(uctxt->flags, K2U);
/* adjust flag if this fd is not able to cache */
- if (!fd->handler)
+ if (!fd->use_mn)
cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */
cinfo.num_active = hfi1_count_active_units();
@@ -1154,8 +1166,8 @@ static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
cinfo.send_ctxt = uctxt->sc->hw_context;
cinfo.egrtids = uctxt->egrbufs.alloced;
- cinfo.rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
- cinfo.rcvhdrq_entsize = uctxt->rcvhdrqentsize << 2;
+ cinfo.rcvhdrq_cnt = get_hdrq_cnt(uctxt);
+ cinfo.rcvhdrq_entsize = get_hdrqentsize(uctxt) << 2;
cinfo.sdma_ring_size = fd->cq->nentries;
cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size;
@@ -1543,7 +1555,7 @@ static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
* always resets it's tail register back to 0 on a
* transition from disabled to enabled.
*/
- if (uctxt->rcvhdrtail_kvaddr)
+ if (hfi1_rcvhdrtail_kvaddr(uctxt))
clear_rcvhdrtail(uctxt);
rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB;
} else {
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index fa45350a9a1d..cae12f416ca0 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -197,7 +197,9 @@ struct exp_tid_set {
u32 count;
};
-typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
+struct hfi1_ctxtdata;
+typedef int (*intr_handler)(struct hfi1_ctxtdata *rcd, int data);
+typedef void (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
struct tid_queue {
struct list_head queue_head;
@@ -226,7 +228,11 @@ struct hfi1_ctxtdata {
* be valid. Worst case is we process an extra interrupt and up to 64
* packets with the wrong interrupt handler.
*/
- int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded);
+ intr_handler do_interrupt;
+ /** fast handler after autoactive */
+ intr_handler fast_handler;
+ /** slow handler */
+ intr_handler slow_handler;
/* verbs rx_stats per rcd */
struct hfi1_opcode_stats_perctx *opstats;
/* clear interrupt mask */
@@ -1153,6 +1159,8 @@ struct hfi1_devdata {
char *boardname; /* human readable board info */
+ u64 ctx0_seq_drop;
+
/* reset value */
u64 z_int_counter;
u64 z_rcv_limit;
@@ -1310,7 +1318,7 @@ struct hfi1_devdata {
struct err_info_constraint err_info_xmit_constraint;
atomic_t drop_packet;
- u8 do_drop;
+ bool do_drop;
u8 err_info_uncorrectable;
u8 err_info_fmconfig;
@@ -1436,15 +1444,18 @@ struct mmu_rb_handler;
/* Private data for file operations */
struct hfi1_filedata {
+ struct srcu_struct pq_srcu;
struct hfi1_devdata *dd;
struct hfi1_ctxtdata *uctxt;
struct hfi1_user_sdma_comp_q *cq;
- struct hfi1_user_sdma_pkt_q *pq;
+ /* update side lock for SRCU */
+ spinlock_t pq_rcu_lock;
+ struct hfi1_user_sdma_pkt_q __rcu *pq;
u16 subctxt;
/* for cpu affinity; -1 if none */
int rec_cpu_num;
u32 tid_n_pinned;
- struct mmu_rb_handler *handler;
+ bool use_mn;
struct tid_rb_node **entry_to_rb;
spinlock_t tid_lock; /* protect tid_[limit,used] counters */
u32 tid_limit;
@@ -1507,12 +1518,148 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp,
#define RCV_PKT_LIMIT 0x1 /* stop, hit limit, start thread */
#define RCV_PKT_DONE 0x2 /* stop, no more packets detected */
+/**
+ * hfi1_rcd_head - return the receive header queue head
+ * @rcd: the context
+ */
+static inline u32 hfi1_rcd_head(struct hfi1_ctxtdata *rcd)
+{
+ return rcd->head;
+}
+
+/**
+ * hfi1_set_rcd_head - set the receive header queue head
+ * @rcd: the context
+ * @head: the new head
+ */
+static inline void hfi1_set_rcd_head(struct hfi1_ctxtdata *rcd, u32 head)
+{
+ rcd->head = head;
+}
+
/* calculate the current RHF address */
static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd)
{
return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->rhf_offset;
}
+/* return DMA_RTAIL configuration */
+static inline bool get_dma_rtail_setting(struct hfi1_ctxtdata *rcd)
+{
+ return !!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL);
+}
+
+/**
+ * hfi1_seq_incr_wrap - wrapping increment for sequence
+ * @seq: the current sequence number
+ *
+ * Returns: the incremented seq
+ */
+static inline u8 hfi1_seq_incr_wrap(u8 seq)
+{
+ if (++seq > RHF_MAX_SEQ)
+ seq = 1;
+ return seq;
+}
+
+/**
+ * hfi1_seq_cnt - return seq_cnt member
+ * @rcd: the receive context
+ *
+ * Return seq_cnt member
+ */
+static inline u8 hfi1_seq_cnt(struct hfi1_ctxtdata *rcd)
+{
+ return rcd->seq_cnt;
+}
+
+/**
+ * hfi1_set_seq_cnt - set the seq_cnt member
+ * @rcd: the receive context
+ * @cnt: the new sequence count
+ *
+ * Set the seq_cnt member to @cnt
+ */
+static inline void hfi1_set_seq_cnt(struct hfi1_ctxtdata *rcd, u8 cnt)
+{
+ rcd->seq_cnt = cnt;
+}
+
+/**
+ * last_rcv_seq - check for the end of the packet sequence
+ * @rcd: the receive context
+ * @seq: the RHF sequence number
+ *
+ * Return true if @seq does not match the expected sequence count,
+ * i.e. there are no further packets to process
+ */
+static inline bool last_rcv_seq(struct hfi1_ctxtdata *rcd, u32 seq)
+{
+ return seq != rcd->seq_cnt;
+}
+
+/**
+ * hfi1_seq_incr - increment the context sequence number
+ * @rcd: the receive context
+ * @seq: the current sequence number
+ *
+ * Returns: true if this was the last packet
+ */
+static inline bool hfi1_seq_incr(struct hfi1_ctxtdata *rcd, u32 seq)
+{
+ rcd->seq_cnt = hfi1_seq_incr_wrap(rcd->seq_cnt);
+ return last_rcv_seq(rcd, seq);
+}
+
+/**
+ * get_hdrqentsize - return hdrq entry size
+ * @rcd: the receive context
+ */
+static inline u8 get_hdrqentsize(struct hfi1_ctxtdata *rcd)
+{
+ return rcd->rcvhdrqentsize;
+}
+
+/**
+ * get_hdrq_cnt - return hdrq count
+ * @rcd: the receive context
+ */
+static inline u16 get_hdrq_cnt(struct hfi1_ctxtdata *rcd)
+{
+ return rcd->rcvhdrq_cnt;
+}
+
+/**
+ * hfi1_is_slowpath - check if this context is slow path
+ * @rcd: the receive context
+ */
+static inline bool hfi1_is_slowpath(struct hfi1_ctxtdata *rcd)
+{
+ return rcd->do_interrupt == rcd->slow_handler;
+}
+
+/**
+ * hfi1_is_fastpath - check if this context is fast path
+ * @rcd: the receive context
+ */
+static inline bool hfi1_is_fastpath(struct hfi1_ctxtdata *rcd)
+{
+ if (rcd->ctxt == HFI1_CTRL_CTXT)
+ return false;
+
+ return rcd->do_interrupt == rcd->fast_handler;
+}
+
+/**
+ * hfi1_set_fast - change to the fast handler
+ * @rcd: the receive context
+ */
+static inline void hfi1_set_fast(struct hfi1_ctxtdata *rcd)
+{
+ if (unlikely(!rcd))
+ return;
+ if (unlikely(!hfi1_is_fastpath(rcd)))
+ rcd->do_interrupt = rcd->fast_handler;
+}
+
int hfi1_reset_device(int);
void receive_interrupt_work(struct work_struct *work);
@@ -2015,9 +2162,21 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr,
void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
size_t npages, bool dirty);
+/**
+ * hfi1_rcvhdrtail_kvaddr - return tail kvaddr
+ * @rcd: the receive context
+ */
+static inline __le64 *hfi1_rcvhdrtail_kvaddr(const struct hfi1_ctxtdata *rcd)
+{
+ return (__le64 *)rcd->rcvhdrtail_kvaddr;
+}
+
static inline void clear_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
{
- *((u64 *)rcd->rcvhdrtail_kvaddr) = 0ULL;
+ u64 *kv = (u64 *)hfi1_rcvhdrtail_kvaddr(rcd);
+
+ if (kv)
+ *kv = 0ULL;
}
static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
@@ -2026,7 +2185,17 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
* volatile because it's a DMA target from the chip, routine is
* inlined, and don't want register caching or reordering.
*/
- return (u32)le64_to_cpu(*rcd->rcvhdrtail_kvaddr);
+ return (u32)le64_to_cpu(*hfi1_rcvhdrtail_kvaddr(rcd));
+}
+
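+/**
+ * hfi1_packet_present - check whether a new packet is available
+ * @rcd: the receive context
+ *
+ * With no DMA'ed tail (the likely case), compare the RHF sequence number
+ * against the expected sequence count; otherwise compare the software head
+ * against the DMA'ed tail index.
+ */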
+static inline bool hfi1_packet_present(struct hfi1_ctxtdata *rcd)
+{
+ if (likely(!rcd->rcvhdrtail_kvaddr)) {
+ u32 seq = rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd)));
+
+ return !last_rcv_seq(rcd, seq);
+ }
+ return hfi1_rcd_head(rcd) != get_rcvhdrtail(rcd);
}
/*
@@ -2298,6 +2467,25 @@ static inline bool is_integrated(struct hfi1_devdata *dd)
return dd->pcidev->device == PCI_DEVICE_ID_INTEL1;
}
+/**
+ * hfi1_need_drop - detect whether the current packet needs to be dropped
+ * @dd: the device
+ *
+ * In some cases, the first packet needs to be dropped.
+ *
+ * Return true if the current packet needs to be dropped and false otherwise.
+ */
+static inline bool hfi1_need_drop(struct hfi1_devdata *dd)
+{
+ if (unlikely(dd->do_drop &&
+ atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) ==
+ DROP_PACKET_ON)) {
+ dd->do_drop = false;
+ return true;
+ }
+ return false;
+}
+
int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp);
#define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev))
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 71cb9525c074..e3acda7a0800 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -78,8 +78,6 @@
*/
#define HFI1_MIN_USER_CTXT_BUFCNT 7
-#define HFI1_MIN_HDRQ_EGRBUF_CNT 2
-#define HFI1_MAX_HDRQ_EGRBUF_CNT 16352
#define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */
#define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */
@@ -122,8 +120,6 @@ unsigned int user_credit_return_threshold = 33; /* default is 33% */
module_param(user_credit_return_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user send contexts, return when unreturned credits passes this many blocks (in percent of allocated blocks, 0 is off)");
-static inline u64 encode_rcv_header_entry_size(u16 size);
-
DEFINE_XARRAY_FLAGS(hfi1_dev_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
static int hfi1_create_kctxt(struct hfi1_devdata *dd,
@@ -154,7 +150,12 @@ static int hfi1_create_kctxt(struct hfi1_devdata *dd,
/* Control context must use DMA_RTAIL */
if (rcd->ctxt == HFI1_CTRL_CTXT)
rcd->flags |= HFI1_CAP_DMA_RTAIL;
- rcd->seq_cnt = 1;
+ rcd->fast_handler = get_dma_rtail_setting(rcd) ?
+ handle_receive_interrupt_dma_rtail :
+ handle_receive_interrupt_nodma_rtail;
+ rcd->slow_handler = handle_receive_interrupt;
+
+ hfi1_set_seq_cnt(rcd, 1);
rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node);
if (!rcd->sc) {
@@ -511,23 +512,6 @@ void hfi1_free_ctxt(struct hfi1_ctxtdata *rcd)
}
/*
- * Convert a receive header entry size that to the encoding used in the CSR.
- *
- * Return a zero if the given size is invalid.
- */
-static inline u64 encode_rcv_header_entry_size(u16 size)
-{
- /* there are only 3 valid receive header entry sizes */
- if (size == 2)
- return 1;
- if (size == 16)
- return 2;
- else if (size == 32)
- return 4;
- return 0; /* invalid */
-}
-
-/*
* Select the largest ccti value over all SLs to determine the intra-
* packet gap for the link.
*
@@ -892,10 +876,10 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
if (is_ax(dd)) {
atomic_set(&dd->drop_packet, DROP_PACKET_ON);
- dd->do_drop = 1;
+ dd->do_drop = true;
} else {
atomic_set(&dd->drop_packet, DROP_PACKET_OFF);
- dd->do_drop = 0;
+ dd->do_drop = false;
}
/* make sure the link is not "up" */
@@ -1149,9 +1133,9 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
dma_free_coherent(&dd->pcidev->dev, rcvhdrq_size(rcd),
rcd->rcvhdrq, rcd->rcvhdrq_dma);
rcd->rcvhdrq = NULL;
- if (rcd->rcvhdrtail_kvaddr) {
+ if (hfi1_rcvhdrtail_kvaddr(rcd)) {
dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
- (void *)rcd->rcvhdrtail_kvaddr,
+ (void *)hfi1_rcvhdrtail_kvaddr(rcd),
rcd->rcvhdrqtailaddr_dma);
rcd->rcvhdrtail_kvaddr = NULL;
}
@@ -1489,7 +1473,6 @@ static int __init hfi1_mod_init(void)
goto bail_dev;
}
- hfi1_compute_tid_rdma_flow_wt();
/*
* These must be called before the driver is registered with
* the PCI subsystem.
@@ -1612,29 +1595,6 @@ static void postinit_cleanup(struct hfi1_devdata *dd)
hfi1_free_devdata(dd);
}
-static int init_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt)
-{
- if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
- dd_dev_err(dd, "Receive header queue count too small\n");
- return -EINVAL;
- }
-
- if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
- dd_dev_err(dd,
- "Receive header queue count cannot be greater than %u\n",
- HFI1_MAX_HDRQ_EGRBUF_CNT);
- return -EINVAL;
- }
-
- if (thecnt % HDRQ_INCREMENT) {
- dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n",
- thecnt, HDRQ_INCREMENT);
- return -EINVAL;
- }
-
- return 0;
-}
-
static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
int ret = 0, j, pidx, initfail;
@@ -1662,7 +1622,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
}
/* Validate some global module parameters */
- ret = init_validate_rcvhdrcnt(dd, rcvhdrcnt);
+ ret = hfi1_validate_rcvhdrcnt(dd, rcvhdrcnt);
if (ret)
goto bail;
@@ -1843,7 +1803,6 @@ static void shutdown_one(struct pci_dev *pdev)
int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
{
unsigned amt;
- u64 reg;
if (!rcd->rcvhdrq) {
gfp_t gfp_flags;
@@ -1875,30 +1834,9 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
goto bail_free;
}
}
- /*
- * These values are per-context:
- * RcvHdrCnt
- * RcvHdrEntSize
- * RcvHdrSize
- */
- reg = ((u64)(rcd->rcvhdrq_cnt >> HDRQ_SIZE_SHIFT)
- & RCV_HDR_CNT_CNT_MASK)
- << RCV_HDR_CNT_CNT_SHIFT;
- write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_CNT, reg);
- reg = (encode_rcv_header_entry_size(rcd->rcvhdrqentsize)
- & RCV_HDR_ENT_SIZE_ENT_SIZE_MASK)
- << RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT;
- write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_ENT_SIZE, reg);
- reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK)
- << RCV_HDR_SIZE_HDR_SIZE_SHIFT;
- write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_SIZE, reg);
- /*
- * Program dummy tail address for every receive context
- * before enabling any receive context
- */
- write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_TAIL_ADDR,
- dd->rcvhdrtail_dummy_dma);
+ set_hdrq_regs(rcd->dd, rcd->ctxt, rcd->rcvhdrqentsize,
+ rcd->rcvhdrq_cnt);
return 0;
diff --git a/drivers/infiniband/hw/hfi1/iowait.c b/drivers/infiniband/hw/hfi1/iowait.c
index adb4a1ba921b..5836fe7b2817 100644
--- a/drivers/infiniband/hw/hfi1/iowait.c
+++ b/drivers/infiniband/hw/hfi1/iowait.c
@@ -81,7 +81,9 @@ void iowait_init(struct iowait *wait, u32 tx_limit,
void iowait_cancel_work(struct iowait *w)
{
cancel_work_sync(&iowait_get_ib_work(w)->iowork);
- cancel_work_sync(&iowait_get_tid_work(w)->iowork);
+	/* Only cancel the TID RDMA iowork if it has been initialized */
+ if (iowait_get_tid_work(w)->iowork.func)
+ cancel_work_sync(&iowait_get_tid_work(w)->iowork);
}
/**
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 184dba3c2828..a51bcd2b4391 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -2326,7 +2326,7 @@ struct opa_port_status_req {
__be32 vl_select_mask;
};
-#define VL_MASK_ALL 0x000080ff
+#define VL_MASK_ALL 0x00000000000080ffUL
struct opa_port_status_rsp {
__u8 port_num;
@@ -2625,15 +2625,14 @@ static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp,
}
static void a0_portstatus(struct hfi1_pportdata *ppd,
- struct opa_port_status_rsp *rsp, u32 vl_select_mask)
+ struct opa_port_status_rsp *rsp)
{
if (!is_bx(ppd->dd)) {
unsigned long vl;
u64 sum_vl_xmit_wait = 0;
- u32 vl_all_mask = VL_MASK_ALL;
+ unsigned long vl_all_mask = VL_MASK_ALL;
- for_each_set_bit(vl, (unsigned long *)&(vl_all_mask),
- 8 * sizeof(vl_all_mask)) {
+ for_each_set_bit(vl, &vl_all_mask, BITS_PER_LONG) {
u64 tmp = sum_vl_xmit_wait +
read_port_cntr(ppd, C_TX_WAIT_VL,
idx_from_vl(vl));
@@ -2730,12 +2729,12 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
(struct opa_port_status_req *)pmp->data;
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
struct opa_port_status_rsp *rsp;
- u32 vl_select_mask = be32_to_cpu(req->vl_select_mask);
+ unsigned long vl_select_mask = be32_to_cpu(req->vl_select_mask);
unsigned long vl;
size_t response_data_size;
u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
u8 port_num = req->port_num;
- u8 num_vls = hweight32(vl_select_mask);
+ u8 num_vls = hweight64(vl_select_mask);
struct _vls_pctrs *vlinfo;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
@@ -2770,7 +2769,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
hfi1_read_link_quality(dd, &rsp->link_quality_indicator);
- rsp->vl_select_mask = cpu_to_be32(vl_select_mask);
+ rsp->vl_select_mask = cpu_to_be32((u32)vl_select_mask);
rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
CNTR_INVALID_VL));
rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
@@ -2841,8 +2840,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
* So in the for_each_set_bit() loop below, we don't need
* any additional checks for vl.
*/
- for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
- 8 * sizeof(vl_select_mask)) {
+ for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
memset(vlinfo, 0, sizeof(*vlinfo));
tmp = read_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl));
@@ -2883,7 +2881,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
vfi++;
}
- a0_portstatus(ppd, rsp, vl_select_mask);
+ a0_portstatus(ppd, rsp);
if (resp_len)
*resp_len += response_data_size;
@@ -2930,16 +2928,14 @@ static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port,
return error_counter_summary;
}
-static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp,
- u32 vl_select_mask)
+static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp)
{
if (!is_bx(ppd->dd)) {
unsigned long vl;
u64 sum_vl_xmit_wait = 0;
- u32 vl_all_mask = VL_MASK_ALL;
+ unsigned long vl_all_mask = VL_MASK_ALL;
- for_each_set_bit(vl, (unsigned long *)&(vl_all_mask),
- 8 * sizeof(vl_all_mask)) {
+ for_each_set_bit(vl, &vl_all_mask, BITS_PER_LONG) {
u64 tmp = sum_vl_xmit_wait +
read_port_cntr(ppd, C_TX_WAIT_VL,
idx_from_vl(vl));
@@ -2994,7 +2990,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
u64 port_mask;
u8 port_num;
unsigned long vl;
- u32 vl_select_mask;
+ unsigned long vl_select_mask;
int vfi;
u16 link_width;
u16 link_speed;
@@ -3071,8 +3067,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
* So in the for_each_set_bit() loop below, we don't need
* any additional checks for vl.
*/
- for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
- 8 * sizeof(req->vl_select_mask)) {
+ for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
memset(vlinfo, 0, sizeof(*vlinfo));
rsp->vls[vfi].port_vl_xmit_data =
@@ -3120,7 +3115,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
vfi++;
}
- a0_datacounters(ppd, rsp, vl_select_mask);
+ a0_datacounters(ppd, rsp);
if (resp_len)
*resp_len += response_data_size;
@@ -3215,7 +3210,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
struct _vls_ectrs *vlinfo;
unsigned long vl;
u64 port_mask, tmp;
- u32 vl_select_mask;
+ unsigned long vl_select_mask;
int vfi;
req = (struct opa_port_error_counters64_msg *)pmp->data;
@@ -3273,8 +3268,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
vlinfo = &rsp->vls[0];
vfi = 0;
vl_select_mask = be32_to_cpu(req->vl_select_mask);
- for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
- 8 * sizeof(req->vl_select_mask)) {
+ for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
memset(vlinfo, 0, sizeof(*vlinfo));
rsp->vls[vfi].port_vl_xmit_discards =
cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
@@ -3485,7 +3479,7 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
u64 portn = be64_to_cpu(req->port_select_mask[3]);
u32 counter_select = be32_to_cpu(req->counter_select_mask);
- u32 vl_select_mask = VL_MASK_ALL; /* clear all per-vl cnts */
+ unsigned long vl_select_mask = VL_MASK_ALL; /* clear all per-vl cnts */
unsigned long vl;
if ((nports != 1) || (portn != 1 << port)) {
@@ -3579,8 +3573,7 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
if (counter_select & CS_UNCORRECTABLE_ERRORS)
write_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL, 0);
- for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
- 8 * sizeof(vl_select_mask)) {
+ for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
if (counter_select & CS_PORT_XMIT_DATA)
write_port_cntr(ppd, C_TX_FLIT_VL, idx_from_vl(vl), 0);
@@ -4922,16 +4915,11 @@ static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port,
*/
int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in_mad, size_t in_mad_size,
- struct ib_mad_hdr *out_mad, size_t *out_mad_size,
- u16 *out_mad_pkey_index)
+ const struct ib_mad *in_mad, struct ib_mad *out_mad,
+ size_t *out_mad_size, u16 *out_mad_pkey_index)
{
- switch (in_mad->base_version) {
+ switch (in_mad->mad_hdr.base_version) {
case OPA_MGMT_BASE_VERSION:
- if (unlikely(in_mad_size != sizeof(struct opa_mad))) {
- dev_err(ibdev->dev.parent, "invalid in_mad_size\n");
- return IB_MAD_RESULT_FAILURE;
- }
return hfi1_process_opa_mad(ibdev, mad_flags, port,
in_wc, in_grh,
(struct opa_mad *)in_mad,
@@ -4939,10 +4927,8 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
out_mad_size,
out_mad_pkey_index);
case IB_MGMT_BASE_VERSION:
- return hfi1_process_ib_mad(ibdev, mad_flags, port,
- in_wc, in_grh,
- (const struct ib_mad *)in_mad,
- (struct ib_mad *)out_mad);
+ return hfi1_process_ib_mad(ibdev, mad_flags, port, in_wc,
+ in_grh, in_mad, out_mad);
default:
break;
}
diff --git a/drivers/infiniband/hw/hfi1/msix.c b/drivers/infiniband/hw/hfi1/msix.c
index d920b165d696..db82db497b2c 100644
--- a/drivers/infiniband/hw/hfi1/msix.c
+++ b/drivers/infiniband/hw/hfi1/msix.c
@@ -115,13 +115,11 @@ int msix_initialize(struct hfi1_devdata *dd)
*/
static int msix_request_irq(struct hfi1_devdata *dd, void *arg,
irq_handler_t handler, irq_handler_t thread,
- u32 idx, enum irq_type type)
+ enum irq_type type, const char *name)
{
unsigned long nr;
int irq;
int ret;
- const char *err_info;
- char name[MAX_NAME_SIZE];
struct hfi1_msix_entry *me;
/* Allocate an MSIx vector */
@@ -135,43 +133,15 @@ static int msix_request_irq(struct hfi1_devdata *dd, void *arg,
if (nr == dd->msix_info.max_requested)
return -ENOSPC;
- /* Specific verification and determine the name */
- switch (type) {
- case IRQ_GENERAL:
- /* general interrupt must be MSIx vector 0 */
- if (nr) {
- spin_lock(&dd->msix_info.msix_lock);
- __clear_bit(nr, dd->msix_info.in_use_msix);
- spin_unlock(&dd->msix_info.msix_lock);
- dd_dev_err(dd, "Invalid index %lu for GENERAL IRQ\n",
- nr);
- return -EINVAL;
- }
- snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit);
- err_info = "general";
- break;
- case IRQ_SDMA:
- snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d",
- dd->unit, idx);
- err_info = "sdma";
- break;
- case IRQ_RCVCTXT:
- snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d",
- dd->unit, idx);
- err_info = "receive context";
- break;
- case IRQ_OTHER:
- default:
+ if (type < IRQ_SDMA || type >= IRQ_OTHER)
return -EINVAL;
- }
- name[sizeof(name) - 1] = 0;
irq = pci_irq_vector(dd->pcidev, nr);
ret = pci_request_irq(dd->pcidev, nr, handler, thread, arg, name);
if (ret) {
dd_dev_err(dd,
- "%s: request for IRQ %d failed, MSIx %d, err %d\n",
- err_info, irq, idx, ret);
+ "%s: request for IRQ %d failed, MSIx %lu, err %d\n",
+ name, irq, nr, ret);
spin_lock(&dd->msix_info.msix_lock);
__clear_bit(nr, dd->msix_info.in_use_msix);
spin_unlock(&dd->msix_info.msix_lock);
@@ -195,17 +165,13 @@ static int msix_request_irq(struct hfi1_devdata *dd, void *arg,
return nr;
}
-/**
- * msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs
- * @rcd: valid rcd context
- *
- */
-int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd)
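+/* Common code for requesting a receive context IRQ */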
+static int msix_request_rcd_irq_common(struct hfi1_ctxtdata *rcd,
+ irq_handler_t handler,
+ irq_handler_t thread,
+ const char *name)
{
- int nr;
-
- nr = msix_request_irq(rcd->dd, rcd, receive_context_interrupt,
- receive_context_thread, rcd->ctxt, IRQ_RCVCTXT);
+ int nr = msix_request_irq(rcd->dd, rcd, handler, thread,
+ IRQ_RCVCTXT, name);
if (nr < 0)
return nr;
@@ -222,6 +188,22 @@ int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd)
}
/**
+ * msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs
+ * @rcd: valid rcd context
+ *
+ */
+int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd)
+{
+ char name[MAX_NAME_SIZE];
+
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d",
+ rcd->dd->unit, rcd->ctxt);
+
+ return msix_request_rcd_irq_common(rcd, receive_context_interrupt,
+ receive_context_thread, name);
+}
+
+/**
* msix_request_sdma_irq() - Helper for getting SDMA IRQ resources
* @sde: valid sdma engine
*
@@ -229,9 +211,12 @@ int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd)
int msix_request_sdma_irq(struct sdma_engine *sde)
{
int nr;
+ char name[MAX_NAME_SIZE];
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d",
+ sde->dd->unit, sde->this_idx);
nr = msix_request_irq(sde->dd, sde, sdma_interrupt, NULL,
- sde->this_idx, IRQ_SDMA);
+ IRQ_SDMA, name);
if (nr < 0)
return nr;
sde->msix_intr = nr;
@@ -241,6 +226,32 @@ int msix_request_sdma_irq(struct sdma_engine *sde)
}
/**
+ * msix_request_general_irq() - Helper for getting general IRQ resources
+ * @dd: valid device data
+ */
+int msix_request_general_irq(struct hfi1_devdata *dd)
+{
+ int nr;
+ char name[MAX_NAME_SIZE];
+
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit);
+ nr = msix_request_irq(dd, dd, general_interrupt, NULL, IRQ_GENERAL,
+ name);
+ if (nr < 0)
+ return nr;
+
+ /* general interrupt must be MSIx vector 0 */
+ if (nr) {
+ msix_free_irq(dd, (u8)nr);
+ dd_dev_err(dd, "Invalid index %d for GENERAL IRQ\n", nr);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
* enable_sdma_srcs() - Helper to enable SDMA IRQ srcs
* @dd: valid devdata structure
* @i: index of SDMA engine
@@ -265,10 +276,9 @@ static void enable_sdma_srcs(struct hfi1_devdata *dd, int i)
int msix_request_irqs(struct hfi1_devdata *dd)
{
int i;
- int ret;
+ int ret = msix_request_general_irq(dd);
- ret = msix_request_irq(dd, dd, general_interrupt, NULL, 0, IRQ_GENERAL);
- if (ret < 0)
+ if (ret)
return ret;
for (i = 0; i < dd->num_sdma; i++) {
diff --git a/drivers/infiniband/hw/hfi1/msix.h b/drivers/infiniband/hw/hfi1/msix.h
index a514881632a4..1a02ab7971c8 100644
--- a/drivers/infiniband/hw/hfi1/msix.h
+++ b/drivers/infiniband/hw/hfi1/msix.h
@@ -54,6 +54,7 @@
int msix_initialize(struct hfi1_devdata *dd);
int msix_request_irqs(struct hfi1_devdata *dd);
void msix_clean_up_interrupts(struct hfi1_devdata *dd);
+int msix_request_general_irq(struct hfi1_devdata *dd);
int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd);
int msix_request_sdma_irq(struct sdma_engine *sde);
void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr);
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 61aa5504d7c3..1a6268d61977 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -161,7 +161,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
return -EINVAL;
}
- dd->kregbase1 = ioremap_nocache(addr, RCV_ARRAY);
+ dd->kregbase1 = ioremap(addr, RCV_ARRAY);
if (!dd->kregbase1) {
dd_dev_err(dd, "UC mapping of kregbase1 failed\n");
return -ENOMEM;
@@ -179,7 +179,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
dd_dev_info(dd, "RcvArray count: %u\n", rcv_array_count);
dd->base2_start = RCV_ARRAY + rcv_array_count * 8;
- dd->kregbase2 = ioremap_nocache(
+ dd->kregbase2 = ioremap(
addr + dd->base2_start,
TXE_PIO_SEND - dd->base2_start);
if (!dd->kregbase2) {
@@ -319,7 +319,9 @@ int pcie_speeds(struct hfi1_devdata *dd)
/*
* bus->max_bus_speed is set from the bridge's linkcap Max Link Speed
*/
- if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) {
+ if (parent &&
+ (dd->pcidev->bus->max_bus_speed == PCIE_SPEED_2_5GT ||
+ dd->pcidev->bus->max_bus_speed == PCIE_SPEED_5_0GT)) {
dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n");
dd->link_gen3_capable = 0;
}
diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index cbf7faa5038c..36593f2efe26 100644
--- a/drivers/infiniband/hw/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -634,7 +634,7 @@ static void apply_tx_lanes(struct hfi1_pportdata *ppd, u8 field_id,
u32 config_data, const char *message)
{
u8 i;
- int ret = HCMD_SUCCESS;
+ int ret;
for (i = 0; i < 4; i++) {
ret = load_8051_config(ppd->dd, field_id, i, config_data);
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 024a7c2b6124..f1734e5e9ac4 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -595,11 +595,8 @@ check_s_state:
case IB_WR_SEND_WITH_IMM:
case IB_WR_SEND_WITH_INV:
/* If no credit, return. */
- if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
- rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
- qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
+ if (!rvt_rc_credit_avail(qp, wqe))
goto bail;
- }
if (len > pmtu) {
qp->s_state = OP(SEND_FIRST);
len = pmtu;
@@ -632,11 +629,8 @@ check_s_state:
goto no_flow_control;
case IB_WR_RDMA_WRITE_WITH_IMM:
/* If no credit, return. */
- if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
- rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
- qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
+ if (!rvt_rc_credit_avail(qp, wqe))
goto bail;
- }
no_flow_control:
put_ib_reth_vaddr(
wqe->rdma_wr.remote_addr,
@@ -1483,6 +1477,11 @@ static void update_num_rd_atomic(struct rvt_qp *qp, u32 psn,
req->ack_pending = cur_seg - req->comp_seg;
priv->pending_tid_r_segs += req->ack_pending;
qp->s_num_rd_atomic += req->ack_pending;
+ trace_hfi1_tid_req_update_num_rd_atomic(qp, 0,
+ wqe->wr.opcode,
+ wqe->psn,
+ wqe->lpsn,
+ req);
} else {
priv->pending_tid_r_segs += req->total_segs;
qp->s_num_rd_atomic += req->total_segs;
@@ -2210,15 +2209,15 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
if (qp->s_flags & RVT_S_WAIT_RNR)
goto bail_stop;
rdi = ib_to_rvt(qp->ibqp.device);
- if (qp->s_rnr_retry == 0 &&
- !((rdi->post_parms[wqe->wr.opcode].flags &
- RVT_OPERATION_IGN_RNR_CNT) &&
- qp->s_rnr_retry_cnt == 0)) {
- status = IB_WC_RNR_RETRY_EXC_ERR;
- goto class_b;
+ if (!(rdi->post_parms[wqe->wr.opcode].flags &
+ RVT_OPERATION_IGN_RNR_CNT)) {
+ if (qp->s_rnr_retry == 0) {
+ status = IB_WC_RNR_RETRY_EXC_ERR;
+ goto class_b;
+ }
+ if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
+ qp->s_rnr_retry--;
}
- if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
- qp->s_rnr_retry--;
/*
* The last valid PSN is the previous PSN. For TID RDMA WRITE
@@ -2600,7 +2599,7 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
* to be sent before sending this one.
*/
e = NULL;
- old_req = 1;
+ old_req = true;
ibp->rvp.n_rc_dupreq++;
spin_lock_irqsave(&qp->s_lock, flags);
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 2395fd4233a7..a51525647ac8 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -65,6 +65,7 @@
#define SDMA_DESCQ_CNT 2048
#define SDMA_DESC_INTR 64
#define INVALID_TAIL 0xffff
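+/* The shared pad buffer must cover both 16B trailing padding and the 4-byte pad */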
+#define SDMA_PAD max_t(size_t, MAX_16B_PADDING, sizeof(u32))
static uint sdma_descq_cnt = SDMA_DESCQ_CNT;
module_param(sdma_descq_cnt, uint, S_IRUGO);
@@ -847,7 +848,7 @@ static const struct rhashtable_params sdma_rht_params = {
.nelem_hint = NR_CPUS_HINT,
.head_offset = offsetof(struct sdma_rht_node, node),
.key_offset = offsetof(struct sdma_rht_node, cpu_id),
- .key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id),
+ .key_len = sizeof_field(struct sdma_rht_node, cpu_id),
.max_size = NR_CPUS,
.min_size = 8,
.automatic_shrinking = true,
@@ -880,8 +881,8 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
cpu_id = smp_processor_id();
rcu_read_lock();
- rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu_id,
- sdma_rht_params);
+ rht_node = rhashtable_lookup(dd->sdma_rht, &cpu_id,
+ sdma_rht_params);
if (rht_node && rht_node->map[vl]) {
struct sdma_rht_map_elem *map = rht_node->map[vl];
@@ -1296,7 +1297,7 @@ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
struct sdma_engine *sde;
if (dd->sdma_pad_dma) {
- dma_free_coherent(&dd->pcidev->dev, 4,
+ dma_free_coherent(&dd->pcidev->dev, SDMA_PAD,
(void *)dd->sdma_pad_dma,
dd->sdma_pad_phys);
dd->sdma_pad_dma = NULL;
@@ -1491,7 +1492,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
}
/* Allocate memory for pad */
- dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, sizeof(u32),
+ dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, SDMA_PAD,
&dd->sdma_pad_phys, GFP_KERNEL);
if (!dd->sdma_pad_dma) {
dd_dev_err(dd, "failed to allocate SendDMA pad memory\n");
@@ -1526,8 +1527,11 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
}
ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params);
- if (ret < 0)
+ if (ret < 0) {
+ kfree(tmp_sdma_rht);
goto bail;
+ }
+
dd->sdma_rht = tmp_sdma_rht;
dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index 996fc298207e..8a2e0d9351e9 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -107,8 +107,6 @@ static u32 mask_generation(u32 a)
* C - Capcode
*/
-static u32 tid_rdma_flow_wt;
-
static void tid_rdma_trigger_resume(struct work_struct *work);
static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req);
static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
@@ -136,6 +134,26 @@ static void update_r_next_psn_fecn(struct hfi1_packet *packet,
struct tid_rdma_flow *flow,
bool fecn);
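+/*
+ * Helpers that consolidate the common pattern of validating r_tid_ack and
+ * scheduling a TID RDMA ACK on the send engine.
+ */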
+static void validate_r_tid_ack(struct hfi1_qp_priv *priv)
+{
+ if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
+ priv->r_tid_ack = priv->r_tid_tail;
+}
+
+static void tid_rdma_schedule_ack(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ priv->s_flags |= RVT_S_ACK_PENDING;
+ hfi1_schedule_tid_send(qp);
+}
+
+static void tid_rdma_trigger_ack(struct rvt_qp *qp)
+{
+ validate_r_tid_ack(qp->priv);
+ tid_rdma_schedule_ack(qp);
+}
+
static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
{
return
@@ -2574,18 +2592,9 @@ void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp)
hfi1_kern_clear_hw_flow(priv->rcd, qp);
}
-static bool tid_rdma_tid_err(struct hfi1_ctxtdata *rcd,
- struct hfi1_packet *packet, u8 rcv_type,
- u8 opcode)
+static bool tid_rdma_tid_err(struct hfi1_packet *packet, u8 rcv_type)
{
struct rvt_qp *qp = packet->qp;
- struct hfi1_qp_priv *qpriv = qp->priv;
- u32 ipsn;
- struct ib_other_headers *ohdr = packet->ohdr;
- struct rvt_ack_entry *e;
- struct tid_rdma_request *req;
- struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
- u32 i;
if (rcv_type >= RHF_RCV_TYPE_IB)
goto done;
@@ -2602,41 +2611,9 @@ static bool tid_rdma_tid_err(struct hfi1_ctxtdata *rcd,
if (rcv_type == RHF_RCV_TYPE_EAGER) {
hfi1_restart_rc(qp, qp->s_last_psn + 1, 1);
hfi1_schedule_send(qp);
- goto done_unlock;
- }
-
- /*
- * For TID READ response, error out QP after freeing the tid
- * resources.
- */
- if (opcode == TID_OP(READ_RESP)) {
- ipsn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_psn));
- if (cmp_psn(ipsn, qp->s_last_psn) > 0 &&
- cmp_psn(ipsn, qp->s_psn) < 0) {
- hfi1_kern_read_tid_flow_free(qp);
- spin_unlock(&qp->s_lock);
- rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
- goto done;
- }
- goto done_unlock;
}
- /*
- * Error out the qp for TID RDMA WRITE
- */
- hfi1_kern_clear_hw_flow(qpriv->rcd, qp);
- for (i = 0; i < rvt_max_atomic(rdi); i++) {
- e = &qp->s_ack_queue[i];
- if (e->opcode == TID_OP(WRITE_REQ)) {
- req = ack_to_tid_req(e);
- hfi1_kern_exp_rcv_clear_all(req);
- }
- }
- spin_unlock(&qp->s_lock);
- rvt_rc_error(qp, IB_WC_LOC_LEN_ERR);
- goto done;
-
-done_unlock:
+ /* Since no payload is delivered, just drop the packet */
spin_unlock(&qp->s_lock);
done:
return true;
@@ -2687,12 +2664,15 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd,
u32 fpsn;
lockdep_assert_held(&qp->r_lock);
+ trace_hfi1_rsp_read_kdeth_eflags(qp, ibpsn);
+ trace_hfi1_sender_read_kdeth_eflags(qp);
+ trace_hfi1_tid_read_sender_kdeth_eflags(qp, 0);
+ spin_lock(&qp->s_lock);
/* If the psn is out of valid range, drop the packet */
if (cmp_psn(ibpsn, qp->s_last_psn) < 0 ||
cmp_psn(ibpsn, qp->s_psn) > 0)
- return ret;
+ goto s_unlock;
- spin_lock(&qp->s_lock);
/*
* Note that NAKs implicitly ACK outstanding SEND and RDMA write
* requests and implicitly NAK RDMA read and atomic requests issued
@@ -2740,14 +2720,19 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd,
wqe = do_rc_completion(qp, wqe, ibp);
if (qp->s_acked == qp->s_tail)
- break;
+ goto s_unlock;
}
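+	/* If all requests have been completed, there is nothing more to do */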
+ if (qp->s_acked == qp->s_tail)
+ goto s_unlock;
+
/* Handle the eflags for the request */
if (wqe->wr.opcode != IB_WR_TID_RDMA_READ)
goto s_unlock;
req = wqe_to_tid_req(wqe);
+ trace_hfi1_tid_req_read_kdeth_eflags(qp, 0, wqe->wr.opcode, wqe->psn,
+ wqe->lpsn, req);
switch (rcv_type) {
case RHF_RCV_TYPE_EXPECTED:
switch (rte) {
@@ -2762,15 +2747,13 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd,
* packets that could be still in the fabric.
*/
flow = &req->flows[req->clear_tail];
+ trace_hfi1_tid_flow_read_kdeth_eflags(qp,
+ req->clear_tail,
+ flow);
if (priv->s_flags & HFI1_R_TID_SW_PSN) {
diff = cmp_psn(psn,
flow->flow_state.r_next_psn);
if (diff > 0) {
- if (!(qp->r_flags & RVT_R_RDMAR_SEQ))
- restart_tid_rdma_read_req(rcd,
- qp,
- wqe);
-
/* Drop the packet.*/
goto s_unlock;
} else if (diff < 0) {
@@ -2922,7 +2905,7 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
if (lnh == HFI1_LRH_GRH)
goto r_unlock;
- if (tid_rdma_tid_err(rcd, packet, rcv_type, opcode))
+ if (tid_rdma_tid_err(packet, rcv_type))
goto r_unlock;
}
@@ -2942,8 +2925,15 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
*/
spin_lock(&qp->s_lock);
qpriv = qp->priv;
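+	/*
+	 * Ignore the error flags unless there is an outstanding TID RDMA WRITE
+	 * request with uncompleted segments at r_tid_tail.
+	 */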
+ if (qpriv->r_tid_tail == HFI1_QP_WQE_INVALID ||
+ qpriv->r_tid_tail == qpriv->r_tid_head)
+ goto unlock;
e = &qp->s_ack_queue[qpriv->r_tid_tail];
+ if (e->opcode != TID_OP(WRITE_REQ))
+ goto unlock;
req = ack_to_tid_req(e);
+ if (req->comp_seg == req->cur_seg)
+ goto unlock;
flow = &req->flows[req->clear_tail];
trace_hfi1_eflags_err_write(qp, rcv_type, rte, psn);
trace_hfi1_rsp_handle_kdeth_eflags(qp, psn);
@@ -3033,10 +3023,7 @@ nak_psn:
qpriv->s_nak_state = IB_NAK_PSN_ERROR;
/* We are NAK'ing the next expected PSN */
qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn);
- qpriv->s_flags |= RVT_S_ACK_PENDING;
- if (qpriv->r_tid_ack == HFI1_QP_WQE_INVALID)
- qpriv->r_tid_ack = qpriv->r_tid_tail;
- hfi1_schedule_tid_send(qp);
+ tid_rdma_trigger_ack(qp);
}
goto unlock;
}
@@ -3399,18 +3386,17 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32);
}
-void hfi1_compute_tid_rdma_flow_wt(void)
+static u32 hfi1_compute_tid_rdma_flow_wt(struct rvt_qp *qp)
{
/*
* Heuristic for computing the RNR timeout when waiting on the flow
* queue. Rather than a computationaly expensive exact estimate of when
* a flow will be available, we assume that if a QP is at position N in
* the flow queue it has to wait approximately (N + 1) * (number of
- * segments between two sync points), assuming PMTU of 4K. The rationale
- * for this is that flows are released and recycled at each sync point.
+ * segments between two sync points). The rationale for this is that
+ * flows are released and recycled at each sync point.
*/
- tid_rdma_flow_wt = MAX_TID_FLOW_PSN * enum_to_mtu(OPA_MTU_4096) /
- TID_RDMA_MAX_SEGMENT_SIZE;
+ return (MAX_TID_FLOW_PSN * qp->pmtu) >> TID_RDMA_SEGMENT_SHIFT;
}
static u32 position_in_queue(struct hfi1_qp_priv *qpriv,
@@ -3533,7 +3519,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
if (qpriv->flow_state.index >= RXE_NUM_TID_FLOWS) {
ret = hfi1_kern_setup_hw_flow(qpriv->rcd, qp);
if (ret) {
- to_seg = tid_rdma_flow_wt *
+ to_seg = hfi1_compute_tid_rdma_flow_wt(qp) *
position_in_queue(qpriv,
&rcd->flow_queue);
break;
@@ -3554,7 +3540,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
/*
* If overtaking req->acked_tail, send an RNR NAK. Because the
* QP is not queued in this case, and the issue can only be
- * caused due a delay in scheduling the second leg which we
+ * caused by a delay in scheduling the second leg which we
* cannot estimate, we use a rather arbitrary RNR timeout of
* (MAX_FLOWS / 2) segments
*/
@@ -3562,8 +3548,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
MAX_FLOWS)) {
ret = -EAGAIN;
to_seg = MAX_FLOWS >> 1;
- qpriv->s_flags |= RVT_S_ACK_PENDING;
- hfi1_schedule_tid_send(qp);
+ tid_rdma_trigger_ack(qp);
break;
}
@@ -4363,8 +4348,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
trace_hfi1_tid_req_rcv_write_data(qp, 0, e->opcode, e->psn, e->lpsn,
req);
trace_hfi1_tid_write_rsp_rcv_data(qp);
- if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
- priv->r_tid_ack = priv->r_tid_tail;
+ validate_r_tid_ack(priv);
if (opcode == TID_OP(WRITE_DATA_LAST)) {
release_rdma_sge_mr(e);
@@ -4403,8 +4387,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
}
done:
- priv->s_flags |= RVT_S_ACK_PENDING;
- hfi1_schedule_tid_send(qp);
+ tid_rdma_schedule_ack(qp);
exit:
priv->r_next_psn_kdeth = flow->flow_state.r_next_psn;
if (fecn)
@@ -4416,10 +4399,7 @@ send_nak:
if (!priv->s_nak_state) {
priv->s_nak_state = IB_NAK_PSN_ERROR;
priv->s_nak_psn = flow->flow_state.r_next_psn;
- priv->s_flags |= RVT_S_ACK_PENDING;
- if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
- priv->r_tid_ack = priv->r_tid_tail;
- hfi1_schedule_tid_send(qp);
+ tid_rdma_trigger_ack(qp);
}
goto done;
}
@@ -4509,7 +4489,7 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet)
struct rvt_swqe *wqe;
struct tid_rdma_request *req;
struct tid_rdma_flow *flow;
- u32 aeth, psn, req_psn, ack_psn, resync_psn, ack_kpsn;
+ u32 aeth, psn, req_psn, ack_psn, flpsn, resync_psn, ack_kpsn;
unsigned long flags;
u16 fidx;
@@ -4538,6 +4518,9 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet)
ack_kpsn--;
}
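+	/* Ignore the ACK if there are no outstanding requests */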
+ if (unlikely(qp->s_acked == qp->s_tail))
+ goto ack_op_err;
+
wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
@@ -4550,7 +4533,8 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet)
trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow);
/* Drop stale ACK/NAK */
- if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.spsn)) < 0)
+ if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.spsn)) < 0 ||
+ cmp_psn(req_psn, flow->flow_state.resp_ib_psn) < 0)
goto ack_op_err;
while (cmp_psn(ack_kpsn,
@@ -4649,6 +4633,15 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet)
*/
fpsn = full_flow_psn(flow, flow->flow_state.spsn);
req->r_ack_psn = psn;
+ /*
+ * If resync_psn points to the last flow PSN for a
+ * segment and the new segment (likely from a new
+ * request) starts with a new generation number, we
+ * need to adjust resync_psn accordingly.
+ */
+ if (flow->flow_state.generation !=
+ (resync_psn >> HFI1_KDETH_BTH_SEQ_SHIFT))
+ resync_psn = mask_psn(fpsn - 1);
flow->resync_npkts +=
delta_psn(mask_psn(resync_psn + 1), fpsn);
/*
@@ -4712,7 +4705,12 @@ done:
switch ((aeth >> IB_AETH_CREDIT_SHIFT) &
IB_AETH_CREDIT_MASK) {
case 0: /* PSN sequence error */
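+			/* Ignore the NAK if no flows have been allocated for this request */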
+ if (!req->flows)
+ break;
flow = &req->flows[req->acked_tail];
+ flpsn = full_flow_psn(flow, flow->flow_state.lpsn);
+ if (cmp_psn(psn, flpsn) > 0)
+ break;
trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail,
flow);
req->r_ack_psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
@@ -4958,8 +4956,7 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet)
qpriv->resync = true;
/* RESYNC request always gets a TID RDMA ACK. */
qpriv->s_nak_state = 0;
- qpriv->s_flags |= RVT_S_ACK_PENDING;
- hfi1_schedule_tid_send(qp);
+ tid_rdma_trigger_ack(qp);
bail:
if (fecn)
qp->s_flags |= RVT_S_ECN;
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h
index 1c536185261e..6e82df2190b7 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.h
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.h
@@ -17,6 +17,7 @@
#define TID_RDMA_MIN_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
#define TID_RDMA_MAX_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
#define TID_RDMA_MAX_PAGES (BIT(18) >> PAGE_SHIFT)
+#define TID_RDMA_SEGMENT_SHIFT 18
/*
* Bit definitions for priv->s_flags.
@@ -274,8 +275,6 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
struct ib_other_headers *ohdr,
u32 *bth1, u32 *bth2, u32 *len);
-void hfi1_compute_tid_rdma_flow_wt(void);
-
void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet);
u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h
index e00c8a7d559c..b5fc5c6cd52f 100644
--- a/drivers/infiniband/hw/hfi1/trace_ctxts.h
+++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h
@@ -80,7 +80,7 @@ TRACE_EVENT(hfi1_uctxtdata,
__entry->credits = uctxt->sc->credits;
__entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free);
__entry->piobase = uctxt->sc->base_addr;
- __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
+ __entry->rcvhdrq_cnt = get_hdrq_cnt(uctxt);
__entry->rcvhdrq_dma = uctxt->rcvhdrq_dma;
__entry->eager_cnt = uctxt->egrbufs.alloced;
__entry->rcvegr_dma = uctxt->egrbufs.rcvtids[0].dma;
diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h
index 3cec960e9674..168079ed122c 100644
--- a/drivers/infiniband/hw/hfi1/trace_rx.h
+++ b/drivers/infiniband/hw/hfi1/trace_rx.h
@@ -106,19 +106,8 @@ TRACE_EVENT(hfi1_receive_interrupt,
),
TP_fast_assign(DD_DEV_ASSIGN(dd);
__entry->ctxt = rcd->ctxt;
- if (rcd->do_interrupt ==
- &handle_receive_interrupt) {
- __entry->slow_path = 1;
- __entry->dma_rtail = 0xFF;
- } else if (rcd->do_interrupt ==
- &handle_receive_interrupt_dma_rtail){
- __entry->dma_rtail = 1;
- __entry->slow_path = 0;
- } else if (rcd->do_interrupt ==
- &handle_receive_interrupt_nodma_rtail) {
- __entry->dma_rtail = 0;
- __entry->slow_path = 0;
- }
+ __entry->slow_path = hfi1_is_slowpath(rcd);
+ __entry->dma_rtail = get_dma_rtail_setting(rcd);
),
TP_printk("[%s] ctxt %d SlowPath: %d DmaRtail: %d",
__get_str(dev),
diff --git a/drivers/infiniband/hw/hfi1/trace_tid.h b/drivers/infiniband/hw/hfi1/trace_tid.h
index 4388b594ed1b..985ffa9cc958 100644
--- a/drivers/infiniband/hw/hfi1/trace_tid.h
+++ b/drivers/infiniband/hw/hfi1/trace_tid.h
@@ -138,10 +138,10 @@ TRACE_EVENT(/* put_tid */
TP_ARGS(dd, index, type, pa, order),
TP_STRUCT__entry(/* entry */
DD_DEV_ENTRY(dd)
- __field(unsigned long, pa);
- __field(u32, index);
- __field(u32, type);
- __field(u16, order);
+ __field(unsigned long, pa)
+ __field(u32, index)
+ __field(u32, type)
+ __field(u16, order)
),
TP_fast_assign(/* assign */
DD_DEV_ASSIGN(dd);
@@ -627,6 +627,12 @@ DEFINE_EVENT(/* event */
TP_ARGS(qp, index, flow)
);
+DEFINE_EVENT(/* event */
+ hfi1_tid_flow_template, hfi1_tid_flow_read_kdeth_eflags,
+ TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
+ TP_ARGS(qp, index, flow)
+);
+
DECLARE_EVENT_CLASS(/* tid_node */
hfi1_tid_node_template,
TP_PROTO(struct rvt_qp *qp, const char *msg, u32 index, u32 base,
@@ -851,6 +857,12 @@ DEFINE_EVENT(/* event */
TP_ARGS(qp, psn)
);
+DEFINE_EVENT(/* event */
+ hfi1_responder_info_template, hfi1_rsp_read_kdeth_eflags,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn)
+);
+
DECLARE_EVENT_CLASS(/* sender_info */
hfi1_sender_info_template,
TP_PROTO(struct rvt_qp *qp),
@@ -955,6 +967,12 @@ DEFINE_EVENT(/* event */
TP_ARGS(qp)
);
+DEFINE_EVENT(/* event */
+ hfi1_sender_info_template, hfi1_sender_read_kdeth_eflags,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp)
+);
+
DECLARE_EVENT_CLASS(/* tid_read_sender */
hfi1_tid_read_sender_template,
TP_PROTO(struct rvt_qp *qp, char newreq),
@@ -1015,6 +1033,12 @@ DEFINE_EVENT(/* event */
TP_ARGS(qp, newreq)
);
+DEFINE_EVENT(/* event */
+ hfi1_tid_read_sender_template, hfi1_tid_read_sender_kdeth_eflags,
+ TP_PROTO(struct rvt_qp *qp, char newreq),
+ TP_ARGS(qp, newreq)
+);
+
DECLARE_EVENT_CLASS(/* tid_rdma_request */
hfi1_tid_rdma_request_template,
TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
@@ -1216,6 +1240,13 @@ DEFINE_EVENT(/* event */
);
DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_read_kdeth_eflags,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
+DEFINE_EVENT(/* event */
hfi1_tid_rdma_request_template, hfi1_tid_req_make_rc_ack_write,
TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
struct tid_rdma_request *req),
@@ -1229,6 +1260,13 @@ DEFINE_EVENT(/* event */
TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
);
+DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_update_num_rd_atomic,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
DECLARE_EVENT_CLASS(/* rc_rcv_err */
hfi1_rc_rcv_err_template,
TP_PROTO(struct rvt_qp *qp, u32 opcode, u32 psn, int diff),
diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h
index 09eb0c9ada00..769e5e4710c6 100644
--- a/drivers/infiniband/hw/hfi1/trace_tx.h
+++ b/drivers/infiniband/hw/hfi1/trace_tx.h
@@ -588,7 +588,7 @@ TRACE_EVENT(hfi1_sdma_user_reqinfo,
TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 *i),
TP_ARGS(dd, ctxt, subctxt, i),
TP_STRUCT__entry(
- DD_DEV_ENTRY(dd);
+ DD_DEV_ENTRY(dd)
__field(u16, ctxt)
__field(u8, subctxt)
__field(u8, ver_opcode)
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 3592a9ec155e..4da03f823474 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -59,11 +59,11 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
struct tid_user_buf *tbuf,
u32 rcventry, struct tid_group *grp,
u16 pageidx, unsigned int npages);
-static int tid_rb_insert(void *arg, struct mmu_rb_node *node);
static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
struct tid_rb_node *tnode);
-static void tid_rb_remove(void *arg, struct mmu_rb_node *node);
-static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
+static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq);
static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *,
struct tid_group *grp,
unsigned int start, u16 count,
@@ -73,10 +73,8 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
struct tid_group **grp);
static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);
-static struct mmu_rb_ops tid_rb_ops = {
- .insert = tid_rb_insert,
- .remove = tid_rb_remove,
- .invalidate = tid_rb_invalidate
+static const struct mmu_interval_notifier_ops tid_mn_ops = {
+ .invalidate = tid_rb_invalidate,
};
/*
@@ -87,12 +85,8 @@ static struct mmu_rb_ops tid_rb_ops = {
int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
struct hfi1_ctxtdata *uctxt)
{
- struct hfi1_devdata *dd = uctxt->dd;
int ret = 0;
- spin_lock_init(&fd->tid_lock);
- spin_lock_init(&fd->invalid_lock);
-
fd->entry_to_rb = kcalloc(uctxt->expected_count,
sizeof(struct rb_node *),
GFP_KERNEL);
@@ -109,20 +103,7 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
fd->entry_to_rb = NULL;
return -ENOMEM;
}
-
- /*
- * Register MMU notifier callbacks. If the registration
- * fails, continue without TID caching for this context.
- */
- ret = hfi1_mmu_rb_register(fd, fd->mm, &tid_rb_ops,
- dd->pport->hfi1_wq,
- &fd->handler);
- if (ret) {
- dd_dev_info(dd,
- "Failed MMU notifier registration %d\n",
- ret);
- ret = 0;
- }
+ fd->use_mn = true;
}
/*
@@ -139,7 +120,7 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
* init.
*/
spin_lock(&fd->tid_lock);
- if (uctxt->subctxt_cnt && fd->handler) {
+ if (uctxt->subctxt_cnt && fd->use_mn) {
u16 remainder;
fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt;
@@ -158,18 +139,12 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
- /*
- * The notifier would have been removed when the process'es mm
- * was freed.
- */
- if (fd->handler) {
- hfi1_mmu_rb_unregister(fd->handler);
- } else {
- if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
- unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
- if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
- unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
- }
+ mutex_lock(&uctxt->exp_mutex);
+ if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
+ unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
+ if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
+ unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
+ mutex_unlock(&uctxt->exp_mutex);
kfree(fd->invalid_tids);
fd->invalid_tids = NULL;
@@ -201,7 +176,7 @@ static void unpin_rcv_pages(struct hfi1_filedata *fd,
if (mapped) {
pci_unmap_single(dd->pcidev, node->dma_addr,
- node->mmu.len, PCI_DMA_FROMDEVICE);
+ node->npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
pages = &node->pages[idx];
} else {
pages = &tidbuf->pages[idx];
@@ -777,8 +752,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
return -EFAULT;
}
- node->mmu.addr = tbuf->vaddr + (pageidx * PAGE_SIZE);
- node->mmu.len = npages * PAGE_SIZE;
+ node->fdata = fd;
node->phys = page_to_phys(pages[0]);
node->npages = npages;
node->rcventry = rcventry;
@@ -787,23 +761,35 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
node->freed = false;
memcpy(node->pages, pages, sizeof(struct page *) * npages);
- if (!fd->handler)
- ret = tid_rb_insert(fd, &node->mmu);
- else
- ret = hfi1_mmu_rb_insert(fd->handler, &node->mmu);
-
- if (ret) {
- hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
- node->rcventry, node->mmu.addr, node->phys, ret);
- pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
- PCI_DMA_FROMDEVICE);
- kfree(node);
- return -EFAULT;
+ if (fd->use_mn) {
+ ret = mmu_interval_notifier_insert(
+ &node->notifier, fd->mm,
+ tbuf->vaddr + (pageidx * PAGE_SIZE), npages * PAGE_SIZE,
+ &tid_mn_ops);
+ if (ret)
+ goto out_unmap;
+ /*
+ * FIXME: This is in the wrong order, the notifier should be
+ * established before the pages are pinned by pin_rcv_pages.
+ */
+ mmu_interval_read_begin(&node->notifier);
}
+ fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node;
+
hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1);
trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages,
- node->mmu.addr, node->phys, phys);
+ node->notifier.interval_tree.start, node->phys,
+ phys);
return 0;
+
+out_unmap:
+ hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
+ node->rcventry, node->notifier.interval_tree.start,
+ node->phys, ret);
+ pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
+ PCI_DMA_FROMDEVICE);
+ kfree(node);
+ return -EFAULT;
}
static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
@@ -833,10 +819,9 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
if (grp)
*grp = node->grp;
- if (!fd->handler)
- cacheless_tid_rb_remove(fd, node);
- else
- hfi1_mmu_rb_remove(fd->handler, &node->mmu);
+ if (fd->use_mn)
+ mmu_interval_notifier_remove(&node->notifier);
+ cacheless_tid_rb_remove(fd, node);
return 0;
}
@@ -847,7 +832,8 @@ static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
struct hfi1_devdata *dd = uctxt->dd;
trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
- node->npages, node->mmu.addr, node->phys,
+ node->npages,
+ node->notifier.interval_tree.start, node->phys,
node->dma_addr);
/*
@@ -894,30 +880,29 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
if (!node || node->rcventry != rcventry)
continue;
+ if (fd->use_mn)
+ mmu_interval_notifier_remove(
+ &node->notifier);
cacheless_tid_rb_remove(fd, node);
}
}
}
}
-/*
- * Always return 0 from this function. A non-zero return indicates that the
- * remove operation will be called and that memory should be unpinned.
- * However, the driver cannot unpin out from under PSM. Instead, retain the
- * memory (by returning 0) and inform PSM that the memory is going away. PSM
- * will call back later when it has removed the memory from its list.
- */
-static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
+static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
- struct hfi1_filedata *fdata = arg;
- struct hfi1_ctxtdata *uctxt = fdata->uctxt;
struct tid_rb_node *node =
- container_of(mnode, struct tid_rb_node, mmu);
+ container_of(mni, struct tid_rb_node, notifier);
+ struct hfi1_filedata *fdata = node->fdata;
+ struct hfi1_ctxtdata *uctxt = fdata->uctxt;
if (node->freed)
- return 0;
+ return true;
- trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr,
+ trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt,
+ node->notifier.interval_tree.start,
node->rcventry, node->npages, node->dma_addr);
node->freed = true;
@@ -946,18 +931,7 @@ static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
fdata->invalid_tid_idx++;
}
spin_unlock(&fdata->invalid_lock);
- return 0;
-}
-
-static int tid_rb_insert(void *arg, struct mmu_rb_node *node)
-{
- struct hfi1_filedata *fdata = arg;
- struct tid_rb_node *tnode =
- container_of(node, struct tid_rb_node, mmu);
- u32 base = fdata->uctxt->expected_base;
-
- fdata->entry_to_rb[tnode->rcventry - base] = tnode;
- return 0;
+ return true;
}
static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
@@ -968,12 +942,3 @@ static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
fdata->entry_to_rb[tnode->rcventry - base] = NULL;
clear_tid_node(fdata, tnode);
}
-
-static void tid_rb_remove(void *arg, struct mmu_rb_node *node)
-{
- struct hfi1_filedata *fdata = arg;
- struct tid_rb_node *tnode =
- container_of(node, struct tid_rb_node, mmu);
-
- cacheless_tid_rb_remove(fdata, tnode);
-}
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
index 43b105de1d54..6257eee083a1 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
@@ -65,7 +65,8 @@ struct tid_user_buf {
};
struct tid_rb_node {
- struct mmu_rb_node mmu;
+ struct mmu_interval_notifier notifier;
+ struct hfi1_filedata *fdata;
unsigned long phys;
struct tid_group *grp;
u32 rcventry;
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index b89a9b9aef7a..3b505006c0a6 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -106,7 +106,7 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np
int ret;
unsigned int gup_flags = FOLL_LONGTERM | (writable ? FOLL_WRITE : 0);
- ret = get_user_pages_fast(vaddr, npages, gup_flags, pages);
+ ret = pin_user_pages_fast(vaddr, npages, gup_flags, pages);
if (ret < 0)
return ret;
@@ -118,10 +118,7 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np
void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
size_t npages, bool dirty)
{
- if (dirty)
- put_user_pages_dirty_lock(p, npages);
- else
- put_user_pages(p, npages);
+ unpin_user_pages_dirty_lock(p, npages, dirty);
if (mm) { /* during close after signal, mm can be NULL */
atomic64_sub(npages, &mm->pinned_vm);
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index fd754a16475a..c2f0d9ba93de 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -179,7 +179,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
pq = kzalloc(sizeof(*pq), GFP_KERNEL);
if (!pq)
return -ENOMEM;
-
pq->dd = dd;
pq->ctxt = uctxt->ctxt;
pq->subctxt = fd->subctxt;
@@ -236,7 +235,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
goto pq_mmu_fail;
}
- fd->pq = pq;
+ rcu_assign_pointer(fd->pq, pq);
fd->cq = cq;
return 0;
@@ -264,8 +263,14 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt);
- pq = fd->pq;
+ spin_lock(&fd->pq_rcu_lock);
+ pq = srcu_dereference_check(fd->pq, &fd->pq_srcu,
+ lockdep_is_held(&fd->pq_rcu_lock));
if (pq) {
+ rcu_assign_pointer(fd->pq, NULL);
+ spin_unlock(&fd->pq_rcu_lock);
+ synchronize_srcu(&fd->pq_srcu);
+ /* at this point there can be no more new requests */
if (pq->handler)
hfi1_mmu_rb_unregister(pq->handler);
iowait_sdma_drain(&pq->busy);
@@ -277,7 +282,8 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
kfree(pq->req_in_use);
kmem_cache_destroy(pq->txreq_cache);
kfree(pq);
- fd->pq = NULL;
+ } else {
+ spin_unlock(&fd->pq_rcu_lock);
}
if (fd->cq) {
vfree(fd->cq->comps);
@@ -321,7 +327,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
{
int ret = 0, i;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
- struct hfi1_user_sdma_pkt_q *pq = fd->pq;
+ struct hfi1_user_sdma_pkt_q *pq =
+ srcu_dereference(fd->pq, &fd->pq_srcu);
struct hfi1_user_sdma_comp_q *cq = fd->cq;
struct hfi1_devdata *dd = pq->dd;
unsigned long idx = 0;
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index 4d8510b0fc38..9972e0e6545e 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -110,12 +110,6 @@ enum pkt_q_sdma_state {
SDMA_PKT_Q_DEFERRED,
};
-/*
- * Maximum retry attempts to submit a TX request
- * before putting the process to sleep.
- */
-#define MAX_DEFER_RETRY_COUNT 1
-
#define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */
#define SDMA_DBG(req, fmt, ...) \
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 646f61545ed6..089e201d7550 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -147,9 +147,6 @@ static int pio_wait(struct rvt_qp *qp,
/* Length of buffer to create verbs txreq cache name */
#define TXREQ_NAME_LEN 24
-/* 16B trailing buffer */
-static const u8 trail_buf[MAX_16B_PADDING];
-
static uint wss_threshold = 80;
module_param(wss_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy");
@@ -820,8 +817,8 @@ static int build_verbs_tx_desc(
/* add icrc, lt byte, and padding to flit */
if (extra_bytes)
- ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq,
- (void *)trail_buf, extra_bytes);
+ ret = sdma_txadd_daddr(sde->dd, &tx->txreq,
+ sde->dd->sdma_pad_phys, extra_bytes);
bail_txadd:
return ret;
@@ -874,16 +871,17 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
else
pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
- if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
- pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
pbc = create_pbc(ppd,
pbc,
qp->srate_mbps,
vl,
plen);
- /* Update HCRC based on packet opcode */
- pbc = update_hcrc(ps->opcode, pbc);
+ if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
+ pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
+ else
+ /* Update HCRC based on packet opcode */
+ pbc = update_hcrc(ps->opcode, pbc);
}
tx->wqe = qp->s_wqe;
ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc);
@@ -1030,17 +1028,17 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
else
pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
+ pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
- pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
-
- /* Update HCRC based on packet opcode */
- pbc = update_hcrc(ps->opcode, pbc);
+ else
+ /* Update HCRC based on packet opcode */
+ pbc = update_hcrc(ps->opcode, pbc);
}
if (cb)
iowait_pio_inc(&priv->s_iowait);
pbuf = sc_buffer_alloc(sc, plen, cb, qp);
- if (unlikely(IS_ERR_OR_NULL(pbuf))) {
+ if (IS_ERR_OR_NULL(pbuf)) {
if (cb)
verbs_pio_complete(qp, 0);
if (IS_ERR(pbuf)) {
@@ -1088,7 +1086,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
}
/* add icrc, lt byte, and padding to flit */
if (extra_bytes)
- seg_pio_copy_mid(pbuf, trail_buf, extra_bytes);
+ seg_pio_copy_mid(pbuf, ppd->dd->sdma_pad_dma,
+ extra_bytes);
seg_pio_copy_end(pbuf);
}
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index ae9582ddbc8f..d36e3e14896d 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -107,9 +107,9 @@ enum {
HFI1_HAS_GRH = (1 << 0),
};
-#define LRH_16B_BYTES (FIELD_SIZEOF(struct hfi1_16b_header, lrh))
+#define LRH_16B_BYTES (sizeof_field(struct hfi1_16b_header, lrh))
#define LRH_16B_DWORDS (LRH_16B_BYTES / sizeof(u32))
-#define LRH_9B_BYTES (FIELD_SIZEOF(struct ib_header, lrh))
+#define LRH_9B_BYTES (sizeof_field(struct ib_header, lrh))
#define LRH_9B_DWORDS (LRH_9B_BYTES / sizeof(u32))
/* 24Bits for qpn, upper 8Bits reserved */
@@ -330,9 +330,8 @@ void hfi1_sys_guid_chg(struct hfi1_ibport *ibp);
void hfi1_node_desc_chg(struct hfi1_ibport *ibp);
int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in_mad, size_t in_mad_size,
- struct ib_mad_hdr *out_mad, size_t *out_mad_size,
- u16 *out_mad_pkey_index);
+ const struct ib_mad *in_mad, struct ib_mad *out_mad,
+ size_t *out_mad_size, u16 *out_mad_pkey_index);
/*
* The PSN_MASK and PSN_SHIFT allow for
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index b49e60e8397d..6b14581b9965 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -78,7 +78,7 @@ static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
if (ret)
goto done;
- if (uctxt->rcvhdrtail_kvaddr)
+ if (hfi1_rcvhdrtail_kvaddr(uctxt))
clear_rcvhdrtail(uctxt);
rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c
index af1b1ffcb38e..7d90b900131b 100644
--- a/drivers/infiniband/hw/hfi1/vnic_sdma.c
+++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c
@@ -102,13 +102,13 @@ static noinline int build_vnic_ulp_payload(struct sdma_engine *sde,
goto bail_txadd;
for (i = 0; i < skb_shinfo(tx->skb)->nr_frags; i++) {
- struct skb_frag_struct *frag = &skb_shinfo(tx->skb)->frags[i];
+ skb_frag_t *frag = &skb_shinfo(tx->skb)->frags[i];
/* combine physically continuous fragments later? */
ret = sdma_txadd_page(sde->dd,
&tx->txreq,
skb_frag_page(frag),
- frag->page_offset,
+ skb_frag_off(frag),
skb_frag_size(frag));
if (unlikely(ret))
goto bail_txadd;
diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig
index 54782197c717..4921c1e40ccd 100644
--- a/drivers/infiniband/hw/hns/Kconfig
+++ b/drivers/infiniband/hw/hns/Kconfig
@@ -1,8 +1,9 @@
# SPDX-License-Identifier: GPL-2.0-only
config INFINIBAND_HNS
- bool "HNS RoCE Driver"
+ tristate "HNS RoCE Driver"
depends on NET_VENDOR_HISILICON
depends on ARM64 || (COMPILE_TEST && 64BIT)
+ depends on (HNS_DSAF && HNS_ENET) || HNS3
---help---
This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine
is used in Hisilicon Hip06 and more further ICT SoC based on
@@ -11,8 +12,9 @@ config INFINIBAND_HNS
To compile HIP06 or HIP08 driver as module, choose M here.
config INFINIBAND_HNS_HIP06
- tristate "Hisilicon Hip06 Family RoCE support"
+ bool "Hisilicon Hip06 Family RoCE support"
depends on INFINIBAND_HNS && HNS && HNS_DSAF && HNS_ENET
+ depends on INFINIBAND_HNS=m || (HNS_DSAF=y && HNS_ENET=y)
---help---
RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip06 and
Hip07 SoC. These RoCE engines are platform devices.
@@ -21,8 +23,9 @@ config INFINIBAND_HNS_HIP06
module will be called hns-roce-hw-v1
config INFINIBAND_HNS_HIP08
- tristate "Hisilicon Hip08 Family RoCE support"
+ bool "Hisilicon Hip08 Family RoCE support"
depends on INFINIBAND_HNS && PCI && HNS3
+ depends on INFINIBAND_HNS=m || HNS3=y
---help---
RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip08 SoC.
The RoCE engine is a PCI device.
diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile
index 449a2d81319d..e105945b94a1 100644
--- a/drivers/infiniband/hw/hns/Makefile
+++ b/drivers/infiniband/hw/hns/Makefile
@@ -9,8 +9,12 @@ hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \
hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \
hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o
+ifdef CONFIG_INFINIBAND_HNS_HIP06
hns-roce-hw-v1-objs := hns_roce_hw_v1.o $(hns-roce-objs)
-obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o
+obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v1.o
+endif
+ifdef CONFIG_INFINIBAND_HNS_HIP08
hns-roce-hw-v2-objs := hns_roce_hw_v2.o hns_roce_hw_v2_dfx.o $(hns-roce-objs)
-obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o
+obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v2.o
+endif
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index cdd2ac24fc2a..8a522e14ef62 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -46,41 +46,38 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr,
const struct ib_gid_attr *gid_attr;
struct device *dev = hr_dev->dev;
struct hns_roce_ah *ah = to_hr_ah(ibah);
- u16 vlan_tag = 0xffff;
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
+ u16 vlan_id = 0xffff;
bool vlan_en = false;
int ret;
gid_attr = ah_attr->grh.sgid_attr;
- ret = rdma_read_gid_l2_fields(gid_attr, &vlan_tag, NULL);
+ ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL);
if (ret)
return ret;
/* Get mac address */
memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN);
- if (vlan_tag < VLAN_CFI_MASK) {
+ if (vlan_id < VLAN_N_VID) {
vlan_en = true;
- vlan_tag |= (rdma_ah_get_sl(ah_attr) &
+ vlan_id |= (rdma_ah_get_sl(ah_attr) &
HNS_ROCE_VLAN_SL_BIT_MASK) <<
HNS_ROCE_VLAN_SL_SHIFT;
}
- ah->av.port_pd = cpu_to_le32(to_hr_pd(ibah->pd)->pdn |
- (rdma_ah_get_port_num(ah_attr) <<
- HNS_ROCE_PORT_NUM_SHIFT));
+ ah->av.port = rdma_ah_get_port_num(ah_attr);
ah->av.gid_index = grh->sgid_index;
- ah->av.vlan = cpu_to_le16(vlan_tag);
+ ah->av.vlan_id = vlan_id;
ah->av.vlan_en = vlan_en;
- dev_dbg(dev, "gid_index = 0x%x,vlan = 0x%x\n", ah->av.gid_index,
- ah->av.vlan);
+ dev_dbg(dev, "gid_index = 0x%x,vlan_id = 0x%x\n", ah->av.gid_index,
+ ah->av.vlan_id);
if (rdma_ah_get_static_rate(ah_attr))
ah->av.stat_rate = IB_RATE_10_GBPS;
memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE);
- ah->av.sl_tclass_flowlabel = cpu_to_le32(rdma_ah_get_sl(ah_attr) <<
- HNS_ROCE_SL_SHIFT);
+ ah->av.sl = rdma_ah_get_sl(ah_attr);
return 0;
}
@@ -91,17 +88,11 @@ int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
memset(ah_attr, 0, sizeof(*ah_attr));
- rdma_ah_set_sl(ah_attr, (le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
- HNS_ROCE_SL_SHIFT));
- rdma_ah_set_port_num(ah_attr, (le32_to_cpu(ah->av.port_pd) >>
- HNS_ROCE_PORT_NUM_SHIFT));
+ rdma_ah_set_sl(ah_attr, ah->av.sl);
+ rdma_ah_set_port_num(ah_attr, ah->av.port);
rdma_ah_set_static_rate(ah_attr, ah->av.stat_rate);
- rdma_ah_set_grh(ah_attr, NULL,
- (le32_to_cpu(ah->av.sl_tclass_flowlabel) &
- HNS_ROCE_FLOW_LABEL_MASK), ah->av.gid_index,
- ah->av.hop_limit,
- (le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
- HNS_ROCE_TCLASS_SHIFT));
+ rdma_ah_set_grh(ah_attr, NULL, ah->av.flowlabel,
+ ah->av.gid_index, ah->av.hop_limit, ah->av.tclass);
rdma_ah_set_dgid_raw(ah_attr, ah->av.dgid);
return 0;
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index 8c063c598d2a..da574c26e063 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -55,7 +55,7 @@ int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj)
bitmap->last = 0;
*obj |= bitmap->top;
} else {
- ret = -1;
+ ret = -EINVAL;
}
spin_unlock(&bitmap->lock);
@@ -100,7 +100,7 @@ int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt,
}
*obj |= bitmap->top;
} else {
- ret = -1;
+ ret = -EINVAL;
}
spin_unlock(&bitmap->lock);
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c
index 0cd09bf4d7ea..455d533dd7c4 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c
@@ -211,7 +211,6 @@ int hns_roce_cmd_init(struct hns_roce_dev *hr_dev)
mutex_init(&hr_dev->cmd.hcr_mutex);
sema_init(&hr_dev->cmd.poll_sem, 1);
hr_dev->cmd.use_events = 0;
- hr_dev->cmd.toggle = 1;
hr_dev->cmd.max_cmds = CMD_MAX_NUM;
hr_dev->cmd.pool = dma_pool_create("hns_roce_cmd", dev,
HNS_ROCE_MAILBOX_SIZE,
@@ -252,23 +251,15 @@ int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev)
hr_cmd->token_mask = CMD_TOKEN_MASK;
hr_cmd->use_events = 1;
- down(&hr_cmd->poll_sem);
-
return 0;
}
void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev)
{
struct hns_roce_cmdq *hr_cmd = &hr_dev->cmd;
- int i;
-
- hr_cmd->use_events = 0;
-
- for (i = 0; i < hr_cmd->max_cmds; ++i)
- down(&hr_cmd->event_sem);
kfree(hr_cmd->context);
- up(&hr_cmd->poll_sem);
+ hr_cmd->use_events = 0;
}
struct hns_roce_cmd_mailbox
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h
index 2b6ac646ca9a..1915bacaded0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.h
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h
@@ -115,12 +115,12 @@ enum {
enum {
/* TPT commands */
- HNS_ROCE_CMD_SW2HW_MPT = 0xd,
- HNS_ROCE_CMD_HW2SW_MPT = 0xf,
+ HNS_ROCE_CMD_CREATE_MPT = 0xd,
+ HNS_ROCE_CMD_DESTROY_MPT = 0xf,
/* CQ commands */
- HNS_ROCE_CMD_SW2HW_CQ = 0x16,
- HNS_ROCE_CMD_HW2SW_CQ = 0x17,
+ HNS_ROCE_CMD_CREATE_CQC = 0x16,
+ HNS_ROCE_CMD_DESTROY_CQC = 0x17,
/* QP/EE commands */
HNS_ROCE_CMD_RST2INIT_QP = 0x19,
@@ -129,14 +129,14 @@ enum {
HNS_ROCE_CMD_RTS2RTS_QP = 0x1c,
HNS_ROCE_CMD_2ERR_QP = 0x1e,
HNS_ROCE_CMD_RTS2SQD_QP = 0x1f,
- HNS_ROCE_CMD_SQD2SQD_QP = 0x38,
HNS_ROCE_CMD_SQD2RTS_QP = 0x20,
HNS_ROCE_CMD_2RST_QP = 0x21,
HNS_ROCE_CMD_QUERY_QP = 0x22,
- HNS_ROCE_CMD_SW2HW_SRQ = 0x70,
+ HNS_ROCE_CMD_SQD2SQD_QP = 0x38,
+ HNS_ROCE_CMD_CREATE_SRQ = 0x70,
HNS_ROCE_CMD_MODIFY_SRQC = 0x72,
HNS_ROCE_CMD_QUERY_SRQC = 0x73,
- HNS_ROCE_CMD_HW2SW_SRQ = 0x74,
+ HNS_ROCE_CMD_DESTROY_SRQ = 0x74,
};
int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param,
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 4e50c22a2da4..5ffe4c996ed3 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -39,52 +39,8 @@
#include <rdma/hns-abi.h>
#include "hns_roce_common.h"
-static void hns_roce_ib_cq_comp(struct hns_roce_cq *hr_cq)
-{
- struct ib_cq *ibcq = &hr_cq->ib_cq;
-
- ibcq->comp_handler(ibcq, ibcq->cq_context);
-}
-
-static void hns_roce_ib_cq_event(struct hns_roce_cq *hr_cq,
- enum hns_roce_event event_type)
-{
- struct hns_roce_dev *hr_dev;
- struct ib_event event;
- struct ib_cq *ibcq;
-
- ibcq = &hr_cq->ib_cq;
- hr_dev = to_hr_dev(ibcq->device);
-
- if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID &&
- event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR &&
- event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) {
- dev_err(hr_dev->dev,
- "hns_roce_ib: Unexpected event type 0x%x on CQ %06lx\n",
- event_type, hr_cq->cqn);
- return;
- }
-
- if (ibcq->event_handler) {
- event.device = ibcq->device;
- event.event = IB_EVENT_CQ_ERR;
- event.element.cq = ibcq;
- ibcq->event_handler(&event, ibcq->cq_context);
- }
-}
-
-static int hns_roce_sw2hw_cq(struct hns_roce_dev *dev,
- struct hns_roce_cmd_mailbox *mailbox,
- unsigned long cq_num)
-{
- return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cq_num, 0,
- HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIMEOUT_MSECS);
-}
-
-static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent,
- struct hns_roce_mtt *hr_mtt,
- struct hns_roce_uar *hr_uar,
- struct hns_roce_cq *hr_cq, int vector)
+static int hns_roce_alloc_cqc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cq *hr_cq)
{
struct hns_roce_cmd_mailbox *mailbox;
struct hns_roce_hem_table *mtt_table;
@@ -102,35 +58,32 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent,
else
mtt_table = &hr_dev->mr_table.mtt_table;
- mtts = hns_roce_table_find(hr_dev, mtt_table,
- hr_mtt->first_seg, &dma_handle);
- if (!mtts) {
- dev_err(dev, "CQ alloc.Failed to find cq buf addr.\n");
- return -EINVAL;
- }
+ mtts = hns_roce_table_find(hr_dev, mtt_table, hr_cq->mtt.first_seg,
+ &dma_handle);
- if (vector >= hr_dev->caps.num_comp_vectors) {
- dev_err(dev, "CQ alloc.Invalid vector.\n");
+ if (!mtts) {
+ dev_err(dev, "Failed to find mtt for CQ buf.\n");
return -EINVAL;
}
- hr_cq->vector = vector;
ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn);
- if (ret == -1) {
- dev_err(dev, "CQ alloc.Failed to alloc index.\n");
- return -ENOMEM;
+ if (ret) {
+ dev_err(dev, "Num of CQ out of range.\n");
+ return ret;
}
/* Get CQC memory HEM(Hardware Entry Memory) table */
ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn);
if (ret) {
- dev_err(dev, "CQ alloc.Failed to get context mem.\n");
+ dev_err(dev,
+ "Get context mem failed(%d) when CQ(0x%lx) alloc.\n",
+ ret, hr_cq->cqn);
goto err_out;
}
ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL));
if (ret) {
- dev_err(dev, "CQ alloc failed xa_store.\n");
+ dev_err(dev, "Failed to xa_store CQ.\n");
goto err_put;
}
@@ -141,20 +94,21 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent,
goto err_xa;
}
- hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle,
- nent, vector);
+ hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle);
/* Send mailbox to hw */
- ret = hns_roce_sw2hw_cq(hr_dev, mailbox, hr_cq->cqn);
+ ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_cq->cqn, 0,
+ HNS_ROCE_CMD_CREATE_CQC, HNS_ROCE_CMD_TIMEOUT_MSECS);
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
if (ret) {
- dev_err(dev, "CQ alloc.Failed to cmd mailbox.\n");
+ dev_err(dev,
+ "Send cmd mailbox failed(%d) when CQ(0x%lx) alloc.\n",
+ ret, hr_cq->cqn);
goto err_xa;
}
hr_cq->cons_index = 0;
hr_cq->arm_sn = 1;
- hr_cq->uar = hr_uar;
atomic_set(&hr_cq->refcount, 1);
init_completion(&hr_cq->free);
@@ -172,24 +126,17 @@ err_out:
return ret;
}
-static int hns_roce_hw2sw_cq(struct hns_roce_dev *dev,
- struct hns_roce_cmd_mailbox *mailbox,
- unsigned long cq_num)
-{
- return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, cq_num,
- mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_CQ,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
-}
-
-void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
+void hns_roce_free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
{
struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
struct device *dev = hr_dev->dev;
int ret;
- ret = hns_roce_hw2sw_cq(hr_dev, NULL, hr_cq->cqn);
+ ret = hns_roce_cmd_mbox(hr_dev, 0, 0, hr_cq->cqn, 1,
+ HNS_ROCE_CMD_DESTROY_CQC,
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
if (ret)
- dev_err(dev, "HW2SW_CQ failed (%d) for CQN %06lx\n", ret,
+ dev_err(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret,
hr_cq->cqn);
xa_erase(&cq_table->array, hr_cq->cqn);
@@ -206,186 +153,243 @@ void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR);
}
-static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev,
- struct ib_udata *udata,
- struct hns_roce_cq_buf *buf,
- struct ib_umem **umem, u64 buf_addr, int cqe)
+static int get_cq_umem(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
+ struct hns_roce_ib_create_cq ucmd,
+ struct ib_udata *udata)
{
- int ret;
- u32 page_shift;
+ struct hns_roce_buf *buf = &hr_cq->buf;
+ struct hns_roce_mtt *mtt = &hr_cq->mtt;
+ struct ib_umem **umem = &hr_cq->umem;
u32 npages;
+ int ret;
- *umem = ib_umem_get(udata, buf_addr, cqe * hr_dev->caps.cq_entry_sz,
- IB_ACCESS_LOCAL_WRITE, 1);
+ *umem = ib_umem_get(&hr_dev->ib_dev, ucmd.buf_addr, buf->size,
+ IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(*umem))
return PTR_ERR(*umem);
if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
- buf->hr_mtt.mtt_type = MTT_TYPE_CQE;
+ mtt->mtt_type = MTT_TYPE_CQE;
else
- buf->hr_mtt.mtt_type = MTT_TYPE_WQE;
-
- if (hr_dev->caps.cqe_buf_pg_sz) {
- npages = (ib_umem_page_count(*umem) +
- (1 << hr_dev->caps.cqe_buf_pg_sz) - 1) /
- (1 << hr_dev->caps.cqe_buf_pg_sz);
- page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz;
- ret = hns_roce_mtt_init(hr_dev, npages, page_shift,
- &buf->hr_mtt);
- } else {
- ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(*umem),
- PAGE_SHIFT, &buf->hr_mtt);
- }
+ mtt->mtt_type = MTT_TYPE_WQE;
+
+ npages = DIV_ROUND_UP(ib_umem_page_count(*umem),
+ 1 << hr_dev->caps.cqe_buf_pg_sz);
+ ret = hns_roce_mtt_init(hr_dev, npages, buf->page_shift, mtt);
if (ret)
goto err_buf;
- ret = hns_roce_ib_umem_write_mtt(hr_dev, &buf->hr_mtt, *umem);
+ ret = hns_roce_ib_umem_write_mtt(hr_dev, mtt, *umem);
if (ret)
goto err_mtt;
return 0;
err_mtt:
- hns_roce_mtt_cleanup(hr_dev, &buf->hr_mtt);
+ hns_roce_mtt_cleanup(hr_dev, mtt);
err_buf:
ib_umem_release(*umem);
return ret;
}
-static int hns_roce_ib_alloc_cq_buf(struct hns_roce_dev *hr_dev,
- struct hns_roce_cq_buf *buf, u32 nent)
+static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
{
+ struct hns_roce_buf *buf = &hr_cq->buf;
+ struct hns_roce_mtt *mtt = &hr_cq->mtt;
int ret;
- u32 page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz;
- ret = hns_roce_buf_alloc(hr_dev, nent * hr_dev->caps.cq_entry_sz,
- (1 << page_shift) * 2, &buf->hr_buf,
- page_shift);
+ ret = hns_roce_buf_alloc(hr_dev, buf->size, (1 << buf->page_shift) * 2,
+ buf, buf->page_shift);
if (ret)
goto out;
if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
- buf->hr_mtt.mtt_type = MTT_TYPE_CQE;
+ mtt->mtt_type = MTT_TYPE_CQE;
else
- buf->hr_mtt.mtt_type = MTT_TYPE_WQE;
+ mtt->mtt_type = MTT_TYPE_WQE;
- ret = hns_roce_mtt_init(hr_dev, buf->hr_buf.npages,
- buf->hr_buf.page_shift, &buf->hr_mtt);
+ ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift, mtt);
if (ret)
goto err_buf;
- ret = hns_roce_buf_write_mtt(hr_dev, &buf->hr_mtt, &buf->hr_buf);
+ ret = hns_roce_buf_write_mtt(hr_dev, mtt, buf);
if (ret)
goto err_mtt;
return 0;
err_mtt:
- hns_roce_mtt_cleanup(hr_dev, &buf->hr_mtt);
+ hns_roce_mtt_cleanup(hr_dev, mtt);
err_buf:
- hns_roce_buf_free(hr_dev, nent * hr_dev->caps.cq_entry_sz,
- &buf->hr_buf);
+ hns_roce_buf_free(hr_dev, buf->size, buf);
+
out:
return ret;
}
-static void hns_roce_ib_free_cq_buf(struct hns_roce_dev *hr_dev,
- struct hns_roce_cq_buf *buf, int cqe)
+static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
{
- hns_roce_buf_free(hr_dev, (cqe + 1) * hr_dev->caps.cq_entry_sz,
- &buf->hr_buf);
+ hns_roce_buf_free(hr_dev, hr_cq->buf.size, &hr_cq->buf);
}
-int hns_roce_ib_create_cq(struct ib_cq *ib_cq,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+static int create_user_cq(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cq *hr_cq,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_cq_resp *resp)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
- struct device *dev = hr_dev->dev;
struct hns_roce_ib_create_cq ucmd;
+ struct device *dev = hr_dev->dev;
+ int ret;
+ struct hns_roce_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct hns_roce_ucontext, ibucontext);
+
+ if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
+ dev_err(dev, "Failed to copy_from_udata.\n");
+ return -EFAULT;
+ }
+
+ /* Get user space address, write it into mtt table */
+ ret = get_cq_umem(hr_dev, hr_cq, ucmd, udata);
+ if (ret) {
+ dev_err(dev, "Failed to get_cq_umem.\n");
+ return ret;
+ }
+
+ if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
+ (udata->outlen >= sizeof(*resp))) {
+ ret = hns_roce_db_map_user(context, udata, ucmd.db_addr,
+ &hr_cq->db);
+ if (ret) {
+ dev_err(dev, "cq record doorbell map failed!\n");
+ goto err_mtt;
+ }
+ hr_cq->db_en = 1;
+ resp->cap_flags |= HNS_ROCE_SUPPORT_CQ_RECORD_DB;
+ }
+
+ return 0;
+
+err_mtt:
+ hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt);
+ ib_umem_release(hr_cq->umem);
+
+ return ret;
+}
+
+static int create_kernel_cq(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cq *hr_cq)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) {
+ ret = hns_roce_alloc_db(hr_dev, &hr_cq->db, 1);
+ if (ret)
+ return ret;
+
+ hr_cq->set_ci_db = hr_cq->db.db_record;
+ *hr_cq->set_ci_db = 0;
+ hr_cq->db_en = 1;
+ }
+
+ /* Init mtt table and write buff address to mtt table */
+ ret = alloc_cq_buf(hr_dev, hr_cq);
+ if (ret) {
+ dev_err(dev, "Failed to alloc_cq_buf.\n");
+ goto err_db;
+ }
+
+ hr_cq->cq_db_l = hr_dev->reg_base + hr_dev->odb_offset +
+ DB_REG_OFFSET * hr_dev->priv_uar.index;
+
+ return 0;
+
+err_db:
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)
+ hns_roce_free_db(hr_dev, &hr_cq->db);
+
+ return ret;
+}
+
+static void destroy_user_cq(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cq *hr_cq,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_cq_resp *resp)
+{
+ struct hns_roce_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct hns_roce_ucontext, ibucontext);
+
+ if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
+ (udata->outlen >= sizeof(*resp)))
+ hns_roce_db_unmap_user(context, &hr_cq->db);
+
+ hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt);
+ ib_umem_release(hr_cq->umem);
+}
+
+static void destroy_kernel_cq(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cq *hr_cq)
+{
+ hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt);
+ free_cq_buf(hr_dev, hr_cq);
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)
+ hns_roce_free_db(hr_dev, &hr_cq->db);
+}
+
+int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
struct hns_roce_ib_create_cq_resp resp = {};
struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
- struct hns_roce_uar *uar = NULL;
+ struct device *dev = hr_dev->dev;
int vector = attr->comp_vector;
- int cq_entries = attr->cqe;
+ u32 cq_entries = attr->cqe;
int ret;
- struct hns_roce_ucontext *context = rdma_udata_to_drv_context(
- udata, struct hns_roce_ucontext, ibucontext);
if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) {
- dev_err(dev, "Creat CQ failed. entries=%d, max=%d\n",
+ dev_err(dev, "Create CQ failed. entries=%d, max=%d\n",
cq_entries, hr_dev->caps.max_cqes);
return -EINVAL;
}
- if (hr_dev->caps.min_cqes)
- cq_entries = max(cq_entries, hr_dev->caps.min_cqes);
+ if (vector >= hr_dev->caps.num_comp_vectors) {
+ dev_err(dev, "Create CQ failed, vector=%d, max=%d\n",
+ vector, hr_dev->caps.num_comp_vectors);
+ return -EINVAL;
+ }
- cq_entries = roundup_pow_of_two((unsigned int)cq_entries);
- hr_cq->ib_cq.cqe = cq_entries - 1;
+ cq_entries = max(cq_entries, hr_dev->caps.min_cqes);
+ cq_entries = roundup_pow_of_two(cq_entries);
+ hr_cq->ib_cq.cqe = cq_entries - 1; /* used as cqe index */
+ hr_cq->cq_depth = cq_entries;
+ hr_cq->vector = vector;
+ hr_cq->buf.size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz;
+ hr_cq->buf.page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz;
spin_lock_init(&hr_cq->lock);
+ INIT_LIST_HEAD(&hr_cq->sq_list);
+ INIT_LIST_HEAD(&hr_cq->rq_list);
if (udata) {
- if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
- dev_err(dev, "Failed to copy_from_udata.\n");
- ret = -EFAULT;
- goto err_cq;
- }
-
- /* Get user space address, write it into mtt table */
- ret = hns_roce_ib_get_cq_umem(hr_dev, udata, &hr_cq->hr_buf,
- &hr_cq->umem, ucmd.buf_addr,
- cq_entries);
+ ret = create_user_cq(hr_dev, hr_cq, udata, &resp);
if (ret) {
- dev_err(dev, "Failed to get_cq_umem.\n");
+ dev_err(dev, "Create cq failed in user mode!\n");
goto err_cq;
}
-
- if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
- (udata->outlen >= sizeof(resp))) {
- ret = hns_roce_db_map_user(context, udata, ucmd.db_addr,
- &hr_cq->db);
- if (ret) {
- dev_err(dev, "cq record doorbell map failed!\n");
- goto err_mtt;
- }
- hr_cq->db_en = 1;
- resp.cap_flags |= HNS_ROCE_SUPPORT_CQ_RECORD_DB;
- }
-
- /* Get user space parameters */
- uar = &context->uar;
} else {
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) {
- ret = hns_roce_alloc_db(hr_dev, &hr_cq->db, 1);
- if (ret)
- goto err_cq;
-
- hr_cq->set_ci_db = hr_cq->db.db_record;
- *hr_cq->set_ci_db = 0;
- hr_cq->db_en = 1;
- }
-
- /* Init mmt table and write buff address to mtt table */
- ret = hns_roce_ib_alloc_cq_buf(hr_dev, &hr_cq->hr_buf,
- cq_entries);
+ ret = create_kernel_cq(hr_dev, hr_cq);
if (ret) {
- dev_err(dev, "Failed to alloc_cq_buf.\n");
- goto err_db;
+ dev_err(dev, "Create cq failed in kernel mode!\n");
+ goto err_cq;
}
-
- uar = &hr_dev->priv_uar;
- hr_cq->cq_db_l = hr_dev->reg_base + hr_dev->odb_offset +
- DB_REG_OFFSET * uar->index;
}
- /* Allocate cq index, fill cq_context */
- ret = hns_roce_cq_alloc(hr_dev, cq_entries, &hr_cq->hr_buf.hr_mtt, uar,
- hr_cq, vector);
+ ret = hns_roce_alloc_cqc(hr_dev, hr_cq);
if (ret) {
- dev_err(dev, "Creat CQ .Failed to cq_alloc.\n");
+ dev_err(dev, "Alloc CQ failed(%d).\n", ret);
goto err_dbmap;
}
@@ -398,11 +402,6 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq,
if (!udata && hr_cq->tptr_addr)
*hr_cq->tptr_addr = 0;
- /* Get created cq handler and carry out event */
- hr_cq->comp = hns_roce_ib_cq_comp;
- hr_cq->event = hns_roce_ib_cq_event;
- hr_cq->cq_depth = cq_entries;
-
if (udata) {
resp.cqn = hr_cq->cqn;
ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
@@ -413,29 +412,19 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq,
return 0;
err_cqc:
- hns_roce_free_cq(hr_dev, hr_cq);
+ hns_roce_free_cqc(hr_dev, hr_cq);
err_dbmap:
- if (udata && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
- (udata->outlen >= sizeof(resp)))
- hns_roce_db_unmap_user(context, &hr_cq->db);
-
-err_mtt:
- hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
- ib_umem_release(hr_cq->umem);
- if (!udata)
- hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf,
- hr_cq->ib_cq.cqe);
-
-err_db:
- if (!udata && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB))
- hns_roce_free_db(hr_dev, &hr_cq->db);
+ if (udata)
+ destroy_user_cq(hr_dev, hr_cq, udata, &resp);
+ else
+ destroy_kernel_cq(hr_dev, hr_cq);
err_cq:
return ret;
}
-void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
+void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
@@ -445,8 +434,8 @@ void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
return;
}
- hns_roce_free_cq(hr_dev, hr_cq);
- hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
+ hns_roce_free_cqc(hr_dev, hr_cq);
+ hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt);
ib_umem_release(hr_cq->umem);
if (udata) {
@@ -458,7 +447,7 @@ void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
&hr_cq->db);
} else {
/* Free the buff of stored cq */
- hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, ib_cq->cqe);
+ free_cq_buf(hr_dev, hr_cq);
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)
hns_roce_free_db(hr_dev, &hr_cq->db);
}
@@ -466,38 +455,57 @@ void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn)
{
- struct device *dev = hr_dev->dev;
- struct hns_roce_cq *cq;
+ struct hns_roce_cq *hr_cq;
+ struct ib_cq *ibcq;
- cq = xa_load(&hr_dev->cq_table.array, cqn & (hr_dev->caps.num_cqs - 1));
- if (!cq) {
- dev_warn(dev, "Completion event for bogus CQ 0x%08x\n", cqn);
+ hr_cq = xa_load(&hr_dev->cq_table.array,
+ cqn & (hr_dev->caps.num_cqs - 1));
+ if (!hr_cq) {
+ dev_warn(hr_dev->dev, "Completion event for bogus CQ 0x%06x\n",
+ cqn);
return;
}
- ++cq->arm_sn;
- cq->comp(cq);
+ ++hr_cq->arm_sn;
+ ibcq = &hr_cq->ib_cq;
+ if (ibcq->comp_handler)
+ ibcq->comp_handler(ibcq, ibcq->cq_context);
}
void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type)
{
- struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
struct device *dev = hr_dev->dev;
- struct hns_roce_cq *cq;
+ struct hns_roce_cq *hr_cq;
+ struct ib_event event;
+ struct ib_cq *ibcq;
- cq = xa_load(&cq_table->array, cqn & (hr_dev->caps.num_cqs - 1));
- if (cq)
- atomic_inc(&cq->refcount);
+ hr_cq = xa_load(&hr_dev->cq_table.array,
+ cqn & (hr_dev->caps.num_cqs - 1));
+ if (!hr_cq) {
+ dev_warn(dev, "Async event for bogus CQ 0x%06x\n", cqn);
+ return;
+ }
- if (!cq) {
- dev_warn(dev, "Async event for bogus CQ %08x\n", cqn);
+ if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID &&
+ event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR &&
+ event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) {
+ dev_err(dev, "Unexpected event type 0x%x on CQ 0x%06x\n",
+ event_type, cqn);
return;
}
- cq->event(cq, (enum hns_roce_event)event_type);
+ atomic_inc(&hr_cq->refcount);
+
+ ibcq = &hr_cq->ib_cq;
+ if (ibcq->event_handler) {
+ event.device = ibcq->device;
+ event.element.cq = ibcq;
+ event.event = IB_EVENT_CQ_ERR;
+ ibcq->event_handler(&event, ibcq->cq_context);
+ }
- if (atomic_dec_and_test(&cq->refcount))
- complete(&cq->free);
+ if (atomic_dec_and_test(&hr_cq->refcount))
+ complete(&hr_cq->free);
}
int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev)
diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c
index c00714c2f16a..bff6abdccfb0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_db.c
+++ b/drivers/infiniband/hw/hns/hns_roce_db.c
@@ -31,7 +31,8 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context,
refcount_set(&page->refcount, 1);
page->user_virt = page_addr;
- page->umem = ib_umem_get(udata, page_addr, PAGE_SIZE, 0, 0);
+ page->umem = ib_umem_get(context->ibucontext.device, page_addr,
+ PAGE_SIZE, 0);
if (IS_ERR(page->umem)) {
ret = PTR_ERR(page->umem);
kfree(page);
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index a548b28aab63..a7c4ff975c28 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -45,16 +45,12 @@
#define HNS_ROCE_MAX_MSG_LEN 0x80000000
-#define HNS_ROCE_ALOGN_UP(a, b) ((((a) + (b) - 1) / (b)) * (b))
-
#define HNS_ROCE_IB_MIN_SQ_STRIDE 6
#define HNS_ROCE_BA_SIZE (32 * 4096)
#define BA_BYTE_LEN 8
-#define BITS_PER_BYTE 8
-
/* Hardware specification only for v1 engine */
#define HNS_ROCE_MIN_CQE_NUM 0x40
#define HNS_ROCE_MIN_WQE_NUM 0x20
@@ -84,7 +80,6 @@
#define HNS_ROCE_CEQ_ENTRY_SIZE 0x4
#define HNS_ROCE_AEQ_ENTRY_SIZE 0x10
-/* 4G/4K = 1M */
#define HNS_ROCE_SL_SHIFT 28
#define HNS_ROCE_TCLASS_SHIFT 20
#define HNS_ROCE_FLOW_LABEL_MASK 0xfffff
@@ -110,11 +105,6 @@
#define NODE_DESC_SIZE 64
#define DB_REG_OFFSET 0x1000
-#define SERV_TYPE_RC 0
-#define SERV_TYPE_RD 1
-#define SERV_TYPE_UC 2
-#define SERV_TYPE_UD 3
-
/* Configure to HW for PAGE_SIZE larger than 4KB */
#define PG_SHIFT_OFFSET (PAGE_SHIFT - 12)
@@ -128,6 +118,18 @@
#define HNS_ROCE_IDX_QUE_ENTRY_SZ 4
#define SRQ_DB_REG 0x230
+/* The chip implementation calculates the consumer index over
+ * twice the actual EQ depth
+ */
+#define EQ_DEPTH_COEFF 2
+
+enum {
+ SERV_TYPE_RC,
+ SERV_TYPE_UC,
+ SERV_TYPE_RD,
+ SERV_TYPE_UD,
+};
+
enum {
HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0,
HNS_ROCE_SUPPORT_SQ_RECORD_DB = 1 << 1,
@@ -322,7 +324,7 @@ struct hns_roce_hem_table {
unsigned long num_hem;
/* HEM entry record obj total num */
unsigned long num_obj;
- /*Single obj size */
+ /* Single obj size */
unsigned long obj_size;
unsigned long table_chunk_size;
int lowmem;
@@ -343,7 +345,7 @@ struct hns_roce_mtt {
struct hns_roce_buf_region {
int offset; /* page offset */
- u32 count; /* page count*/
+ u32 count; /* page count */
int hopnum; /* addressing hop num */
};
@@ -384,25 +386,25 @@ struct hns_roce_mr {
u64 size; /* Address range of MR */
u32 key; /* Key of MR */
u32 pd; /* PD num of MR */
- u32 access;/* Access permission of MR */
+ u32 access; /* Access permission of MR */
u32 npages;
int enabled; /* MR's active status */
int type; /* MR's register type */
- u64 *pbl_buf;/* MR's PBL space */
+ u64 *pbl_buf; /* MR's PBL space */
dma_addr_t pbl_dma_addr; /* MR's PBL space PA */
- u32 pbl_size;/* PA number in the PBL */
- u64 pbl_ba;/* page table address */
- u32 l0_chunk_last_num;/* L0 last number */
- u32 l1_chunk_last_num;/* L1 last number */
- u64 **pbl_bt_l2;/* PBL BT L2 */
- u64 **pbl_bt_l1;/* PBL BT L1 */
- u64 *pbl_bt_l0;/* PBL BT L0 */
- dma_addr_t *pbl_l2_dma_addr;/* PBL BT L2 dma addr */
- dma_addr_t *pbl_l1_dma_addr;/* PBL BT L1 dma addr */
- dma_addr_t pbl_l0_dma_addr;/* PBL BT L0 dma addr */
- u32 pbl_ba_pg_sz;/* BT chunk page size */
- u32 pbl_buf_pg_sz;/* buf chunk page size */
- u32 pbl_hop_num;/* multi-hop number */
+ u32 pbl_size; /* PA number in the PBL */
+ u64 pbl_ba; /* page table address */
+ u32 l0_chunk_last_num; /* L0 last number */
+ u32 l1_chunk_last_num; /* L1 last number */
+ u64 **pbl_bt_l2; /* PBL BT L2 */
+ u64 **pbl_bt_l1; /* PBL BT L1 */
+ u64 *pbl_bt_l0; /* PBL BT L0 */
+ dma_addr_t *pbl_l2_dma_addr; /* PBL BT L2 dma addr */
+ dma_addr_t *pbl_l1_dma_addr; /* PBL BT L1 dma addr */
+ dma_addr_t pbl_l0_dma_addr; /* PBL BT L0 dma addr */
+ u32 pbl_ba_pg_sz; /* BT chunk page size */
+ u32 pbl_buf_pg_sz; /* buf chunk page size */
+ u32 pbl_hop_num; /* multi-hop number */
};
struct hns_roce_mr_table {
@@ -421,20 +423,19 @@ struct hns_roce_mr_table {
struct hns_roce_wq {
u64 *wrid; /* Work request ID */
spinlock_t lock;
- int wqe_cnt; /* WQE num */
- u32 max_post;
+ u32 wqe_cnt; /* WQE num */
int max_gs;
int offset;
- int wqe_shift;/* WQE size */
+ int wqe_shift; /* WQE size */
u32 head;
u32 tail;
void __iomem *db_reg_l;
};
struct hns_roce_sge {
- int sge_cnt; /* SGE num */
+ int sge_cnt; /* SGE num */
int offset;
- int sge_shift;/* SGE size */
+ int sge_shift; /* SGE size */
};
struct hns_roce_buf_list {
@@ -447,6 +448,7 @@ struct hns_roce_buf {
struct hns_roce_buf_list *page_list;
int nbufs;
u32 npages;
+ u32 size;
int page_shift;
};
@@ -478,22 +480,14 @@ struct hns_roce_db {
int order;
};
-struct hns_roce_cq_buf {
- struct hns_roce_buf hr_buf;
- struct hns_roce_mtt hr_mtt;
-};
-
struct hns_roce_cq {
struct ib_cq ib_cq;
- struct hns_roce_cq_buf hr_buf;
+ struct hns_roce_buf buf;
+ struct hns_roce_mtt mtt;
struct hns_roce_db db;
u8 db_en;
spinlock_t lock;
struct ib_umem *umem;
- void (*comp)(struct hns_roce_cq *cq);
- void (*event)(struct hns_roce_cq *cq, enum hns_roce_event event_type);
-
- struct hns_roce_uar *uar;
u32 cq_depth;
u32 cons_index;
u32 *set_ci_db;
@@ -504,6 +498,10 @@ struct hns_roce_cq {
u32 vector;
atomic_t refcount;
struct completion free;
+ struct list_head sq_list; /* all qps on this send cq */
+ struct list_head rq_list; /* all qps on this recv cq */
+ int is_armed; /* cq is armed */
+ struct list_head node; /* all armed cqs are on a list */
};
struct hns_roce_idx_que {
@@ -517,9 +515,8 @@ struct hns_roce_idx_que {
struct hns_roce_srq {
struct ib_srq ibsrq;
- void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event);
unsigned long srqn;
- int max;
+ u32 wqe_cnt;
int max_gs;
int wqe_shift;
void __iomem *db_reg_l;
@@ -535,8 +532,8 @@ struct hns_roce_srq {
spinlock_t lock;
int head;
int tail;
- u16 wqe_ctr;
struct mutex mutex;
+ void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event);
};
struct hns_roce_uar_table {
@@ -569,14 +566,16 @@ struct hns_roce_raq_table {
};
struct hns_roce_av {
- __le32 port_pd;
+ u8 port;
u8 gid_index;
u8 stat_rate;
u8 hop_limit;
- __le32 sl_tclass_flowlabel;
+ u32 flowlabel;
+ u8 sl;
+ u8 tclass;
u8 dgid[HNS_ROCE_GID_SIZE];
u8 mac[ETH_ALEN];
- __le16 vlan;
+ u16 vlan_id;
bool vlan_en;
};
@@ -618,7 +617,6 @@ struct hns_roce_cmdq {
* close device, switch into poll mode(non event mode)
*/
u8 use_events;
- u8 toggle;
};
struct hns_roce_cmd_mailbox {
@@ -652,10 +650,7 @@ struct hns_roce_qp {
u8 rdb_en;
u8 sdb_en;
u32 doorbell_qpn;
- __le32 sq_signal_bits;
- u32 sq_next_wqe;
- int sq_max_wqes_per_wr;
- int sq_spare_wqes;
+ u32 sq_signal_bits;
struct hns_roce_wq sq;
struct ib_umem *umem;
@@ -690,10 +685,9 @@ struct hns_roce_qp {
u32 next_sge;
struct hns_roce_rinl_buf rq_inl_buf;
-};
-
-struct hns_roce_sqp {
- struct hns_roce_qp hr_qp;
+ struct list_head node; /* all qps are on a list */
+ struct list_head rq_node; /* all recv qps are on a list */
+ struct list_head sq_node; /* all send qps are on a list */
};
struct hns_roce_ib_iboe {
@@ -709,7 +703,7 @@ enum {
};
struct hns_roce_ceqe {
- u32 comp;
+ __le32 comp;
};
struct hns_roce_aeqe {
@@ -752,7 +746,7 @@ struct hns_roce_eq {
struct hns_roce_dev *hr_dev;
void __iomem *doorbell;
- int type_flag;/* Aeq:1 ceq:0 */
+ int type_flag; /* Aeq:1 ceq:0 */
int eqn;
u32 entries;
int log_entries;
@@ -798,33 +792,30 @@ struct hns_roce_caps {
int local_ca_ack_delay;
int num_uars;
u32 phy_num_uars;
- u32 max_sq_sg; /* 2 */
- u32 max_sq_inline; /* 32 */
- u32 max_rq_sg; /* 2 */
+ u32 max_sq_sg;
+ u32 max_sq_inline;
+ u32 max_rq_sg;
u32 max_extend_sg;
- int num_qps; /* 256k */
+ int num_qps;
int reserved_qps;
int num_qpc_timer;
int num_cqc_timer;
- u32 max_srq_sg;
int num_srqs;
- u32 max_wqes; /* 16k */
- u32 max_srqs;
+ u32 max_wqes;
u32 max_srq_wrs;
u32 max_srq_sges;
- u32 max_sq_desc_sz; /* 64 */
- u32 max_rq_desc_sz; /* 64 */
+ u32 max_sq_desc_sz;
+ u32 max_rq_desc_sz;
u32 max_srq_desc_sz;
int max_qp_init_rdma;
int max_qp_dest_rdma;
int num_cqs;
- int max_cqes;
- int min_cqes;
+ u32 max_cqes;
+ u32 min_cqes;
u32 min_wqes;
int reserved_cqs;
int reserved_srqs;
- u32 max_srqwqes;
- int num_aeq_vectors; /* 1 */
+ int num_aeq_vectors;
int num_comp_vectors;
int num_other_vectors;
int num_mtpts;
@@ -905,8 +896,14 @@ struct hns_roce_caps {
u32 sl_num;
u32 tsq_buf_pg_sz;
u32 tpq_buf_pg_sz;
- u32 chunk_sz; /* chunk size in non multihop mode*/
+ u32 chunk_sz; /* chunk size in non multihop mode */
u64 flags;
+ u16 default_ceq_max_cnt;
+ u16 default_ceq_period;
+ u16 default_aeq_max_cnt;
+ u16 default_aeq_period;
+ u16 default_aeq_arm_st;
+ u16 default_ceq_arm_st;
};
struct hns_roce_work {
@@ -923,6 +920,12 @@ struct hns_roce_dfx_hw {
int *buffer);
};
+enum hns_roce_device_state {
+ HNS_ROCE_DEVICE_STATE_INITED,
+ HNS_ROCE_DEVICE_STATE_RST_DOWN,
+ HNS_ROCE_DEVICE_STATE_UNINIT,
+};
+
struct hns_roce_hw {
int (*reset)(struct hns_roce_dev *hr_dev, bool enable);
int (*cmq_init)(struct hns_roce_dev *hr_dev);
@@ -950,7 +953,7 @@ struct hns_roce_hw {
int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw);
void (*write_cqc)(struct hns_roce_dev *hr_dev,
struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts,
- dma_addr_t dma_handle, int nent, u32 vector);
+ dma_addr_t dma_handle);
int (*set_hem)(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, int obj, int step_idx);
int (*clear_hem)(struct hns_roce_dev *hr_dev,
@@ -991,16 +994,6 @@ struct hns_roce_hw {
const struct ib_device_ops *hns_roce_dev_srq_ops;
};
-enum hns_phy_state {
- HNS_ROCE_PHY_SLEEP = 1,
- HNS_ROCE_PHY_POLLING = 2,
- HNS_ROCE_PHY_DISABLED = 3,
- HNS_ROCE_PHY_TRAINING = 4,
- HNS_ROCE_PHY_LINKUP = 5,
- HNS_ROCE_PHY_LINKERR = 6,
- HNS_ROCE_PHY_TEST = 7
-};
-
struct hns_roce_dev {
struct ib_device ib_dev;
struct platform_device *pdev;
@@ -1015,6 +1008,9 @@ struct hns_roce_dev {
bool dis_db;
unsigned long reset_cnt;
struct hns_roce_ib_iboe iboe;
+ enum hns_roce_device_state state;
+ struct list_head qp_list; /* list of all qps on this dev */
+ spinlock_t qp_list_lock; /* protect qp_list */
struct list_head pgdir_list;
struct mutex pgdir_mutex;
@@ -1045,8 +1041,8 @@ struct hns_roce_dev {
int loop_idc;
u32 sdb_offset;
u32 odb_offset;
- dma_addr_t tptr_dma_addr; /*only for hw v1*/
- u32 tptr_size; /*only for hw v1*/
+ dma_addr_t tptr_dma_addr; /* only for hw v1 */
+ u32 tptr_size; /* only for hw v1 */
const struct hns_roce_hw *hw;
void *priv;
struct workqueue_struct *irq_workq;
@@ -1099,11 +1095,6 @@ static inline struct hns_roce_srq *to_hr_srq(struct ib_srq *ibsrq)
return container_of(ibsrq, struct hns_roce_srq, ibsrq);
}
-static inline struct hns_roce_sqp *hr_to_hr_sqp(struct hns_roce_qp *hr_qp)
-{
- return container_of(hr_qp, struct hns_roce_sqp, hr_qp);
-}
-
static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest)
{
__raw_writeq(*(u64 *) val, dest);
@@ -1160,7 +1151,6 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev);
-int hns_roce_init_eq_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev);
@@ -1205,9 +1195,9 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
-int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
- struct hns_roce_cmd_mailbox *mailbox,
- unsigned long mpt_index);
+int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmd_mailbox *mailbox,
+ unsigned long mpt_index);
unsigned long key_to_hw_index(u32 key);
struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type,
@@ -1264,12 +1254,11 @@ void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn,
__be32 send_ieth(const struct ib_send_wr *wr);
int to_hr_qp_type(int qp_type);
-int hns_roce_ib_create_cq(struct ib_cq *ib_cq,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
-void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
-void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq);
+void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
+void hns_roce_free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq);
int hns_roce_db_map_user(struct hns_roce_ucontext *context,
struct ib_udata *udata, unsigned long virt,
@@ -1285,6 +1274,7 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type);
void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type);
int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index);
+void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev);
int hns_roce_init(struct hns_roce_dev *hr_dev);
void hns_roce_exit(struct hns_roce_dev *hr_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index f4da5bd2884f..e82215774032 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -41,29 +41,57 @@
bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type)
{
- if ((hr_dev->caps.qpc_hop_num && type == HEM_TYPE_QPC) ||
- (hr_dev->caps.mpt_hop_num && type == HEM_TYPE_MTPT) ||
- (hr_dev->caps.cqc_hop_num && type == HEM_TYPE_CQC) ||
- (hr_dev->caps.srqc_hop_num && type == HEM_TYPE_SRQC) ||
- (hr_dev->caps.sccc_hop_num && type == HEM_TYPE_SCCC) ||
- (hr_dev->caps.qpc_timer_hop_num && type == HEM_TYPE_QPC_TIMER) ||
- (hr_dev->caps.cqc_timer_hop_num && type == HEM_TYPE_CQC_TIMER) ||
- (hr_dev->caps.cqe_hop_num && type == HEM_TYPE_CQE) ||
- (hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT) ||
- (hr_dev->caps.srqwqe_hop_num && type == HEM_TYPE_SRQWQE) ||
- (hr_dev->caps.idx_hop_num && type == HEM_TYPE_IDX))
- return true;
-
- return false;
+ int hop_num = 0;
+
+ switch (type) {
+ case HEM_TYPE_QPC:
+ hop_num = hr_dev->caps.qpc_hop_num;
+ break;
+ case HEM_TYPE_MTPT:
+ hop_num = hr_dev->caps.mpt_hop_num;
+ break;
+ case HEM_TYPE_CQC:
+ hop_num = hr_dev->caps.cqc_hop_num;
+ break;
+ case HEM_TYPE_SRQC:
+ hop_num = hr_dev->caps.srqc_hop_num;
+ break;
+ case HEM_TYPE_SCCC:
+ hop_num = hr_dev->caps.sccc_hop_num;
+ break;
+ case HEM_TYPE_QPC_TIMER:
+ hop_num = hr_dev->caps.qpc_timer_hop_num;
+ break;
+ case HEM_TYPE_CQC_TIMER:
+ hop_num = hr_dev->caps.cqc_timer_hop_num;
+ break;
+ case HEM_TYPE_CQE:
+ hop_num = hr_dev->caps.cqe_hop_num;
+ break;
+ case HEM_TYPE_MTT:
+ hop_num = hr_dev->caps.mtt_hop_num;
+ break;
+ case HEM_TYPE_SRQWQE:
+ hop_num = hr_dev->caps.srqwqe_hop_num;
+ break;
+ case HEM_TYPE_IDX:
+ hop_num = hr_dev->caps.idx_hop_num;
+ break;
+ default:
+ return false;
+ }
+
+ return hop_num ? true : false;
}
static bool hns_roce_check_hem_null(struct hns_roce_hem **hem, u64 start_idx,
- u32 bt_chunk_num)
+ u32 bt_chunk_num, u64 hem_max_num)
{
- int i;
+ u64 check_max_num = start_idx + bt_chunk_num;
+ u64 i;
- for (i = 0; i < bt_chunk_num; i++)
- if (hem[start_idx + i])
+ for (i = start_idx; (i < check_max_num) && (i < hem_max_num); i++)
+ if (hem[i])
return false;
return true;
@@ -92,17 +120,13 @@ static int hns_roce_get_bt_num(u32 table_type, u32 hop_num)
return 0;
}
-int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table, unsigned long *obj,
- struct hns_roce_hem_mhop *mhop)
+static int get_hem_table_config(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_mhop *mhop,
+ u32 type)
{
struct device *dev = hr_dev->dev;
- u32 chunk_ba_num;
- u32 table_idx;
- u32 bt_num;
- u32 chunk_size;
- switch (table->type) {
+ switch (type) {
case HEM_TYPE_QPC:
mhop->buf_chunk_size = 1 << (hr_dev->caps.qpc_buf_pg_sz
+ PAGE_SHIFT);
@@ -193,10 +217,26 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
break;
default:
dev_err(dev, "Table %d not support multi-hop addressing!\n",
- table->type);
+ type);
return -EINVAL;
}
+ return 0;
+}
+
+int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, unsigned long *obj,
+ struct hns_roce_hem_mhop *mhop)
+{
+ struct device *dev = hr_dev->dev;
+ u32 chunk_ba_num;
+ u32 table_idx;
+ u32 bt_num;
+ u32 chunk_size;
+
+ if (get_hem_table_config(hr_dev, mhop, table->type))
+ return -EINVAL;
+
if (!obj)
return 0;
@@ -324,13 +364,13 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev,
{
spinlock_t *lock = &hr_dev->bt_cmd_lock;
struct device *dev = hr_dev->dev;
- unsigned long end = 0;
+ long end;
unsigned long flags;
struct hns_roce_hem_iter iter;
void __iomem *bt_cmd;
- u32 bt_cmd_h_val = 0;
- u32 bt_cmd_val[2];
- u32 bt_cmd_l = 0;
+ __le32 bt_cmd_val[2];
+ __le32 bt_cmd_h = 0;
+ __le32 bt_cmd_l = 0;
u64 bt_ba = 0;
int ret = 0;
@@ -340,30 +380,20 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev,
switch (table->type) {
case HEM_TYPE_QPC:
- roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_QPC);
- break;
case HEM_TYPE_MTPT:
- roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S,
- HEM_TYPE_MTPT);
- break;
case HEM_TYPE_CQC:
- roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_CQC);
- break;
case HEM_TYPE_SRQC:
- roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S,
- HEM_TYPE_SRQC);
+ roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
+ ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, table->type);
break;
default:
return ret;
}
- roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M,
+
+ roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M,
ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj);
- roce_set_bit(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0);
- roce_set_bit(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1);
+ roce_set_bit(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0);
+ roce_set_bit(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1);
/* Currently iter only a chunk */
for (hns_roce_hem_first(table->hem[i], &iter);
@@ -375,7 +405,7 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev,
bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG;
end = HW_SYNC_TIMEOUT_MSECS;
- while (end) {
+ while (end > 0) {
if (!(readl(bt_cmd) >> BT_CMD_SYNC_SHIFT))
break;
@@ -389,13 +419,13 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev,
return -EBUSY;
}
- bt_cmd_l = (u32)bt_ba;
- roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M,
+ bt_cmd_l = cpu_to_le32(bt_ba);
+ roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M,
ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S,
bt_ba >> BT_BA_SHIFT);
bt_cmd_val[0] = bt_cmd_l;
- bt_cmd_val[1] = bt_cmd_h_val;
+ bt_cmd_val[1] = bt_cmd_h;
hns_roce_write64_k(bt_cmd_val,
hr_dev->reg_base + ROCEE_BT_CMD_L_REG);
spin_unlock_irqrestore(lock, flags);
@@ -457,6 +487,12 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
return -EINVAL;
}
+ if (unlikely(hem_idx >= table->num_hem)) {
+ dev_err(dev, "Table %d exceed hem limt idx = %llu,max = %lu!\n",
+ table->type, hem_idx, table->num_hem);
+ return -EINVAL;
+ }
+
mutex_lock(&table->mutex);
if (table->hem[hem_idx]) {
@@ -693,7 +729,7 @@ static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev,
if (check_whether_bt_num_2(table->type, hop_num)) {
start_idx = mhop.l0_idx * chunk_ba_num;
if (hns_roce_check_hem_null(table->hem, start_idx,
- chunk_ba_num)) {
+ chunk_ba_num, table->num_hem)) {
if (table->type < HEM_TYPE_MTT &&
hr_dev->hw->clear_hem(hr_dev, table, obj, 0))
dev_warn(dev, "Clear HEM base address failed.\n");
@@ -707,7 +743,7 @@ static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev,
start_idx = mhop.l0_idx * chunk_ba_num * chunk_ba_num +
mhop.l1_idx * chunk_ba_num;
if (hns_roce_check_hem_null(table->hem, start_idx,
- chunk_ba_num)) {
+ chunk_ba_num, table->num_hem)) {
if (hr_dev->hw->clear_hem(hr_dev, table, obj, 1))
dev_warn(dev, "Clear HEM base address failed.\n");
@@ -791,7 +827,8 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
} else {
u32 seg_size = 64; /* 8 bytes per BA and 8 BA per segment */
- hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop);
+ if (hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop))
+ goto out;
/* mtt mhop */
i = mhop.l0_idx;
j = mhop.l1_idx;
@@ -840,11 +877,13 @@ int hns_roce_table_get_range(struct hns_roce_dev *hr_dev,
{
struct hns_roce_hem_mhop mhop;
unsigned long inc = table->table_chunk_size / table->obj_size;
- unsigned long i;
+ unsigned long i = 0;
int ret;
if (hns_roce_check_whether_mhop(hr_dev, table->type)) {
- hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop);
+ ret = hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop);
+ if (ret)
+ goto fail;
inc = mhop.bt_chunk_size / table->obj_size;
}
@@ -874,7 +913,8 @@ void hns_roce_table_put_range(struct hns_roce_dev *hr_dev,
unsigned long i;
if (hns_roce_check_whether_mhop(hr_dev, table->type)) {
- hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop);
+ if (hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop))
+ return;
inc = mhop.bt_chunk_size / table->obj_size;
}
@@ -887,7 +927,6 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
unsigned long obj_size, unsigned long nobj,
int use_lowmem)
{
- struct device *dev = hr_dev->dev;
unsigned long obj_per_chunk;
unsigned long num_hem;
@@ -900,99 +939,21 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
if (!table->hem)
return -ENOMEM;
} else {
+ struct hns_roce_hem_mhop mhop = {};
unsigned long buf_chunk_size;
unsigned long bt_chunk_size;
unsigned long bt_chunk_num;
unsigned long num_bt_l0 = 0;
u32 hop_num;
- switch (type) {
- case HEM_TYPE_QPC:
- buf_chunk_size = 1 << (hr_dev->caps.qpc_buf_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = 1 << (hr_dev->caps.qpc_ba_pg_sz
- + PAGE_SHIFT);
- num_bt_l0 = hr_dev->caps.qpc_bt_num;
- hop_num = hr_dev->caps.qpc_hop_num;
- break;
- case HEM_TYPE_MTPT:
- buf_chunk_size = 1 << (hr_dev->caps.mpt_buf_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = 1 << (hr_dev->caps.mpt_ba_pg_sz
- + PAGE_SHIFT);
- num_bt_l0 = hr_dev->caps.mpt_bt_num;
- hop_num = hr_dev->caps.mpt_hop_num;
- break;
- case HEM_TYPE_CQC:
- buf_chunk_size = 1 << (hr_dev->caps.cqc_buf_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = 1 << (hr_dev->caps.cqc_ba_pg_sz
- + PAGE_SHIFT);
- num_bt_l0 = hr_dev->caps.cqc_bt_num;
- hop_num = hr_dev->caps.cqc_hop_num;
- break;
- case HEM_TYPE_SCCC:
- buf_chunk_size = 1 << (hr_dev->caps.sccc_buf_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = 1 << (hr_dev->caps.sccc_ba_pg_sz
- + PAGE_SHIFT);
- num_bt_l0 = hr_dev->caps.sccc_bt_num;
- hop_num = hr_dev->caps.sccc_hop_num;
- break;
- case HEM_TYPE_QPC_TIMER:
- buf_chunk_size = 1 << (hr_dev->caps.qpc_timer_buf_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = 1 << (hr_dev->caps.qpc_timer_ba_pg_sz
- + PAGE_SHIFT);
- num_bt_l0 = hr_dev->caps.qpc_timer_bt_num;
- hop_num = hr_dev->caps.qpc_timer_hop_num;
- break;
- case HEM_TYPE_CQC_TIMER:
- buf_chunk_size = 1 << (hr_dev->caps.cqc_timer_buf_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = 1 << (hr_dev->caps.cqc_timer_ba_pg_sz
- + PAGE_SHIFT);
- num_bt_l0 = hr_dev->caps.cqc_timer_bt_num;
- hop_num = hr_dev->caps.cqc_timer_hop_num;
- break;
- case HEM_TYPE_SRQC:
- buf_chunk_size = 1 << (hr_dev->caps.srqc_buf_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = 1 << (hr_dev->caps.srqc_ba_pg_sz
- + PAGE_SHIFT);
- num_bt_l0 = hr_dev->caps.srqc_bt_num;
- hop_num = hr_dev->caps.srqc_hop_num;
- break;
- case HEM_TYPE_MTT:
- buf_chunk_size = 1 << (hr_dev->caps.mtt_ba_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = buf_chunk_size;
- hop_num = hr_dev->caps.mtt_hop_num;
- break;
- case HEM_TYPE_CQE:
- buf_chunk_size = 1 << (hr_dev->caps.cqe_ba_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = buf_chunk_size;
- hop_num = hr_dev->caps.cqe_hop_num;
- break;
- case HEM_TYPE_SRQWQE:
- buf_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = buf_chunk_size;
- hop_num = hr_dev->caps.srqwqe_hop_num;
- break;
- case HEM_TYPE_IDX:
- buf_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = buf_chunk_size;
- hop_num = hr_dev->caps.idx_hop_num;
- break;
- default:
- dev_err(dev,
- "Table %d not support to init hem table here!\n",
- type);
+ if (get_hem_table_config(hr_dev, &mhop, type))
return -EINVAL;
- }
+
+ buf_chunk_size = mhop.buf_chunk_size;
+ bt_chunk_size = mhop.bt_chunk_size;
+ num_bt_l0 = mhop.ba_l0_num;
+ hop_num = mhop.hop_num;
+
obj_per_chunk = buf_chunk_size / obj_size;
num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk;
bt_chunk_num = bt_chunk_size / BA_BYTE_LEN;
@@ -1075,7 +1036,8 @@ static void hns_roce_cleanup_mhop_hem_table(struct hns_roce_dev *hr_dev,
int i;
u64 obj;
- hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop);
+ if (hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop))
+ return;
buf_chunk_size = table->type < HEM_TYPE_MTT ? mhop.buf_chunk_size :
mhop.bt_chunk_size;
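
Note on the hns_roce_hem.c hunks above: hns_roce_calc_hem_mhop() becomes a fallible call once the per-type configuration is consolidated into get_hem_table_config() (added earlier in this patch), so each caller now checks its return value instead of consuming a possibly half-initialized mhop. A minimal caller-side sketch of the pattern, assuming get_hem_table_config() fills the chunk sizes, hop count and level-0 BA count from hr_dev->caps for the given HEM type:

	struct hns_roce_hem_mhop mhop = {};

	/* Bail out early for an unsupported table type. */
	if (hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop))
		return;

	/* Only now are the chunk geometry fields meaningful. */
	inc = mhop.bt_chunk_size / table->obj_size;
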
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h
index f1ccb8f35fe5..3bb8f78fb7b0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.h
@@ -59,7 +59,7 @@ enum {
#define HNS_ROCE_HEM_CHUNK_LEN \
((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \
- (sizeof(struct scatterlist)))
+ (sizeof(struct scatterlist) + sizeof(void *)))
#define check_whether_bt_num_3(type, hop_num) \
(type < HEM_TYPE_MTT && hop_num == 2)
@@ -102,9 +102,9 @@ struct hns_roce_hem_mhop {
u32 buf_chunk_size;
u32 bt_chunk_size;
u32 ba_l0_num;
- u32 l0_idx;/* level 0 base address table index */
- u32 l1_idx;/* level 1 base address table index */
- u32 l2_idx;/* level 2 base address table index */
+ u32 l0_idx; /* level 0 base address table index */
+ u32 l1_idx; /* level 1 base address table index */
+ u32 l2_idx; /* level 2 base address table index */
};
void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem);
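
The HNS_ROCE_HEM_CHUNK_LEN change above keeps a chunk descriptor inside its 256-byte budget after each entry grows by one pointer, presumably the kernel-virtual-address companion to the scatterlist slot added elsewhere in this patch. A hedged compile-time sketch of that budget (not part of the patch; the BUILD_BUG_ON is only illustrative):

	/* Fixed header: one list_head plus two ints; per entry: one
	 * scatterlist plus one void * pointer. */
	BUILD_BUG_ON(HNS_ROCE_HEM_CHUNK_LEN *
		     (sizeof(struct scatterlist) + sizeof(void *)) +
		     sizeof(struct list_head) + 2 * sizeof(int) > 256);
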
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index c07e387a07a3..c6e66586e533 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -73,9 +73,9 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
int ps_opcode = 0, i = 0;
unsigned long flags = 0;
void *wqe = NULL;
- u32 doorbell[2];
+ __le32 doorbell[2];
+ u32 wqe_idx = 0;
int nreq = 0;
- u32 ind = 0;
int ret = 0;
u8 *smac;
int loopback;
@@ -88,7 +88,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
}
spin_lock_irqsave(&qp->sq.lock, flags);
- ind = qp->sq_next_wqe;
+
for (nreq = 0; wr; ++nreq, wr = wr->next) {
if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
ret = -ENOMEM;
@@ -96,6 +96,8 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
goto out;
}
+ wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1);
+
if (unlikely(wr->num_sge > qp->sq.max_gs)) {
dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n",
wr->num_sge, qp->sq.max_gs);
@@ -104,9 +106,8 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
goto out;
}
- wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
- qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] =
- wr->wr_id;
+ wqe = get_send_wqe(qp, wqe_idx);
+ qp->sq.wrid[wqe_idx] = wr->wr_id;
/* RC and RD type WQEs are built separately below */
if (ibqp->qp_type == IB_QPT_GSI) {
@@ -175,13 +176,11 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
roce_set_field(ud_sq_wqe->u32_36,
UD_SEND_WQE_U32_36_FLOW_LABEL_M,
UD_SEND_WQE_U32_36_FLOW_LABEL_S,
- ah->av.sl_tclass_flowlabel &
- HNS_ROCE_FLOW_LABEL_MASK);
+ ah->av.flowlabel);
roce_set_field(ud_sq_wqe->u32_36,
UD_SEND_WQE_U32_36_PRIORITY_M,
UD_SEND_WQE_U32_36_PRIORITY_S,
- le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
- HNS_ROCE_SL_SHIFT);
+ ah->av.sl);
roce_set_field(ud_sq_wqe->u32_36,
UD_SEND_WQE_U32_36_SGID_INDEX_M,
UD_SEND_WQE_U32_36_SGID_INDEX_S,
@@ -195,8 +194,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
roce_set_field(ud_sq_wqe->u32_40,
UD_SEND_WQE_U32_40_TRAFFIC_CLASS_M,
UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S,
- ah->av.sl_tclass_flowlabel >>
- HNS_ROCE_TCLASS_SHIFT);
+ ah->av.tclass);
memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN);
@@ -213,7 +211,6 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
cpu_to_le32((wr->sg_list[1].addr) >> 32);
ud_sq_wqe->l_key1 =
cpu_to_le32(wr->sg_list[1].lkey);
- ind++;
} else if (ibqp->qp_type == IB_QPT_RC) {
u32 tmp_len = 0;
@@ -311,7 +308,6 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
ctrl->flag |= cpu_to_le32(wr->num_sge <<
HNS_ROCE_WQE_SGE_NUM_BIT);
}
- ind++;
}
}
@@ -335,11 +331,10 @@ out:
SQ_DOORBELL_U32_8_QPN_S, qp->doorbell_qpn);
roce_set_bit(sq_db.u32_8, SQ_DOORBELL_HW_SYNC_S, 1);
- doorbell[0] = le32_to_cpu(sq_db.u32_4);
- doorbell[1] = le32_to_cpu(sq_db.u32_8);
+ doorbell[0] = sq_db.u32_4;
+ doorbell[1] = sq_db.u32_8;
- hns_roce_write64_k((__le32 *)doorbell, qp->sq.db_reg_l);
- qp->sq_next_wqe = ind;
+ hns_roce_write64_k(doorbell, qp->sq.db_reg_l);
}
spin_unlock_irqrestore(&qp->sq.lock, flags);
@@ -351,22 +346,21 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr)
{
- int ret = 0;
- int nreq = 0;
- int ind = 0;
- int i = 0;
- u32 reg_val;
- unsigned long flags = 0;
struct hns_roce_rq_wqe_ctrl *ctrl = NULL;
struct hns_roce_wqe_data_seg *scat = NULL;
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct device *dev = &hr_dev->pdev->dev;
struct hns_roce_rq_db rq_db;
- uint32_t doorbell[2] = {0};
+ __le32 doorbell[2] = {0};
+ unsigned long flags = 0;
+ unsigned int wqe_idx;
+ int ret = 0;
+ int nreq = 0;
+ int i = 0;
+ u32 reg_val;
spin_lock_irqsave(&hr_qp->rq.lock, flags);
- ind = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1);
for (nreq = 0; wr; ++nreq, wr = wr->next) {
if (hns_roce_wq_overflow(&hr_qp->rq, nreq,
@@ -376,6 +370,8 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
goto out;
}
+ wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1);
+
if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) {
dev_err(dev, "rq:num_sge=%d > qp->sq.max_gs=%d\n",
wr->num_sge, hr_qp->rq.max_gs);
@@ -384,7 +380,7 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
goto out;
}
- ctrl = get_recv_wqe(hr_qp, ind);
+ ctrl = get_recv_wqe(hr_qp, wqe_idx);
roce_set_field(ctrl->rwqe_byte_12,
RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_M,
@@ -396,9 +392,7 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
for (i = 0; i < wr->num_sge; i++)
set_data_seg(scat + i, wr->sg_list + i);
- hr_qp->rq.wrid[ind] = wr->wr_id;
-
- ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1);
+ hr_qp->rq.wrid[wqe_idx] = wr->wr_id;
}
out:
@@ -437,11 +431,10 @@ out:
roce_set_bit(rq_db.u32_8, RQ_DOORBELL_U32_8_HW_SYNC_S,
1);
- doorbell[0] = le32_to_cpu(rq_db.u32_4);
- doorbell[1] = le32_to_cpu(rq_db.u32_8);
+ doorbell[0] = rq_db.u32_4;
+ doorbell[1] = rq_db.u32_8;
- hns_roce_write64_k((__le32 *)doorbell,
- hr_qp->rq.db_reg_l);
+ hns_roce_write64_k(doorbell, hr_qp->rq.db_reg_l);
}
}
spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
@@ -715,7 +708,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
struct ib_cq *cq;
struct ib_pd *pd;
union ib_gid dgid;
- u64 subnet_prefix;
+ __be64 subnet_prefix;
int attr_mask = 0;
int ret;
int i, j;
@@ -736,7 +729,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
if (!cq)
return -ENOMEM;
- ret = hns_roce_ib_create_cq(cq, &cq_init_attr, NULL);
+ ret = hns_roce_create_cq(cq, &cq_init_attr, NULL);
if (ret) {
dev_err(dev, "Create cq for reserved loop qp failed!");
goto alloc_cq_failed;
@@ -872,7 +865,7 @@ alloc_pd_failed:
kfree(pd);
alloc_mem_failed:
- hns_roce_ib_destroy_cq(cq, NULL);
+ hns_roce_destroy_cq(cq, NULL);
alloc_cq_failed:
kfree(cq);
return ret;
@@ -901,7 +894,7 @@ static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev)
i, ret);
}
- hns_roce_ib_destroy_cq(&free_mr->mr_free_cq->ib_cq, NULL);
+ hns_roce_destroy_cq(&free_mr->mr_free_cq->ib_cq, NULL);
kfree(&free_mr->mr_free_cq->ib_cq);
hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd, NULL);
kfree(&free_mr->mr_free_pd->ibpd);
@@ -971,7 +964,7 @@ static int hns_roce_v1_recreate_lp_qp(struct hns_roce_dev *hr_dev)
struct hns_roce_free_mr *free_mr;
struct hns_roce_v1_priv *priv;
struct completion comp;
- unsigned long end = HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS;
+ long end = HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS;
priv = (struct hns_roce_v1_priv *)hr_dev->priv;
free_mr = &priv->free_mr;
@@ -991,7 +984,7 @@ static int hns_roce_v1_recreate_lp_qp(struct hns_roce_dev *hr_dev)
queue_work(free_mr->free_mr_wq, &(lp_qp_work->work));
- while (end) {
+ while (end > 0) {
if (try_wait_for_completion(&comp))
return 0;
msleep(HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE);
@@ -1109,7 +1102,7 @@ static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev,
struct hns_roce_free_mr *free_mr;
struct hns_roce_v1_priv *priv;
struct completion comp;
- unsigned long end = HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS;
+ long end = HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS;
unsigned long start = jiffies;
int npages;
int ret = 0;
@@ -1118,9 +1111,10 @@ static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev,
free_mr = &priv->free_mr;
if (mr->enabled) {
- if (hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key)
- & (hr_dev->caps.num_mtpts - 1)))
- dev_warn(dev, "HW2SW_MPT failed!\n");
+ if (hns_roce_hw_destroy_mpt(hr_dev, NULL,
+ key_to_hw_index(mr->key) &
+ (hr_dev->caps.num_mtpts - 1)))
+ dev_warn(dev, "DESTROY_MPT failed!\n");
}
mr_work = kzalloc(sizeof(*mr_work), GFP_KERNEL);
@@ -1139,7 +1133,7 @@ static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev,
queue_work(free_mr->free_mr_wq, &(mr_work->work));
- while (end) {
+ while (end > 0) {
if (try_wait_for_completion(&comp))
goto free_mr;
msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE);
@@ -1983,8 +1977,7 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
static void *get_cqe(struct hns_roce_cq *hr_cq, int n)
{
- return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf,
- n * HNS_ROCE_V1_CQE_ENTRY_SIZE);
+ return hns_roce_buf_offset(&hr_cq->buf, n * HNS_ROCE_V1_CQE_ENTRY_SIZE);
}
static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n)
@@ -1993,7 +1986,7 @@ static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n)
/* Get the CQE whose owner bit is the inverse of the MSB of cons_idx */
return (roce_get_bit(hr_cqe->cqe_byte_4, CQE_BYTE_4_OWNER_S) ^
- !!(n & (hr_cq->ib_cq.cqe + 1))) ? hr_cqe : NULL;
+ !!(n & hr_cq->cq_depth)) ? hr_cqe : NULL;
}
static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *hr_cq)
@@ -2076,8 +2069,7 @@ static void hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
struct hns_roce_cq *hr_cq, void *mb_buf,
- u64 *mtts, dma_addr_t dma_handle, int nent,
- u32 vector)
+ u64 *mtts, dma_addr_t dma_handle)
{
struct hns_roce_cq_context *cq_context = NULL;
struct hns_roce_buf_list *tptr_buf;
@@ -2112,9 +2104,9 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
roce_set_field(cq_context->cqc_byte_12,
CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_M,
CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S,
- ilog2((unsigned int)nent));
+ ilog2(hr_cq->cq_depth));
roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CEQN_M,
- CQ_CONTEXT_CQC_BYTE_12_CEQN_S, vector);
+ CQ_CONTEXT_CQC_BYTE_12_CEQN_S, hr_cq->vector);
cq_context->cur_cqe_ba0_l = cpu_to_le32((u32)(mtts[0]));
@@ -2165,7 +2157,7 @@ static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq,
{
struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
u32 notification_flag;
- __le32 doorbell[2];
+ __le32 doorbell[2] = {};
notification_flag = (flags & IB_CQ_SOLICITED_MASK) ==
IB_CQ_SOLICITED ? CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL;
@@ -2430,7 +2422,8 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev,
{
struct device *dev = &hr_dev->pdev->dev;
struct hns_roce_v1_priv *priv;
- unsigned long end = 0, flags = 0;
+ unsigned long flags = 0;
+ long end = HW_SYNC_TIMEOUT_MSECS;
__le32 bt_cmd_val[2] = {0};
void __iomem *bt_cmd;
u64 bt_ba = 0;
@@ -2439,18 +2432,12 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev,
switch (table->type) {
case HEM_TYPE_QPC:
- roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_QPC);
bt_ba = priv->bt_table.qpc_buf.map >> 12;
break;
case HEM_TYPE_MTPT:
- roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_MTPT);
bt_ba = priv->bt_table.mtpt_buf.map >> 12;
break;
case HEM_TYPE_CQC:
- roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_CQC);
bt_ba = priv->bt_table.cqc_buf.map >> 12;
break;
case HEM_TYPE_SRQC:
@@ -2459,6 +2446,8 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev,
default:
return 0;
}
+ roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
+ ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, table->type);
roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M,
ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj);
roce_set_bit(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0);
@@ -2468,7 +2457,6 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev,
bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG;
- end = HW_SYNC_TIMEOUT_MSECS;
while (1) {
if (readl(bt_cmd) >> BT_CMD_SYNC_SHIFT) {
if (!end) {
@@ -2484,7 +2472,7 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev,
end -= HW_SYNC_SLEEP_TIME_INTERVAL;
}
- bt_cmd_val[0] = (__le32)bt_ba;
+ bt_cmd_val[0] = cpu_to_le32(bt_ba);
roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M,
ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, bt_ba >> 32);
hns_roce_write64_k(bt_cmd_val, hr_dev->reg_base + ROCEE_BT_CMD_L_REG);
@@ -2627,7 +2615,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
QP1C_BYTES_16_PORT_NUM_S, hr_qp->phy_port);
roce_set_bit(context->qp1c_bytes_16,
QP1C_BYTES_16_SIGNALING_TYPE_S,
- le32_to_cpu(hr_qp->sq_signal_bits));
+ hr_qp->sq_signal_bits);
roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_BA_FLG_S,
1);
roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SQ_BA_FLG_S,
@@ -2710,7 +2698,6 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
hr_qp->rq.tail = 0;
hr_qp->sq.head = 0;
hr_qp->sq.tail = 0;
- hr_qp->sq_next_wqe = 0;
}
kfree(context);
@@ -2933,7 +2920,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
1);
roce_set_bit(context->qpc_bytes_32,
QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S,
- le32_to_cpu(hr_qp->sq_signal_bits));
+ hr_qp->sq_signal_bits);
port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) :
hr_qp->port;
@@ -3324,7 +3311,6 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
hr_qp->rq.tail = 0;
hr_qp->sq.head = 0;
hr_qp->sq.tail = 0;
- hr_qp->sq_next_wqe = 0;
}
out:
kfree(context);
@@ -3578,7 +3564,7 @@ static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
qp_attr->retry_cnt = roce_get_field(context->qpc_bytes_148,
QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M,
QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S);
- qp_attr->rnr_retry = (u8)context->rnr_retry;
+ qp_attr->rnr_retry = (u8)le32_to_cpu(context->rnr_retry);
done:
qp_attr->cur_qp_state = qp_attr->qp_state;
@@ -3623,14 +3609,18 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
if (ret)
return ret;
- send_cq = to_hr_cq(hr_qp->ibqp.send_cq);
- recv_cq = to_hr_cq(hr_qp->ibqp.recv_cq);
+ send_cq = hr_qp->ibqp.send_cq ? to_hr_cq(hr_qp->ibqp.send_cq) : NULL;
+ recv_cq = hr_qp->ibqp.recv_cq ? to_hr_cq(hr_qp->ibqp.recv_cq) : NULL;
hns_roce_lock_cqs(send_cq, recv_cq);
if (!udata) {
- __hns_roce_v1_cq_clean(recv_cq, hr_qp->qpn, hr_qp->ibqp.srq ?
- to_hr_srq(hr_qp->ibqp.srq) : NULL);
- if (send_cq != recv_cq)
+ if (recv_cq)
+ __hns_roce_v1_cq_clean(recv_cq, hr_qp->qpn,
+ (hr_qp->ibqp.srq ?
+ to_hr_srq(hr_qp->ibqp.srq) :
+ NULL));
+
+ if (send_cq && send_cq != recv_cq)
__hns_roce_v1_cq_clean(send_cq, hr_qp->qpn, NULL);
}
hns_roce_unlock_cqs(send_cq, recv_cq);
@@ -3652,10 +3642,7 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
}
- if (hr_qp->ibqp.qp_type == IB_QPT_RC)
- kfree(hr_qp);
- else
- kfree(hr_to_hr_sqp(hr_qp));
+ kfree(hr_qp);
return 0;
}
@@ -3666,10 +3653,9 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
struct device *dev = &hr_dev->pdev->dev;
u32 cqe_cnt_ori;
u32 cqe_cnt_cur;
- u32 cq_buf_size;
int wait_time = 0;
- hns_roce_free_cq(hr_dev, hr_cq);
+ hns_roce_free_cqc(hr_dev, hr_cq);
/*
* Before freeing cq buffer, we need to ensure that the outstanding CQE
@@ -3694,13 +3680,12 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
wait_time++;
}
- hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
+ hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt);
ib_umem_release(hr_cq->umem);
if (!udata) {
/* Free the buff of stored cq */
- cq_buf_size = (ibcq->cqe + 1) * hr_dev->caps.cq_entry_sz;
- hns_roce_buf_free(hr_dev, cq_buf_size, &hr_cq->hr_buf.hr_buf);
+ hns_roce_buf_free(hr_dev, hr_cq->buf.size, &hr_cq->buf);
}
}
@@ -4021,7 +4006,8 @@ static int hns_roce_v1_ceq_int(struct hns_roce_dev *hr_dev,
++eq->cons_index;
ceqes_found = 1;
- if (eq->cons_index > 2 * hr_dev->caps.ceqe_depth - 1) {
+ if (eq->cons_index >
+ EQ_DEPTH_COEFF * hr_dev->caps.ceqe_depth - 1) {
dev_warn(&eq->hr_dev->pdev->dev,
"cons_index overflow, set back to 0.\n");
eq->cons_index = 0;
@@ -4501,19 +4487,13 @@ static const struct acpi_device_id hns_roce_acpi_match[] = {
};
MODULE_DEVICE_TABLE(acpi, hns_roce_acpi_match);
-static int hns_roce_node_match(struct device *dev, const void *fwnode)
-{
- return dev->fwnode == fwnode;
-}
-
static struct
platform_device *hns_roce_find_pdev(struct fwnode_handle *fwnode)
{
struct device *dev;
/* get the 'device' corresponding to the matching 'fwnode' */
- dev = bus_find_device(&platform_bus_type, NULL,
- fwnode, hns_roce_node_match);
+ dev = bus_find_device_by_fwnode(&platform_bus_type, fwnode);
/* get the platform device */
return dev ? to_platform_device(dev) : NULL;
}
@@ -4524,7 +4504,6 @@ static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev)
struct platform_device *pdev = NULL;
struct net_device *netdev = NULL;
struct device_node *net_node;
- struct resource *res;
int port_cnt = 0;
u8 phy_port;
int ret;
@@ -4563,8 +4542,7 @@ static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev)
}
/* get the mapped register base address */
- res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0);
- hr_dev->reg_base = devm_ioremap_resource(dev, res);
+ hr_dev->reg_base = devm_platform_ioremap_resource(hr_dev->pdev, 0);
if (IS_ERR(hr_dev->reg_base))
return PTR_ERR(hr_dev->reg_base);
@@ -4639,10 +4617,8 @@ static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev)
/* fetch the interrupt numbers */
for (i = 0; i < HNS_ROCE_V1_MAX_IRQ_NUM; i++) {
hr_dev->irq[i] = platform_get_irq(hr_dev->pdev, i);
- if (hr_dev->irq[i] <= 0) {
- dev_err(dev, "platform get of irq[=%d] failed!\n", i);
+ if (hr_dev->irq[i] <= 0)
return -EINVAL;
- }
}
return 0;
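
In the hns_roce_hw_v1.c hunks above (and the matching v2 changes below), the WQE slot is now computed directly from the producer head and the in-flight request count, so the driver no longer carries sq_next_wqe between post calls. A minimal sketch of the indexing, with a hypothetical helper name and assuming qp->sq / hr_qp->rq are struct hns_roce_wq with head and wqe_cnt members as used above (wqe_cnt is a power of two, so the mask performs the ring wrap):

	static inline unsigned int wqe_slot(const struct hns_roce_wq *wq, int nreq)
	{
		/* nreq counts the WRs already consumed in this post call. */
		return (wq->head + nreq) & (wq->wqe_cnt - 1);
	}
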
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index b76e3beeafb8..12c4cd8e9378 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -63,20 +63,15 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
struct hns_roce_mr *mr = to_hr_mr(wr->mr);
/* use ib_access_flags */
- roce_set_bit(rc_sq_wqe->byte_4,
- V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S,
+ roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S,
wr->access & IB_ACCESS_MW_BIND ? 1 : 0);
- roce_set_bit(rc_sq_wqe->byte_4,
- V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S,
+ roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S,
wr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
- roce_set_bit(rc_sq_wqe->byte_4,
- V2_RC_FRMR_WQE_BYTE_4_RR_S,
+ roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_RR_S,
wr->access & IB_ACCESS_REMOTE_READ ? 1 : 0);
- roce_set_bit(rc_sq_wqe->byte_4,
- V2_RC_FRMR_WQE_BYTE_4_RW_S,
+ roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_RW_S,
wr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0);
- roce_set_bit(rc_sq_wqe->byte_4,
- V2_RC_FRMR_WQE_BYTE_4_LW_S,
+ roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_LW_S,
wr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0);
/* Data structure reuse may lead to confusion */
@@ -110,7 +105,7 @@ static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg,
}
static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
- unsigned int *sge_ind)
+ unsigned int *sge_ind, int valid_num_sge)
{
struct hns_roce_v2_wqe_data_seg *dseg;
struct ib_sge *sg;
@@ -123,7 +118,7 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC)
num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE;
- extend_sge_num = wr->num_sge - num_in_wqe;
+ extend_sge_num = valid_num_sge - num_in_wqe;
sg = wr->sg_list + num_in_wqe;
shift = qp->hr_buf.page_shift;
@@ -159,14 +154,16 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
void *wqe, unsigned int *sge_ind,
+ int valid_num_sge,
const struct ib_send_wr **bad_wr)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_v2_wqe_data_seg *dseg = wqe;
struct hns_roce_qp *qp = to_hr_qp(ibqp);
+ int j = 0;
int i;
- if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) {
+ if (wr->send_flags & IB_SEND_INLINE && valid_num_sge) {
if (le32_to_cpu(rc_sq_wqe->msg_len) >
hr_dev->caps.max_sq_inline) {
*bad_wr = wr;
@@ -190,7 +187,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S,
1);
} else {
- if (wr->num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) {
+ if (valid_num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) {
for (i = 0; i < wr->num_sge; i++) {
if (likely(wr->sg_list[i].length)) {
set_data_seg_v2(dseg, wr->sg_list + i);
@@ -203,19 +200,21 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
(*sge_ind) & (qp->sge.sge_cnt - 1));
- for (i = 0; i < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) {
+ for (i = 0; i < wr->num_sge &&
+ j < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) {
if (likely(wr->sg_list[i].length)) {
set_data_seg_v2(dseg, wr->sg_list + i);
dseg++;
+ j++;
}
}
- set_extend_sge(qp, wr, sge_ind);
+ set_extend_sge(qp, wr, sge_ind, valid_num_sge);
}
roce_set_field(rc_sq_wqe->byte_16,
V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, wr->num_sge);
+ V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
}
return 0;
@@ -226,6 +225,30 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
int attr_mask, enum ib_qp_state cur_state,
enum ib_qp_state new_state);
+static int check_send_valid(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp)
+{
+ struct ib_qp *ibqp = &hr_qp->ibqp;
+ struct device *dev = hr_dev->dev;
+
+ if (unlikely(ibqp->qp_type != IB_QPT_RC &&
+ ibqp->qp_type != IB_QPT_GSI &&
+ ibqp->qp_type != IB_QPT_UD)) {
+ dev_err(dev, "Not supported QP(0x%x)type!\n", ibqp->qp_type);
+ return -EOPNOTSUPP;
+ } else if (unlikely(hr_qp->state == IB_QPS_RESET ||
+ hr_qp->state == IB_QPS_INIT ||
+ hr_qp->state == IB_QPS_RTR)) {
+ dev_err(dev, "Post WQE fail, QP state %d!\n", hr_qp->state);
+ return -EINVAL;
+ } else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) {
+ dev_err(dev, "Post WQE fail, dev state %d!\n", hr_dev->state);
+ return -EIO;
+ }
+
+ return 0;
+}
+
static int hns_roce_v2_post_send(struct ib_qp *ibqp,
const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr)
@@ -239,38 +262,31 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
struct device *dev = hr_dev->dev;
struct hns_roce_v2_db sq_db;
struct ib_qp_attr attr;
- unsigned int sge_ind = 0;
unsigned int owner_bit;
+ unsigned int sge_idx;
+ unsigned int wqe_idx;
unsigned long flags;
- unsigned int ind;
+ int valid_num_sge;
void *wqe = NULL;
bool loopback;
int attr_mask;
u32 tmp_len;
- int ret = 0;
u32 hr_op;
u8 *smac;
int nreq;
+ int ret;
int i;
- if (unlikely(ibqp->qp_type != IB_QPT_RC &&
- ibqp->qp_type != IB_QPT_GSI &&
- ibqp->qp_type != IB_QPT_UD)) {
- dev_err(dev, "Not supported QP(0x%x)type!\n", ibqp->qp_type);
- *bad_wr = wr;
- return -EOPNOTSUPP;
- }
+ spin_lock_irqsave(&qp->sq.lock, flags);
- if (unlikely(qp->state == IB_QPS_RESET || qp->state == IB_QPS_INIT ||
- qp->state == IB_QPS_RTR)) {
- dev_err(dev, "Post WQE fail, QP state %d err!\n", qp->state);
+ ret = check_send_valid(hr_dev, qp);
+ if (ret) {
*bad_wr = wr;
- return -EINVAL;
+ nreq = 0;
+ goto out;
}
- spin_lock_irqsave(&qp->sq.lock, flags);
- ind = qp->sq_next_wqe;
- sge_ind = qp->next_sge;
+ sge_idx = qp->next_sge;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
@@ -279,6 +295,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
goto out;
}
+ wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1);
+
if (unlikely(wr->num_sge > qp->sq.max_gs)) {
dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n",
wr->num_sge, qp->sq.max_gs);
@@ -287,14 +305,20 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
goto out;
}
- wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
- qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] =
- wr->wr_id;
-
+ wqe = get_send_wqe(qp, wqe_idx);
+ qp->sq.wrid[wqe_idx] = wr->wr_id;
owner_bit =
~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1);
+ valid_num_sge = 0;
tmp_len = 0;
+ for (i = 0; i < wr->num_sge; i++) {
+ if (likely(wr->sg_list[i].length)) {
+ tmp_len += wr->sg_list[i].length;
+ valid_num_sge++;
+ }
+ }
+
/* Build the WQE according to the QP type */
if (ibqp->qp_type == IB_QPT_GSI) {
ud_sq_wqe = wqe;
@@ -330,9 +354,6 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
V2_UD_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_SEND);
- for (i = 0; i < wr->num_sge; i++)
- tmp_len += wr->sg_list[i].length;
-
ud_sq_wqe->msg_len =
cpu_to_le32(le32_to_cpu(ud_sq_wqe->msg_len) + tmp_len);
@@ -368,12 +389,12 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
roce_set_field(ud_sq_wqe->byte_16,
V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M,
V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S,
- wr->num_sge);
+ valid_num_sge);
roce_set_field(ud_sq_wqe->byte_20,
V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
- sge_ind & (qp->sge.sge_cnt - 1));
+ sge_idx & (qp->sge.sge_cnt - 1));
roce_set_field(ud_sq_wqe->byte_24,
V2_UD_SEND_WQE_BYTE_24_UDPSPN_M,
@@ -389,7 +410,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
roce_set_field(ud_sq_wqe->byte_36,
V2_UD_SEND_WQE_BYTE_36_VLAN_M,
V2_UD_SEND_WQE_BYTE_36_VLAN_S,
- le16_to_cpu(ah->av.vlan));
+ ah->av.vlan_id);
roce_set_field(ud_sq_wqe->byte_36,
V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M,
V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S,
@@ -397,18 +418,15 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
roce_set_field(ud_sq_wqe->byte_36,
V2_UD_SEND_WQE_BYTE_36_TCLASS_M,
V2_UD_SEND_WQE_BYTE_36_TCLASS_S,
- ah->av.sl_tclass_flowlabel >>
- HNS_ROCE_TCLASS_SHIFT);
+ ah->av.tclass);
roce_set_field(ud_sq_wqe->byte_40,
V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M,
V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S,
- ah->av.sl_tclass_flowlabel &
- HNS_ROCE_FLOW_LABEL_MASK);
+ ah->av.flowlabel);
roce_set_field(ud_sq_wqe->byte_40,
V2_UD_SEND_WQE_BYTE_40_SL_M,
V2_UD_SEND_WQE_BYTE_40_SL_S,
- le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
- HNS_ROCE_SL_SHIFT);
+ ah->av.sl);
roce_set_field(ud_sq_wqe->byte_40,
V2_UD_SEND_WQE_BYTE_40_PORTN_M,
V2_UD_SEND_WQE_BYTE_40_PORTN_S,
@@ -426,13 +444,10 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0],
GID_LEN_V2);
- set_extend_sge(qp, wr, &sge_ind);
- ind++;
+ set_extend_sge(qp, wr, &sge_idx, valid_num_sge);
} else if (ibqp->qp_type == IB_QPT_RC) {
rc_sq_wqe = wqe;
memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe));
- for (i = 0; i < wr->num_sge; i++)
- tmp_len += wr->sg_list[i].length;
rc_sq_wqe->msg_len =
cpu_to_le32(le32_to_cpu(rc_sq_wqe->msg_len) + tmp_len);
@@ -553,15 +568,14 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
roce_set_field(rc_sq_wqe->byte_16,
V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
- wr->num_sge);
+ valid_num_sge);
} else if (wr->opcode != IB_WR_REG_MR) {
ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe,
- wqe, &sge_ind, bad_wr);
+ wqe, &sge_idx,
+ valid_num_sge, bad_wr);
if (ret)
goto out;
}
-
- ind++;
} else {
dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type);
spin_unlock_irqrestore(&qp->sq.lock, flags);
@@ -591,8 +605,7 @@ out:
hns_roce_write64(hr_dev, (__le32 *)&sq_db, qp->sq.db_reg_l);
- qp->sq_next_wqe = ind;
- qp->next_sge = sge_ind;
+ qp->next_sge = sge_idx;
if (qp->state == IB_QPS_ERR) {
attr_mask = IB_QP_STATE;
@@ -613,6 +626,17 @@ out:
return ret;
}
+static int check_recv_valid(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp)
+{
+ if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN))
+ return -EIO;
+ else if (hr_qp->state == IB_QPS_RESET)
+ return -EINVAL;
+
+ return 0;
+}
+
static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr)
@@ -626,18 +650,18 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
unsigned long flags;
void *wqe = NULL;
int attr_mask;
- int ret = 0;
+ u32 wqe_idx;
int nreq;
- int ind;
+ int ret;
int i;
spin_lock_irqsave(&hr_qp->rq.lock, flags);
- ind = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1);
- if (hr_qp->state == IB_QPS_RESET) {
- spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
+ ret = check_recv_valid(hr_dev, hr_qp);
+ if (ret) {
*bad_wr = wr;
- return -EINVAL;
+ nreq = 0;
+ goto out;
}
for (nreq = 0; wr; ++nreq, wr = wr->next) {
@@ -648,6 +672,8 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
goto out;
}
+ wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1);
+
if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) {
dev_err(dev, "rq:num_sge=%d > qp->sq.max_gs=%d\n",
wr->num_sge, hr_qp->rq.max_gs);
@@ -656,7 +682,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
goto out;
}
- wqe = get_recv_wqe(hr_qp, ind);
+ wqe = get_recv_wqe(hr_qp, wqe_idx);
dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
for (i = 0; i < wr->num_sge; i++) {
if (!wr->sg_list[i].length)
@@ -672,8 +698,8 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
/* rq support inline data */
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
- sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list;
- hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt =
+ sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list;
+ hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt =
(u32)wr->num_sge;
for (i = 0; i < wr->num_sge; i++) {
sge_list[i].addr =
@@ -682,9 +708,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
}
}
- hr_qp->rq.wrid[ind] = wr->wr_id;
-
- ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1);
+ hr_qp->rq.wrid[wqe_idx] = wr->wr_id;
}
out:
@@ -887,8 +911,7 @@ static void hns_roce_cmq_init_regs(struct hns_roce_dev *hr_dev, bool ring_type)
roce_write(hr_dev, ROCEE_TX_CMQ_BASEADDR_H_REG,
upper_32_bits(dma));
roce_write(hr_dev, ROCEE_TX_CMQ_DEPTH_REG,
- (ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S) |
- HNS_ROCE_CMQ_ENABLE);
+ ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S);
roce_write(hr_dev, ROCEE_TX_CMQ_HEAD_REG, 0);
roce_write(hr_dev, ROCEE_TX_CMQ_TAIL_REG, 0);
} else {
@@ -896,8 +919,7 @@ static void hns_roce_cmq_init_regs(struct hns_roce_dev *hr_dev, bool ring_type)
roce_write(hr_dev, ROCEE_RX_CMQ_BASEADDR_H_REG,
upper_32_bits(dma));
roce_write(hr_dev, ROCEE_RX_CMQ_DEPTH_REG,
- (ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S) |
- HNS_ROCE_CMQ_ENABLE);
+ ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S);
roce_write(hr_dev, ROCEE_RX_CMQ_HEAD_REG, 0);
roce_write(hr_dev, ROCEE_RX_CMQ_TAIL_REG, 0);
}
@@ -1044,7 +1066,7 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
* If the command is sync, wait for the firmware to write back,
* if multi descriptors to be sent, use the first one to check
*/
- if ((desc->flag) & HNS_ROCE_CMD_FLAG_NO_INTR) {
+ if (le16_to_cpu(desc->flag) & HNS_ROCE_CMD_FLAG_NO_INTR) {
do {
if (hns_roce_cmq_csq_done(hr_dev))
break;
@@ -1061,7 +1083,7 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
desc_to_use = &csq->desc[ntc];
desc[handle] = *desc_to_use;
dev_dbg(hr_dev->dev, "Get cmq desc:\n");
- desc_ret = desc[handle].retval;
+ desc_ret = le16_to_cpu(desc[handle].retval);
if (desc_ret == CMD_EXEC_SUCCESS)
ret = 0;
else
@@ -1124,32 +1146,124 @@ static int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev)
return ret;
resp = (struct hns_roce_query_version *)desc.data;
- hr_dev->hw_rev = le32_to_cpu(resp->rocee_hw_version);
+ hr_dev->hw_rev = le16_to_cpu(resp->rocee_hw_version);
hr_dev->vendor_id = hr_dev->pci_dev->vendor;
return 0;
}
+static bool hns_roce_func_clr_chk_rst(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+ unsigned long reset_cnt;
+ bool sw_resetting;
+ bool hw_resetting;
+
+ reset_cnt = ops->ae_dev_reset_cnt(handle);
+ hw_resetting = ops->get_hw_reset_stat(handle);
+ sw_resetting = ops->ae_dev_resetting(handle);
+
+ if (reset_cnt != hr_dev->reset_cnt || hw_resetting || sw_resetting)
+ return true;
+
+ return false;
+}
+
+static void hns_roce_func_clr_rst_prc(struct hns_roce_dev *hr_dev, int retval,
+ int flag)
+{
+ struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+ unsigned long instance_stage;
+ unsigned long reset_cnt;
+ unsigned long end;
+ bool sw_resetting;
+ bool hw_resetting;
+
+ instance_stage = handle->rinfo.instance_state;
+ reset_cnt = ops->ae_dev_reset_cnt(handle);
+ hw_resetting = ops->get_hw_reset_stat(handle);
+ sw_resetting = ops->ae_dev_resetting(handle);
+
+ if (reset_cnt != hr_dev->reset_cnt) {
+ hr_dev->dis_db = true;
+ hr_dev->is_reset = true;
+ dev_info(hr_dev->dev, "Func clear success after reset.\n");
+ } else if (hw_resetting) {
+ hr_dev->dis_db = true;
+
+ dev_warn(hr_dev->dev,
+ "Func clear is pending, device in resetting state.\n");
+ end = HNS_ROCE_V2_HW_RST_TIMEOUT;
+ while (end) {
+ if (!ops->get_hw_reset_stat(handle)) {
+ hr_dev->is_reset = true;
+ dev_info(hr_dev->dev,
+ "Func clear success after reset.\n");
+ return;
+ }
+ msleep(HNS_ROCE_V2_HW_RST_COMPLETION_WAIT);
+ end -= HNS_ROCE_V2_HW_RST_COMPLETION_WAIT;
+ }
+
+ dev_warn(hr_dev->dev, "Func clear failed.\n");
+ } else if (sw_resetting && instance_stage == HNS_ROCE_STATE_INIT) {
+ hr_dev->dis_db = true;
+
+ dev_warn(hr_dev->dev,
+ "Func clear is pending, device in resetting state.\n");
+ end = HNS_ROCE_V2_HW_RST_TIMEOUT;
+ while (end) {
+ if (ops->ae_dev_reset_cnt(handle) !=
+ hr_dev->reset_cnt) {
+ hr_dev->is_reset = true;
+ dev_info(hr_dev->dev,
+ "Func clear success after sw reset\n");
+ return;
+ }
+ msleep(HNS_ROCE_V2_HW_RST_COMPLETION_WAIT);
+ end -= HNS_ROCE_V2_HW_RST_COMPLETION_WAIT;
+ }
+
+ dev_warn(hr_dev->dev, "Func clear failed because of unfinished sw reset\n");
+ } else {
+ if (retval && !flag)
+ dev_warn(hr_dev->dev,
+ "Func clear read failed, ret = %d.\n", retval);
+
+ dev_warn(hr_dev->dev, "Func clear failed.\n");
+ }
+}
static void hns_roce_function_clear(struct hns_roce_dev *hr_dev)
{
+ bool fclr_write_fail_flag = false;
struct hns_roce_func_clear *resp;
struct hns_roce_cmq_desc desc;
unsigned long end;
- int ret;
+ int ret = 0;
+
+ if (hns_roce_func_clr_chk_rst(hr_dev))
+ goto out;
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_FUNC_CLEAR, false);
resp = (struct hns_roce_func_clear *)desc.data;
ret = hns_roce_cmq_send(hr_dev, &desc, 1);
if (ret) {
+ fclr_write_fail_flag = true;
dev_err(hr_dev->dev, "Func clear write failed, ret = %d.\n",
ret);
- return;
+ goto out;
}
msleep(HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL);
end = HNS_ROCE_V2_FUNC_CLEAR_TIMEOUT_MSECS;
while (end) {
+ if (hns_roce_func_clr_chk_rst(hr_dev))
+ goto out;
msleep(HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT);
end -= HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT;
@@ -1166,7 +1280,8 @@ static void hns_roce_function_clear(struct hns_roce_dev *hr_dev)
}
}
- dev_err(hr_dev->dev, "Func clear fail.\n");
+out:
+ hns_roce_func_clr_rst_prc(hr_dev, ret, fclr_write_fail_flag);
}
static int hns_roce_query_fw_ver(struct hns_roce_dev *hr_dev)
@@ -1289,8 +1404,7 @@ static int hns_roce_query_pf_timer_resource(struct hns_roce_dev *hr_dev)
return 0;
}
-static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev,
- int vf_id)
+static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev, int vf_id)
{
struct hns_roce_cmq_desc desc;
struct hns_roce_vf_switch *swt;
@@ -1298,19 +1412,18 @@ static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev,
swt = (struct hns_roce_vf_switch *)desc.data;
hns_roce_cmq_setup_basic_desc(&desc, HNS_SWITCH_PARAMETER_CFG, true);
- swt->rocee_sel |= cpu_to_le16(HNS_ICL_SWITCH_CMD_ROCEE_SEL);
- roce_set_field(swt->fun_id,
- VF_SWITCH_DATA_FUN_ID_VF_ID_M,
- VF_SWITCH_DATA_FUN_ID_VF_ID_S,
- vf_id);
+ swt->rocee_sel |= cpu_to_le32(HNS_ICL_SWITCH_CMD_ROCEE_SEL);
+ roce_set_field(swt->fun_id, VF_SWITCH_DATA_FUN_ID_VF_ID_M,
+ VF_SWITCH_DATA_FUN_ID_VF_ID_S, vf_id);
ret = hns_roce_cmq_send(hr_dev, &desc, 1);
if (ret)
return ret;
+
desc.flag =
cpu_to_le16(HNS_ROCE_CMD_FLAG_NO_INTR | HNS_ROCE_CMD_FLAG_IN);
desc.flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR);
roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LPBK_S, 1);
- roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S, 1);
+ roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S, 0);
roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S, 1);
return hns_roce_cmq_send(hr_dev, &desc, 1);
@@ -1485,69 +1598,9 @@ static int hns_roce_v2_set_bt(struct hns_roce_dev *hr_dev)
return hns_roce_cmq_send(hr_dev, &desc, 1);
}
-static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
+static void set_default_caps(struct hns_roce_dev *hr_dev)
{
struct hns_roce_caps *caps = &hr_dev->caps;
- int ret;
-
- ret = hns_roce_cmq_query_hw_info(hr_dev);
- if (ret) {
- dev_err(hr_dev->dev, "Query hardware version fail, ret = %d.\n",
- ret);
- return ret;
- }
-
- ret = hns_roce_query_fw_ver(hr_dev);
- if (ret) {
- dev_err(hr_dev->dev, "Query firmware version fail, ret = %d.\n",
- ret);
- return ret;
- }
-
- ret = hns_roce_config_global_param(hr_dev);
- if (ret) {
- dev_err(hr_dev->dev, "Configure global param fail, ret = %d.\n",
- ret);
- return ret;
- }
-
- /* Get pf resource owned by every pf */
- ret = hns_roce_query_pf_resource(hr_dev);
- if (ret) {
- dev_err(hr_dev->dev, "Query pf resource fail, ret = %d.\n",
- ret);
- return ret;
- }
-
- if (hr_dev->pci_dev->revision == 0x21) {
- ret = hns_roce_query_pf_timer_resource(hr_dev);
- if (ret) {
- dev_err(hr_dev->dev,
- "Query pf timer resource fail, ret = %d.\n",
- ret);
- return ret;
- }
- }
-
- ret = hns_roce_alloc_vf_resource(hr_dev);
- if (ret) {
- dev_err(hr_dev->dev, "Allocate vf resource fail, ret = %d.\n",
- ret);
- return ret;
- }
-
- if (hr_dev->pci_dev->revision == 0x21) {
- ret = hns_roce_set_vf_switch_param(hr_dev, 0);
- if (ret) {
- dev_err(hr_dev->dev,
- "Set function switch param fail, ret = %d.\n",
- ret);
- return ret;
- }
- }
-
- hr_dev->vendor_part_id = hr_dev->pci_dev->device;
- hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid);
caps->num_qps = HNS_ROCE_V2_MAX_QP_NUM;
caps->max_wqes = HNS_ROCE_V2_MAX_WQE_NUM;
@@ -1555,17 +1608,15 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->num_srqs = HNS_ROCE_V2_MAX_SRQ_NUM;
caps->min_cqes = HNS_ROCE_MIN_CQE_NUM;
caps->max_cqes = HNS_ROCE_V2_MAX_CQE_NUM;
- caps->max_srqwqes = HNS_ROCE_V2_MAX_SRQWQE_NUM;
caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM;
caps->max_extend_sg = HNS_ROCE_V2_MAX_EXTEND_SGE_NUM;
caps->max_rq_sg = HNS_ROCE_V2_MAX_RQ_SGE_NUM;
caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE;
- caps->max_srq_sg = HNS_ROCE_V2_MAX_SRQ_SGE_NUM;
caps->num_uars = HNS_ROCE_V2_UAR_NUM;
caps->phy_num_uars = HNS_ROCE_V2_PHY_UAR_NUM;
caps->num_aeq_vectors = HNS_ROCE_V2_AEQE_VEC_NUM;
caps->num_comp_vectors = HNS_ROCE_V2_COMP_VEC_NUM;
- caps->num_other_vectors = HNS_ROCE_V2_ABNORMAL_VEC_NUM;
+ caps->num_other_vectors = HNS_ROCE_V2_ABNORMAL_VEC_NUM;
caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM;
caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS;
caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS;
@@ -1579,12 +1630,12 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->max_srq_desc_sz = HNS_ROCE_V2_MAX_SRQ_DESC_SZ;
caps->qpc_entry_sz = HNS_ROCE_V2_QPC_ENTRY_SZ;
caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ;
- caps->trrl_entry_sz = HNS_ROCE_V2_TRRL_ENTRY_SZ;
+ caps->trrl_entry_sz = HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ;
caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ;
caps->srqc_entry_sz = HNS_ROCE_V2_SRQC_ENTRY_SZ;
caps->mtpt_entry_sz = HNS_ROCE_V2_MTPT_ENTRY_SZ;
caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ;
- caps->idx_entry_sz = 4;
+ caps->idx_entry_sz = HNS_ROCE_V2_IDX_ENTRY_SZ;
caps->cq_entry_sz = HNS_ROCE_V2_CQE_ENTRY_SIZE;
caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED;
caps->reserved_lkey = 0;
@@ -1607,16 +1658,13 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->mpt_ba_pg_sz = 0;
caps->mpt_buf_pg_sz = 0;
caps->mpt_hop_num = HNS_ROCE_CONTEXT_HOP_NUM;
- caps->pbl_ba_pg_sz = 2;
- caps->pbl_buf_pg_sz = 0;
- caps->pbl_hop_num = HNS_ROCE_PBL_HOP_NUM;
caps->mtt_ba_pg_sz = 0;
caps->mtt_buf_pg_sz = 0;
caps->mtt_hop_num = HNS_ROCE_MTT_HOP_NUM;
- caps->wqe_sq_hop_num = 2;
- caps->wqe_sge_hop_num = 1;
- caps->wqe_rq_hop_num = 2;
- caps->cqe_ba_pg_sz = 6;
+ caps->wqe_sq_hop_num = HNS_ROCE_SQWQE_HOP_NUM;
+ caps->wqe_sge_hop_num = HNS_ROCE_EXT_SGE_HOP_NUM;
+ caps->wqe_rq_hop_num = HNS_ROCE_RQWQE_HOP_NUM;
+ caps->cqe_ba_pg_sz = HNS_ROCE_BA_PG_SZ_SUPPORTED_256K;
caps->cqe_buf_pg_sz = 0;
caps->cqe_hop_num = HNS_ROCE_CQE_HOP_NUM;
caps->srqwqe_ba_pg_sz = 0;
@@ -1625,10 +1673,6 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->idx_ba_pg_sz = 0;
caps->idx_buf_pg_sz = 0;
caps->idx_hop_num = HNS_ROCE_IDX_HOP_NUM;
- caps->eqe_ba_pg_sz = 0;
- caps->eqe_buf_pg_sz = 0;
- caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM;
- caps->tsq_buf_pg_sz = 0;
caps->chunk_sz = HNS_ROCE_V2_TABLE_CHUNK_SIZE;
caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR |
@@ -1637,24 +1681,19 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
HNS_ROCE_CAP_FLAG_RECORD_DB |
HNS_ROCE_CAP_FLAG_SQ_RECORD_DB;
- if (hr_dev->pci_dev->revision == 0x21)
- caps->flags |= HNS_ROCE_CAP_FLAG_MW |
- HNS_ROCE_CAP_FLAG_FRMR;
-
caps->pkey_table_len[0] = 1;
- caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
+ caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM;
caps->aeqe_depth = HNS_ROCE_V2_ASYNC_EQE_NUM;
caps->local_ca_ack_delay = 0;
caps->max_mtu = IB_MTU_4096;
- caps->max_srqs = HNS_ROCE_V2_MAX_SRQ;
caps->max_srq_wrs = HNS_ROCE_V2_MAX_SRQ_WR;
caps->max_srq_sges = HNS_ROCE_V2_MAX_SRQ_SGE;
- if (hr_dev->pci_dev->revision == 0x21) {
- caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC |
- HNS_ROCE_CAP_FLAG_SRQ |
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_B) {
+ caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | HNS_ROCE_CAP_FLAG_MW |
+ HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR |
HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL;
caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM;
@@ -1668,12 +1707,345 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->cqc_timer_buf_pg_sz = 0;
caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
- caps->sccc_entry_sz = HNS_ROCE_V2_SCCC_ENTRY_SZ;
- caps->sccc_ba_pg_sz = 0;
- caps->sccc_buf_pg_sz = 0;
- caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM;
+ caps->sccc_entry_sz = HNS_ROCE_V2_SCCC_ENTRY_SZ;
+ caps->sccc_ba_pg_sz = 0;
+ caps->sccc_buf_pg_sz = 0;
+ caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM;
+ }
+}
+
+static void calc_pg_sz(int obj_num, int obj_size, int hop_num, int ctx_bt_num,
+ int *buf_page_size, int *bt_page_size, u32 hem_type)
+{
+ u64 obj_per_chunk;
+ int bt_chunk_size = 1 << PAGE_SHIFT;
+ int buf_chunk_size = 1 << PAGE_SHIFT;
+ int obj_per_chunk_default = buf_chunk_size / obj_size;
+
+ *buf_page_size = 0;
+ *bt_page_size = 0;
+
+ switch (hop_num) {
+ case 3:
+ obj_per_chunk = ctx_bt_num * (bt_chunk_size / BA_BYTE_LEN) *
+ (bt_chunk_size / BA_BYTE_LEN) *
+ (bt_chunk_size / BA_BYTE_LEN) *
+ obj_per_chunk_default;
+ break;
+ case 2:
+ obj_per_chunk = ctx_bt_num * (bt_chunk_size / BA_BYTE_LEN) *
+ (bt_chunk_size / BA_BYTE_LEN) *
+ obj_per_chunk_default;
+ break;
+ case 1:
+ obj_per_chunk = ctx_bt_num * (bt_chunk_size / BA_BYTE_LEN) *
+ obj_per_chunk_default;
+ break;
+ case HNS_ROCE_HOP_NUM_0:
+ obj_per_chunk = ctx_bt_num * obj_per_chunk_default;
+ break;
+ default:
+ pr_err("Table %d not support hop_num = %d!\n", hem_type,
+ hop_num);
+ return;
+ }
+
+ if (hem_type >= HEM_TYPE_MTT)
+ *bt_page_size = ilog2(DIV_ROUND_UP(obj_num, obj_per_chunk));
+ else
+ *buf_page_size = ilog2(DIV_ROUND_UP(obj_num, obj_per_chunk));
+}
+
+static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmq_desc desc[HNS_ROCE_QUERY_PF_CAPS_CMD_NUM];
+ struct hns_roce_caps *caps = &hr_dev->caps;
+ struct hns_roce_query_pf_caps_a *resp_a;
+ struct hns_roce_query_pf_caps_b *resp_b;
+ struct hns_roce_query_pf_caps_c *resp_c;
+ struct hns_roce_query_pf_caps_d *resp_d;
+ struct hns_roce_query_pf_caps_e *resp_e;
+ int ctx_hop_num;
+ int pbl_hop_num;
+ int ret;
+ int i;
+
+ for (i = 0; i < HNS_ROCE_QUERY_PF_CAPS_CMD_NUM; i++) {
+ hns_roce_cmq_setup_basic_desc(&desc[i],
+ HNS_ROCE_OPC_QUERY_PF_CAPS_NUM,
+ true);
+ if (i < (HNS_ROCE_QUERY_PF_CAPS_CMD_NUM - 1))
+ desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ else
+ desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ }
+
+ ret = hns_roce_cmq_send(hr_dev, desc, HNS_ROCE_QUERY_PF_CAPS_CMD_NUM);
+ if (ret)
+ return ret;
+
+ resp_a = (struct hns_roce_query_pf_caps_a *)desc[0].data;
+ resp_b = (struct hns_roce_query_pf_caps_b *)desc[1].data;
+ resp_c = (struct hns_roce_query_pf_caps_c *)desc[2].data;
+ resp_d = (struct hns_roce_query_pf_caps_d *)desc[3].data;
+ resp_e = (struct hns_roce_query_pf_caps_e *)desc[4].data;
+
+ caps->local_ca_ack_delay = resp_a->local_ca_ack_delay;
+ caps->max_sq_sg = le16_to_cpu(resp_a->max_sq_sg);
+ caps->max_sq_inline = le16_to_cpu(resp_a->max_sq_inline);
+ caps->max_rq_sg = le16_to_cpu(resp_a->max_rq_sg);
+ caps->max_extend_sg = le32_to_cpu(resp_a->max_extend_sg);
+ caps->num_qpc_timer = le16_to_cpu(resp_a->num_qpc_timer);
+ caps->num_cqc_timer = le16_to_cpu(resp_a->num_cqc_timer);
+ caps->max_srq_sges = le16_to_cpu(resp_a->max_srq_sges);
+ caps->num_aeq_vectors = resp_a->num_aeq_vectors;
+ caps->num_other_vectors = resp_a->num_other_vectors;
+ caps->max_sq_desc_sz = resp_a->max_sq_desc_sz;
+ caps->max_rq_desc_sz = resp_a->max_rq_desc_sz;
+ caps->max_srq_desc_sz = resp_a->max_srq_desc_sz;
+ caps->cq_entry_sz = resp_a->cq_entry_sz;
+
+ caps->mtpt_entry_sz = resp_b->mtpt_entry_sz;
+ caps->irrl_entry_sz = resp_b->irrl_entry_sz;
+ caps->trrl_entry_sz = resp_b->trrl_entry_sz;
+ caps->cqc_entry_sz = resp_b->cqc_entry_sz;
+ caps->srqc_entry_sz = resp_b->srqc_entry_sz;
+ caps->idx_entry_sz = resp_b->idx_entry_sz;
+ caps->sccc_entry_sz = resp_b->scc_ctx_entry_sz;
+ caps->max_mtu = resp_b->max_mtu;
+ caps->qpc_entry_sz = le16_to_cpu(resp_b->qpc_entry_sz);
+ caps->min_cqes = resp_b->min_cqes;
+ caps->min_wqes = resp_b->min_wqes;
+ caps->page_size_cap = le32_to_cpu(resp_b->page_size_cap);
+ caps->pkey_table_len[0] = resp_b->pkey_table_len;
+ caps->phy_num_uars = resp_b->phy_num_uars;
+ ctx_hop_num = resp_b->ctx_hop_num;
+ pbl_hop_num = resp_b->pbl_hop_num;
+
+ caps->num_pds = 1 << roce_get_field(resp_c->cap_flags_num_pds,
+ V2_QUERY_PF_CAPS_C_NUM_PDS_M,
+ V2_QUERY_PF_CAPS_C_NUM_PDS_S);
+ caps->flags = roce_get_field(resp_c->cap_flags_num_pds,
+ V2_QUERY_PF_CAPS_C_CAP_FLAGS_M,
+ V2_QUERY_PF_CAPS_C_CAP_FLAGS_S);
+ caps->num_cqs = 1 << roce_get_field(resp_c->max_gid_num_cqs,
+ V2_QUERY_PF_CAPS_C_NUM_CQS_M,
+ V2_QUERY_PF_CAPS_C_NUM_CQS_S);
+ caps->gid_table_len[0] = roce_get_field(resp_c->max_gid_num_cqs,
+ V2_QUERY_PF_CAPS_C_MAX_GID_M,
+ V2_QUERY_PF_CAPS_C_MAX_GID_S);
+ caps->max_cqes = 1 << roce_get_field(resp_c->cq_depth,
+ V2_QUERY_PF_CAPS_C_CQ_DEPTH_M,
+ V2_QUERY_PF_CAPS_C_CQ_DEPTH_S);
+ caps->num_mtpts = 1 << roce_get_field(resp_c->num_mrws,
+ V2_QUERY_PF_CAPS_C_NUM_MRWS_M,
+ V2_QUERY_PF_CAPS_C_NUM_MRWS_S);
+ caps->num_qps = 1 << roce_get_field(resp_c->ord_num_qps,
+ V2_QUERY_PF_CAPS_C_NUM_QPS_M,
+ V2_QUERY_PF_CAPS_C_NUM_QPS_S);
+ caps->max_qp_init_rdma = roce_get_field(resp_c->ord_num_qps,
+ V2_QUERY_PF_CAPS_C_MAX_ORD_M,
+ V2_QUERY_PF_CAPS_C_MAX_ORD_S);
+ caps->max_qp_dest_rdma = caps->max_qp_init_rdma;
+ caps->max_wqes = 1 << le16_to_cpu(resp_c->sq_depth);
+ caps->num_srqs = 1 << roce_get_field(resp_d->wq_hop_num_max_srqs,
+ V2_QUERY_PF_CAPS_D_NUM_SRQS_M,
+ V2_QUERY_PF_CAPS_D_NUM_SRQS_S);
+ caps->max_srq_wrs = 1 << le16_to_cpu(resp_d->srq_depth);
+ caps->ceqe_depth = 1 << roce_get_field(resp_d->num_ceqs_ceq_depth,
+ V2_QUERY_PF_CAPS_D_CEQ_DEPTH_M,
+ V2_QUERY_PF_CAPS_D_CEQ_DEPTH_S);
+ caps->num_comp_vectors = roce_get_field(resp_d->num_ceqs_ceq_depth,
+ V2_QUERY_PF_CAPS_D_NUM_CEQS_M,
+ V2_QUERY_PF_CAPS_D_NUM_CEQS_S);
+ caps->aeqe_depth = 1 << roce_get_field(resp_d->arm_st_aeq_depth,
+ V2_QUERY_PF_CAPS_D_AEQ_DEPTH_M,
+ V2_QUERY_PF_CAPS_D_AEQ_DEPTH_S);
+ caps->default_aeq_arm_st = roce_get_field(resp_d->arm_st_aeq_depth,
+ V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_M,
+ V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_S);
+ caps->default_ceq_arm_st = roce_get_field(resp_d->arm_st_aeq_depth,
+ V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_M,
+ V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_S);
+ caps->reserved_pds = roce_get_field(resp_d->num_uars_rsv_pds,
+ V2_QUERY_PF_CAPS_D_RSV_PDS_M,
+ V2_QUERY_PF_CAPS_D_RSV_PDS_S);
+ caps->num_uars = 1 << roce_get_field(resp_d->num_uars_rsv_pds,
+ V2_QUERY_PF_CAPS_D_NUM_UARS_M,
+ V2_QUERY_PF_CAPS_D_NUM_UARS_S);
+ caps->reserved_qps = roce_get_field(resp_d->rsv_uars_rsv_qps,
+ V2_QUERY_PF_CAPS_D_RSV_QPS_M,
+ V2_QUERY_PF_CAPS_D_RSV_QPS_S);
+ caps->reserved_uars = roce_get_field(resp_d->rsv_uars_rsv_qps,
+ V2_QUERY_PF_CAPS_D_RSV_UARS_M,
+ V2_QUERY_PF_CAPS_D_RSV_UARS_S);
+ caps->reserved_mrws = roce_get_field(resp_e->chunk_size_shift_rsv_mrws,
+ V2_QUERY_PF_CAPS_E_RSV_MRWS_M,
+ V2_QUERY_PF_CAPS_E_RSV_MRWS_S);
+ caps->chunk_sz = 1 << roce_get_field(resp_e->chunk_size_shift_rsv_mrws,
+ V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_M,
+ V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_S);
+ caps->reserved_cqs = roce_get_field(resp_e->rsv_cqs,
+ V2_QUERY_PF_CAPS_E_RSV_CQS_M,
+ V2_QUERY_PF_CAPS_E_RSV_CQS_S);
+ caps->reserved_srqs = roce_get_field(resp_e->rsv_srqs,
+ V2_QUERY_PF_CAPS_E_RSV_SRQS_M,
+ V2_QUERY_PF_CAPS_E_RSV_SRQS_S);
+ caps->reserved_lkey = roce_get_field(resp_e->rsv_lkey,
+ V2_QUERY_PF_CAPS_E_RSV_LKEYS_M,
+ V2_QUERY_PF_CAPS_E_RSV_LKEYS_S);
+ caps->default_ceq_max_cnt = le16_to_cpu(resp_e->ceq_max_cnt);
+ caps->default_ceq_period = le16_to_cpu(resp_e->ceq_period);
+ caps->default_aeq_max_cnt = le16_to_cpu(resp_e->aeq_max_cnt);
+ caps->default_aeq_period = le16_to_cpu(resp_e->aeq_period);
+
+ caps->qpc_timer_entry_sz = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ;
+ caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ;
+ caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ;
+ caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS;
+ caps->mtt_ba_pg_sz = 0;
+ caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS;
+ caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS;
+ caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS;
+
+ caps->qpc_hop_num = ctx_hop_num;
+ caps->srqc_hop_num = ctx_hop_num;
+ caps->cqc_hop_num = ctx_hop_num;
+ caps->mpt_hop_num = ctx_hop_num;
+ caps->mtt_hop_num = pbl_hop_num;
+ caps->cqe_hop_num = pbl_hop_num;
+ caps->srqwqe_hop_num = pbl_hop_num;
+ caps->idx_hop_num = pbl_hop_num;
+ caps->wqe_sq_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs,
+ V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_M,
+ V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_S);
+ caps->wqe_sge_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs,
+ V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_M,
+ V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_S);
+ caps->wqe_rq_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs,
+ V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M,
+ V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S);
+
+ calc_pg_sz(caps->num_qps, caps->qpc_entry_sz, caps->qpc_hop_num,
+ caps->qpc_bt_num, &caps->qpc_buf_pg_sz, &caps->qpc_ba_pg_sz,
+ HEM_TYPE_QPC);
+ calc_pg_sz(caps->num_mtpts, caps->mtpt_entry_sz, caps->mpt_hop_num,
+ caps->mpt_bt_num, &caps->mpt_buf_pg_sz, &caps->mpt_ba_pg_sz,
+ HEM_TYPE_MTPT);
+ calc_pg_sz(caps->num_cqs, caps->cqc_entry_sz, caps->cqc_hop_num,
+ caps->cqc_bt_num, &caps->cqc_buf_pg_sz, &caps->cqc_ba_pg_sz,
+ HEM_TYPE_CQC);
+ calc_pg_sz(caps->num_srqs, caps->srqc_entry_sz, caps->srqc_hop_num,
+ caps->srqc_bt_num, &caps->srqc_buf_pg_sz,
+ &caps->srqc_ba_pg_sz, HEM_TYPE_SRQC);
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_B) {
+ caps->sccc_hop_num = ctx_hop_num;
+ caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
+ caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
+
+ calc_pg_sz(caps->num_qps, caps->sccc_entry_sz,
+ caps->sccc_hop_num, caps->sccc_bt_num,
+ &caps->sccc_buf_pg_sz, &caps->sccc_ba_pg_sz,
+ HEM_TYPE_SCCC);
+ calc_pg_sz(caps->num_cqc_timer, caps->cqc_timer_entry_sz,
+ caps->cqc_timer_hop_num, caps->cqc_timer_bt_num,
+ &caps->cqc_timer_buf_pg_sz,
+ &caps->cqc_timer_ba_pg_sz, HEM_TYPE_CQC_TIMER);
+ }
+
+ calc_pg_sz(caps->num_cqe_segs, caps->mtt_entry_sz, caps->cqe_hop_num,
+ 1, &caps->cqe_buf_pg_sz, &caps->cqe_ba_pg_sz, HEM_TYPE_CQE);
+ calc_pg_sz(caps->num_srqwqe_segs, caps->mtt_entry_sz,
+ caps->srqwqe_hop_num, 1, &caps->srqwqe_buf_pg_sz,
+ &caps->srqwqe_ba_pg_sz, HEM_TYPE_SRQWQE);
+ calc_pg_sz(caps->num_idx_segs, caps->idx_entry_sz, caps->idx_hop_num,
+ 1, &caps->idx_buf_pg_sz, &caps->idx_ba_pg_sz, HEM_TYPE_IDX);
+
+ return 0;
+}
+
+static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_caps *caps = &hr_dev->caps;
+ int ret;
+
+ ret = hns_roce_cmq_query_hw_info(hr_dev);
+ if (ret) {
+ dev_err(hr_dev->dev, "Query hardware version fail, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ ret = hns_roce_query_fw_ver(hr_dev);
+ if (ret) {
+ dev_err(hr_dev->dev, "Query firmware version fail, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ ret = hns_roce_config_global_param(hr_dev);
+ if (ret) {
+ dev_err(hr_dev->dev, "Configure global param fail, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ /* Get pf resource owned by every pf */
+ ret = hns_roce_query_pf_resource(hr_dev);
+ if (ret) {
+ dev_err(hr_dev->dev, "Query pf resource fail, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ if (hr_dev->pci_dev->revision == 0x21) {
+ ret = hns_roce_query_pf_timer_resource(hr_dev);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "Query pf timer resource fail, ret = %d.\n",
+ ret);
+ return ret;
+ }
}
+ ret = hns_roce_alloc_vf_resource(hr_dev);
+ if (ret) {
+ dev_err(hr_dev->dev, "Allocate vf resource fail, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ if (hr_dev->pci_dev->revision == 0x21) {
+ ret = hns_roce_set_vf_switch_param(hr_dev, 0);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "Set function switch param fail, ret = %d.\n",
+ ret);
+ return ret;
+ }
+ }
+
+ hr_dev->vendor_part_id = hr_dev->pci_dev->device;
+ hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid);
+
+ caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS;
+ caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS;
+ caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS;
+ caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS;
+
+ caps->pbl_ba_pg_sz = HNS_ROCE_BA_PG_SZ_SUPPORTED_16K;
+ caps->pbl_buf_pg_sz = 0;
+ caps->pbl_hop_num = HNS_ROCE_PBL_HOP_NUM;
+ caps->eqe_ba_pg_sz = 0;
+ caps->eqe_buf_pg_sz = 0;
+ caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM;
+ caps->tsq_buf_pg_sz = 0;
+
+ ret = hns_roce_query_pf_caps(hr_dev);
+ if (ret)
+ set_default_caps(hr_dev);
+
ret = hns_roce_v2_set_bt(hr_dev);
if (ret)
dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n",
@@ -1724,39 +2096,37 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev,
desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
if (i == 0) {
- req_a->base_addr_l = link_tbl->table.map & 0xffffffff;
- req_a->base_addr_h = (link_tbl->table.map >> 32) &
- 0xffffffff;
+ req_a->base_addr_l =
+ cpu_to_le32(link_tbl->table.map & 0xffffffff);
+ req_a->base_addr_h =
+ cpu_to_le32(link_tbl->table.map >> 32);
roce_set_field(req_a->depth_pgsz_init_en,
- CFG_LLM_QUE_DEPTH_M,
- CFG_LLM_QUE_DEPTH_S,
+ CFG_LLM_QUE_DEPTH_M, CFG_LLM_QUE_DEPTH_S,
link_tbl->npages);
roce_set_field(req_a->depth_pgsz_init_en,
- CFG_LLM_QUE_PGSZ_M,
- CFG_LLM_QUE_PGSZ_S,
+ CFG_LLM_QUE_PGSZ_M, CFG_LLM_QUE_PGSZ_S,
link_tbl->pg_sz);
- req_a->head_ba_l = entry[0].blk_ba0;
- req_a->head_ba_h_nxtptr = entry[0].blk_ba1_nxt_ptr;
- roce_set_field(req_a->head_ptr,
- CFG_LLM_HEAD_PTR_M,
+ req_a->head_ba_l = cpu_to_le32(entry[0].blk_ba0);
+ req_a->head_ba_h_nxtptr =
+ cpu_to_le32(entry[0].blk_ba1_nxt_ptr);
+ roce_set_field(req_a->head_ptr, CFG_LLM_HEAD_PTR_M,
CFG_LLM_HEAD_PTR_S, 0);
} else {
- req_b->tail_ba_l = entry[page_num - 1].blk_ba0;
- roce_set_field(req_b->tail_ba_h,
- CFG_LLM_TAIL_BA_H_M,
+ req_b->tail_ba_l =
+ cpu_to_le32(entry[page_num - 1].blk_ba0);
+ roce_set_field(req_b->tail_ba_h, CFG_LLM_TAIL_BA_H_M,
CFG_LLM_TAIL_BA_H_S,
entry[page_num - 1].blk_ba1_nxt_ptr &
- HNS_ROCE_LINK_TABLE_BA1_M);
- roce_set_field(req_b->tail_ptr,
- CFG_LLM_TAIL_PTR_M,
+ HNS_ROCE_LINK_TABLE_BA1_M);
+ roce_set_field(req_b->tail_ptr, CFG_LLM_TAIL_PTR_M,
CFG_LLM_TAIL_PTR_S,
(entry[page_num - 2].blk_ba1_nxt_ptr &
- HNS_ROCE_LINK_TABLE_NXT_PTR_M) >>
- HNS_ROCE_LINK_TABLE_NXT_PTR_S);
+ HNS_ROCE_LINK_TABLE_NXT_PTR_M) >>
+ HNS_ROCE_LINK_TABLE_NXT_PTR_S);
}
}
- roce_set_field(req_a->depth_pgsz_init_en,
- CFG_LLM_INIT_EN_M, CFG_LLM_INIT_EN_S, 1);
+ roce_set_field(req_a->depth_pgsz_init_en, CFG_LLM_INIT_EN_M,
+ CFG_LLM_INIT_EN_S, 1);
return hns_roce_cmq_send(hr_dev, desc, 2);
}
@@ -1817,17 +2187,13 @@ static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev,
link_tbl->pg_list[i].map = t;
- entry[i].blk_ba0 = (t >> 12) & 0xffffffff;
- roce_set_field(entry[i].blk_ba1_nxt_ptr,
- HNS_ROCE_LINK_TABLE_BA1_M,
- HNS_ROCE_LINK_TABLE_BA1_S,
- t >> 44);
+ entry[i].blk_ba0 = (u32)(t >> 12);
+ entry[i].blk_ba1_nxt_ptr = (u32)(t >> 44);
if (i < (pg_num - 1))
- roce_set_field(entry[i].blk_ba1_nxt_ptr,
- HNS_ROCE_LINK_TABLE_NXT_PTR_M,
- HNS_ROCE_LINK_TABLE_NXT_PTR_S,
- i + 1);
+ entry[i].blk_ba1_nxt_ptr |=
+ (i + 1) << HNS_ROCE_LINK_TABLE_NXT_PTR_S;
+
}
link_tbl->npages = pg_num;
link_tbl->pg_sz = buf_chk_sz;
@@ -1888,7 +2254,7 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
goto err_tpq_init_failed;
}
- /* Alloc memory for QPC Timer buffer space chunk*/
+ /* Alloc memory for QPC Timer buffer space chunk */
for (qpc_count = 0; qpc_count < hr_dev->caps.qpc_timer_bt_num;
qpc_count++) {
ret = hns_roce_table_get(hr_dev, &hr_dev->qpc_timer_table,
@@ -1899,7 +2265,7 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
}
}
- /* Alloc memory for CQC Timer buffer space chunk*/
+ /* Alloc memory for CQC Timer buffer space chunk */
for (cqc_count = 0; cqc_count < hr_dev->caps.cqc_timer_bt_num;
cqc_count++) {
ret = hns_roce_table_get(hr_dev, &hr_dev->cqc_timer_table,
@@ -1952,7 +2318,7 @@ static int hns_roce_query_mbox_status(struct hns_roce_dev *hr_dev)
if (status)
return status;
- return cpu_to_le32(mb_st->mb_status_hw_run);
+ return le32_to_cpu(mb_st->mb_status_hw_run);
}
static int hns_roce_v2_cmd_pending(struct hns_roce_dev *hr_dev)
@@ -1978,10 +2344,10 @@ static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev, u64 in_param,
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_POST_MB, false);
- mb->in_param_l = cpu_to_le64(in_param);
- mb->in_param_h = cpu_to_le64(in_param) >> 32;
- mb->out_param_l = cpu_to_le64(out_param);
- mb->out_param_h = cpu_to_le64(out_param) >> 32;
+ mb->in_param_l = cpu_to_le32(in_param);
+ mb->in_param_h = cpu_to_le32(in_param >> 32);
+ mb->out_param_l = cpu_to_le32(out_param);
+ mb->out_param_h = cpu_to_le32(out_param >> 32);
mb->cmd_tag = cpu_to_le32(in_modifier << 8 | op);
mb->token_event_en = cpu_to_le32(event << 16 | token);
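
For illustration only (not part of the patch): the hunk above fixes how the
64-bit mailbox parameters are split into the two 32-bit descriptor words.
The value must be split in CPU byte order first and each half converted
afterwards; converting the whole 64-bit value and then shifting picks the
wrong half on a big-endian host. A minimal user-space sketch of the same
idea, using htole32() from <endian.h> as a stand-in for cpu_to_le32() and
illustrative field names:

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for the two 32-bit mailbox words. */
struct mbox_words {
	uint32_t in_param_l;	/* low 32 bits, little-endian on the wire */
	uint32_t in_param_h;	/* high 32 bits, little-endian on the wire */
};

int main(void)
{
	uint64_t in_param = 0x1122334455667788ULL;
	struct mbox_words mb;

	/* Correct: take each half in CPU order, then convert it. */
	mb.in_param_l = htole32((uint32_t)in_param);
	mb.in_param_h = htole32((uint32_t)(in_param >> 32));

	printf("lo=%08x hi=%08x\n", (unsigned)mb.in_param_l,
	       (unsigned)mb.in_param_h);
	return 0;
}
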
@@ -2053,11 +2419,9 @@ static int hns_roce_config_sgid_table(struct hns_roce_dev *hr_dev,
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false);
- roce_set_field(sgid_tb->table_idx_rsv,
- CFG_SGID_TB_TABLE_IDX_M,
+ roce_set_field(sgid_tb->table_idx_rsv, CFG_SGID_TB_TABLE_IDX_M,
CFG_SGID_TB_TABLE_IDX_S, gid_index);
- roce_set_field(sgid_tb->vf_sgid_type_rsv,
- CFG_SGID_TB_VF_SGID_TYPE_M,
+ roce_set_field(sgid_tb->vf_sgid_type_rsv, CFG_SGID_TB_VF_SGID_TYPE_M,
CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type);
p = (u32 *)&gid->raw[0];
@@ -2123,7 +2487,7 @@ static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
roce_set_field(smac_tb->vf_smac_h_rsv,
CFG_SMAC_TB_VF_SMAC_H_M,
CFG_SMAC_TB_VF_SMAC_H_S, reg_smac_h);
- smac_tb->vf_smac_l = reg_smac_l;
+ smac_tb->vf_smac_l = cpu_to_le32(reg_smac_l);
return hns_roce_cmq_send(hr_dev, &desc, 1);
}
@@ -2328,11 +2692,10 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
V2_MPT_BYTE_4_PD_S, mw->pdn);
- roce_set_field(mpt_entry->byte_4_pd_hop_st,
- V2_MPT_BYTE_4_PBL_HOP_NUM_M,
+ roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M,
V2_MPT_BYTE_4_PBL_HOP_NUM_S,
- mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ?
- 0 : mw->pbl_hop_num);
+ mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 :
+ mw->pbl_hop_num);
roce_set_field(mpt_entry->byte_4_pd_hop_st,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
@@ -2359,8 +2722,7 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n)
{
- return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf,
- n * HNS_ROCE_V2_CQE_ENTRY_SIZE);
+ return hns_roce_buf_offset(&hr_cq->buf, n * HNS_ROCE_V2_CQE_ENTRY_SIZE);
}
static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n)
@@ -2369,7 +2731,7 @@ static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n)
/* Get cqe when Owner bit is Conversely with the MSB of cons_idx */
return (roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_OWNER_S) ^
- !!(n & (hr_cq->ib_cq.cqe + 1))) ? cqe : NULL;
+ !!(n & hr_cq->cq_depth)) ? cqe : NULL;
}
static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq)
@@ -2409,7 +2771,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
for (prod_index = hr_cq->cons_index; get_sw_cqe_v2(hr_cq, prod_index);
++prod_index) {
- if (prod_index == hr_cq->cons_index + hr_cq->ib_cq.cqe)
+ if (prod_index > hr_cq->cons_index + hr_cq->ib_cq.cqe)
break;
}
@@ -2462,8 +2824,7 @@ static void hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
struct hns_roce_cq *hr_cq, void *mb_buf,
- u64 *mtts, dma_addr_t dma_handle, int nent,
- u32 vector)
+ u64 *mtts, dma_addr_t dma_handle)
{
struct hns_roce_v2_cq_context *cq_context;
@@ -2475,32 +2836,29 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_ARM_ST_M,
V2_CQC_BYTE_4_ARM_ST_S, REG_NXT_CEQE);
roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_SHIFT_M,
- V2_CQC_BYTE_4_SHIFT_S, ilog2((unsigned int)nent));
+ V2_CQC_BYTE_4_SHIFT_S, ilog2(hr_cq->cq_depth));
roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_CEQN_M,
- V2_CQC_BYTE_4_CEQN_S, vector);
- cq_context->byte_4_pg_ceqn = cpu_to_le32(cq_context->byte_4_pg_ceqn);
+ V2_CQC_BYTE_4_CEQN_S, hr_cq->vector);
roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQN_M,
V2_CQC_BYTE_8_CQN_S, hr_cq->cqn);
- cq_context->cqe_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT);
- cq_context->cqe_cur_blk_addr =
- cpu_to_le32(cq_context->cqe_cur_blk_addr);
+ cq_context->cqe_cur_blk_addr = cpu_to_le32(mtts[0] >> PAGE_ADDR_SHIFT);
roce_set_field(cq_context->byte_16_hop_addr,
V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_M,
V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_S,
- cpu_to_le32((mtts[0]) >> (32 + PAGE_ADDR_SHIFT)));
+ mtts[0] >> (32 + PAGE_ADDR_SHIFT));
roce_set_field(cq_context->byte_16_hop_addr,
V2_CQC_BYTE_16_CQE_HOP_NUM_M,
V2_CQC_BYTE_16_CQE_HOP_NUM_S, hr_dev->caps.cqe_hop_num ==
HNS_ROCE_HOP_NUM_0 ? 0 : hr_dev->caps.cqe_hop_num);
- cq_context->cqe_nxt_blk_addr = (u32)(mtts[1] >> PAGE_ADDR_SHIFT);
+ cq_context->cqe_nxt_blk_addr = cpu_to_le32(mtts[1] >> PAGE_ADDR_SHIFT);
roce_set_field(cq_context->byte_24_pgsz_addr,
V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_M,
V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_S,
- cpu_to_le32((mtts[1]) >> (32 + PAGE_ADDR_SHIFT)));
+ mtts[1] >> (32 + PAGE_ADDR_SHIFT));
roce_set_field(cq_context->byte_24_pgsz_addr,
V2_CQC_BYTE_24_CQE_BA_PG_SZ_M,
V2_CQC_BYTE_24_CQE_BA_PG_SZ_S,
@@ -2510,7 +2868,7 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
V2_CQC_BYTE_24_CQE_BUF_PG_SZ_S,
hr_dev->caps.cqe_buf_pg_sz + PG_SHIFT_OFFSET);
- cq_context->cqe_ba = (u32)(dma_handle >> 3);
+ cq_context->cqe_ba = cpu_to_le32(dma_handle >> 3);
roce_set_field(cq_context->byte_40_cqe_ba, V2_CQC_BYTE_40_CQE_BA_M,
V2_CQC_BYTE_40_CQE_BA_S, (dma_handle >> (32 + 3)));
@@ -2523,7 +2881,7 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
V2_CQC_BYTE_44_DB_RECORD_ADDR_M,
V2_CQC_BYTE_44_DB_RECORD_ADDR_S,
((u32)hr_cq->db.dma) >> 1);
- cq_context->db_record_addr = hr_cq->db.dma >> 32;
+ cq_context->db_record_addr = cpu_to_le32(hr_cq->db.dma >> 32);
roce_set_field(cq_context->byte_56_cqe_period_maxcnt,
V2_CQC_BYTE_56_CQ_MAX_CNT_M,
@@ -2541,7 +2899,7 @@ static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq,
struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device);
struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
u32 notification_flag;
- u32 doorbell[2];
+ __le32 doorbell[2];
doorbell[0] = 0;
doorbell[1] = 0;
@@ -2603,6 +2961,55 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe,
return 0;
}
+static int sw_comp(struct hns_roce_qp *hr_qp, struct hns_roce_wq *wq,
+ int num_entries, struct ib_wc *wc)
+{
+ unsigned int left;
+ int npolled = 0;
+
+ left = wq->head - wq->tail;
+ if (left == 0)
+ return 0;
+
+ left = min_t(unsigned int, (unsigned int)num_entries, left);
+ while (npolled < left) {
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ wc->vendor_err = 0;
+ wc->qp = &hr_qp->ibqp;
+
+ wq->tail++;
+ wc++;
+ npolled++;
+ }
+
+ return npolled;
+}
+
+static int hns_roce_v2_sw_poll_cq(struct hns_roce_cq *hr_cq, int num_entries,
+ struct ib_wc *wc)
+{
+ struct hns_roce_qp *hr_qp;
+ int npolled = 0;
+
+ list_for_each_entry(hr_qp, &hr_cq->sq_list, sq_node) {
+ npolled += sw_comp(hr_qp, &hr_qp->sq,
+ num_entries - npolled, wc + npolled);
+ if (npolled >= num_entries)
+ goto out;
+ }
+
+ list_for_each_entry(hr_qp, &hr_cq->rq_list, rq_node) {
+ npolled += sw_comp(hr_qp, &hr_qp->rq,
+ num_entries - npolled, wc + npolled);
+ if (npolled >= num_entries)
+ goto out;
+ }
+
+out:
+ return npolled;
+}
+
static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
struct hns_roce_qp **cur_qp, struct ib_wc *wc)
{
@@ -2668,9 +3075,9 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
++wq->tail;
} else if ((*cur_qp)->ibqp.srq) {
srq = to_hr_srq((*cur_qp)->ibqp.srq);
- wqe_ctr = le16_to_cpu(roce_get_field(cqe->byte_4,
- V2_CQE_BYTE_4_WQE_INDX_M,
- V2_CQE_BYTE_4_WQE_INDX_S));
+ wqe_ctr = (u16)roce_get_field(cqe->byte_4,
+ V2_CQE_BYTE_4_WQE_INDX_M,
+ V2_CQE_BYTE_4_WQE_INDX_S);
wc->wr_id = srq->wrid[wqe_ctr];
hns_roce_free_srq_wqe(srq, wqe_ctr);
} else {
@@ -2862,15 +3269,16 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
wc->smac[5] = roce_get_field(cqe->byte_28,
V2_CQE_BYTE_28_SMAC_5_M,
V2_CQE_BYTE_28_SMAC_5_S);
+ wc->wc_flags |= IB_WC_WITH_SMAC;
if (roce_get_bit(cqe->byte_28, V2_CQE_BYTE_28_VID_VLD_S)) {
wc->vlan_id = (u16)roce_get_field(cqe->byte_28,
V2_CQE_BYTE_28_VID_M,
V2_CQE_BYTE_28_VID_S);
+ wc->wc_flags |= IB_WC_WITH_VLAN;
} else {
wc->vlan_id = 0xffff;
}
- wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
wc->network_hdr_type = roce_get_field(cqe->byte_28,
V2_CQE_BYTE_28_PORT_TYPE_M,
V2_CQE_BYTE_28_PORT_TYPE_S);
@@ -2882,6 +3290,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries,
struct ib_wc *wc)
{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device);
struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
struct hns_roce_qp *cur_qp = NULL;
unsigned long flags;
@@ -2889,6 +3298,18 @@ static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries,
spin_lock_irqsave(&hr_cq->lock, flags);
+ /*
+ * When the device starts to reset, the state is RST_DOWN. At this time
+ * there may still be valid CQEs in the hardware that have not been
+ * polled, so it is not allowed to switch to software polling
+ * immediately. Once the state changes to UNINIT, no CQEs remain in the
+ * hardware and the software poll path can take over.
+ */
+ if (hr_dev->state == HNS_ROCE_DEVICE_STATE_UNINIT) {
+ npolled = hns_roce_v2_sw_poll_cq(hr_cq, num_entries, wc);
+ goto out;
+ }
+
for (npolled = 0; npolled < num_entries; ++npolled) {
if (hns_roce_v2_poll_one(hr_cq, &cur_qp, wc + npolled))
break;
@@ -2900,16 +3321,55 @@ static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries,
hns_roce_v2_cq_set_ci(hr_cq, hr_cq->cons_index);
}
+out:
spin_unlock_irqrestore(&hr_cq->lock, flags);
return npolled;
}
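
For illustration only (not part of the patch): sw_comp() and
hns_roce_v2_sw_poll_cq() above drain the send and receive queues in
software once the device has reached HNS_ROCE_DEVICE_STATE_UNINIT,
reporting every outstanding WQE as IB_WC_WR_FLUSH_ERR. They rely on
head/tail being free-running unsigned counters and wqe_cnt being a power
of two. A small stand-alone sketch of that ring arithmetic, with
illustrative names:

#include <stdint.h>
#include <stdio.h>

#define WQE_CNT 8u			/* power of two, as in the driver */

struct wq {
	unsigned int head;		/* free-running producer counter */
	unsigned int tail;		/* free-running consumer counter */
	uint64_t wrid[WQE_CNT];
};

/* Flush every posted-but-uncompleted WQE, up to num_entries of them. */
static unsigned int sw_flush(struct wq *wq, unsigned int num_entries,
			     uint64_t *wr_ids)
{
	unsigned int left = wq->head - wq->tail;	/* valid across wrap */
	unsigned int n = 0;

	if (left > num_entries)
		left = num_entries;

	while (n < left) {
		/* Mask with wqe_cnt - 1 to index into the ring. */
		wr_ids[n++] = wq->wrid[wq->tail & (WQE_CNT - 1)];
		wq->tail++;
	}
	return n;
}

int main(void)
{
	struct wq wq = { .head = 10, .tail = 7 };	/* 3 outstanding */
	uint64_t ids[WQE_CNT];
	unsigned int i;

	for (i = 0; i < WQE_CNT; i++)
		wq.wrid[i] = 100 + i;

	printf("flushed %u wqes\n", sw_flush(&wq, WQE_CNT, ids));
	return 0;
}
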
+static int get_op_for_set_hem(struct hns_roce_dev *hr_dev, u32 type,
+ int step_idx)
+{
+ int op;
+
+ if (type == HEM_TYPE_SCCC && step_idx)
+ return -EINVAL;
+
+ switch (type) {
+ case HEM_TYPE_QPC:
+ op = HNS_ROCE_CMD_WRITE_QPC_BT0;
+ break;
+ case HEM_TYPE_MTPT:
+ op = HNS_ROCE_CMD_WRITE_MPT_BT0;
+ break;
+ case HEM_TYPE_CQC:
+ op = HNS_ROCE_CMD_WRITE_CQC_BT0;
+ break;
+ case HEM_TYPE_SRQC:
+ op = HNS_ROCE_CMD_WRITE_SRQC_BT0;
+ break;
+ case HEM_TYPE_SCCC:
+ op = HNS_ROCE_CMD_WRITE_SCCC_BT0;
+ break;
+ case HEM_TYPE_QPC_TIMER:
+ op = HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0;
+ break;
+ case HEM_TYPE_CQC_TIMER:
+ op = HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0;
+ break;
+ default:
+ dev_warn(hr_dev->dev,
+ "Table %d not to be written by mailbox!\n", type);
+ return -EINVAL;
+ }
+
+ return op + step_idx;
+}
+
static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, int obj,
int step_idx)
{
- struct device *dev = hr_dev->dev;
struct hns_roce_cmd_mailbox *mailbox;
struct hns_roce_hem_iter iter;
struct hns_roce_hem_mhop mhop;
@@ -2922,7 +3382,7 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
u64 bt_ba = 0;
u32 chunk_ba_num;
u32 hop_num;
- u16 op = 0xff;
+ int op;
if (!hns_roce_check_whether_mhop(hr_dev, table->type))
return 0;
@@ -2944,39 +3404,10 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
hem_idx = i;
}
- switch (table->type) {
- case HEM_TYPE_QPC:
- op = HNS_ROCE_CMD_WRITE_QPC_BT0;
- break;
- case HEM_TYPE_MTPT:
- op = HNS_ROCE_CMD_WRITE_MPT_BT0;
- break;
- case HEM_TYPE_CQC:
- op = HNS_ROCE_CMD_WRITE_CQC_BT0;
- break;
- case HEM_TYPE_SRQC:
- op = HNS_ROCE_CMD_WRITE_SRQC_BT0;
- break;
- case HEM_TYPE_SCCC:
- op = HNS_ROCE_CMD_WRITE_SCCC_BT0;
- break;
- case HEM_TYPE_QPC_TIMER:
- op = HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0;
- break;
- case HEM_TYPE_CQC_TIMER:
- op = HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0;
- break;
- default:
- dev_warn(dev, "Table %d not to be written by mailbox!\n",
- table->type);
- return 0;
- }
-
- if (table->type == HEM_TYPE_SCCC && step_idx)
+ op = get_op_for_set_hem(hr_dev, table->type, step_idx);
+ if (op == -EINVAL)
return 0;
- op += step_idx;
-
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
@@ -3065,8 +3496,6 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev,
}
static int hns_roce_v2_qp_modify(struct hns_roce_dev *hr_dev,
- enum ib_qp_state cur_state,
- enum ib_qp_state new_state,
struct hns_roce_v2_qp_context *context,
struct hns_roce_qp *hr_qp)
{
@@ -3116,6 +3545,46 @@ static void set_access_flags(struct hns_roce_qp *hr_qp,
roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
!!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0);
+ roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S,
+ !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
+ roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S, 0);
+}
+
+static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ if (hr_qp->ibqp.qp_type == IB_QPT_GSI)
+ roce_set_field(context->byte_4_sqpn_tst,
+ V2_QPC_BYTE_4_SGE_SHIFT_M,
+ V2_QPC_BYTE_4_SGE_SHIFT_S,
+ ilog2((unsigned int)hr_qp->sge.sge_cnt));
+ else
+ roce_set_field(context->byte_4_sqpn_tst,
+ V2_QPC_BYTE_4_SGE_SHIFT_M,
+ V2_QPC_BYTE_4_SGE_SHIFT_S,
+ hr_qp->sq.max_gs >
+ HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE ?
+ ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0);
+
+ roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M,
+ V2_QPC_BYTE_4_SGE_SHIFT_S, 0);
+
+ roce_set_field(context->byte_20_smac_sgid_idx,
+ V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S,
+ ilog2((unsigned int)hr_qp->sq.wqe_cnt));
+ roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
+ V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, 0);
+
+ roce_set_field(context->byte_20_smac_sgid_idx,
+ V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S,
+ (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+ hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT ||
+ hr_qp->ibqp.srq) ? 0 :
+ ilog2((unsigned int)hr_qp->rq.wqe_cnt));
+
+ roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
+ V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0);
}
static void modify_qp_reset_to_init(struct ib_qp *ibqp,
@@ -3138,21 +3607,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M,
V2_QPC_BYTE_4_TST_S, 0);
- if (ibqp->qp_type == IB_QPT_GSI)
- roce_set_field(context->byte_4_sqpn_tst,
- V2_QPC_BYTE_4_SGE_SHIFT_M,
- V2_QPC_BYTE_4_SGE_SHIFT_S,
- ilog2((unsigned int)hr_qp->sge.sge_cnt));
- else
- roce_set_field(context->byte_4_sqpn_tst,
- V2_QPC_BYTE_4_SGE_SHIFT_M,
- V2_QPC_BYTE_4_SGE_SHIFT_S,
- hr_qp->sq.max_gs > 2 ?
- ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0);
-
- roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M,
- V2_QPC_BYTE_4_SGE_SHIFT_S, 0);
-
roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M,
V2_QPC_BYTE_4_SQPN_S, hr_qp->qpn);
roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M,
@@ -3168,19 +3622,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQWS_M,
V2_QPC_BYTE_20_RQWS_S, 0);
- roce_set_field(context->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S,
- ilog2((unsigned int)hr_qp->sq.wqe_cnt));
- roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, 0);
-
- roce_set_field(context->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S,
- (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
- hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 :
- ilog2((unsigned int)hr_qp->rq.wqe_cnt));
- roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0);
+ set_qpc_wqe_cnt(hr_qp, context, qpc_mask);
/* No VLAN need to set 0xFFF */
roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M,
@@ -3225,7 +3667,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_68_rq_db,
V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_M,
V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_S, 0);
- context->rq_db_record_addr = hr_qp->rdb.dma >> 32;
+ context->rq_db_record_addr = cpu_to_le32(hr_qp->rdb.dma >> 32);
qpc_mask->rq_db_record_addr = 0;
roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S,
@@ -3456,22 +3898,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M,
V2_QPC_BYTE_4_TST_S, 0);
- if (ibqp->qp_type == IB_QPT_GSI)
- roce_set_field(context->byte_4_sqpn_tst,
- V2_QPC_BYTE_4_SGE_SHIFT_M,
- V2_QPC_BYTE_4_SGE_SHIFT_S,
- ilog2((unsigned int)hr_qp->sge.sge_cnt));
- else
- roce_set_field(context->byte_4_sqpn_tst,
- V2_QPC_BYTE_4_SGE_SHIFT_M,
- V2_QPC_BYTE_4_SGE_SHIFT_S,
- hr_qp->sq.max_gs >
- HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE ?
- ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0);
-
- roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M,
- V2_QPC_BYTE_4_SGE_SHIFT_S, 0);
-
if (attr_mask & IB_QP_ACCESS_FLAGS) {
roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S,
!!(attr->qp_access_flags & IB_ACCESS_REMOTE_READ));
@@ -3489,6 +3915,12 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
IB_ACCESS_REMOTE_ATOMIC));
roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
0);
+ roce_set_bit(context->byte_76_srqn_op_en,
+ V2_QPC_BYTE_76_EXT_ATE_S,
+ !!(attr->qp_access_flags &
+ IB_ACCESS_REMOTE_ATOMIC));
+ roce_set_bit(qpc_mask->byte_76_srqn_op_en,
+ V2_QPC_BYTE_76_EXT_ATE_S, 0);
} else {
roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S,
!!(hr_qp->access_flags & IB_ACCESS_REMOTE_READ));
@@ -3504,22 +3936,13 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
!!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC));
roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
0);
+ roce_set_bit(context->byte_76_srqn_op_en,
+ V2_QPC_BYTE_76_EXT_ATE_S,
+ !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC));
+ roce_set_bit(qpc_mask->byte_76_srqn_op_en,
+ V2_QPC_BYTE_76_EXT_ATE_S, 0);
}
- roce_set_field(context->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S,
- ilog2((unsigned int)hr_qp->sq.wqe_cnt));
- roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, 0);
-
- roce_set_field(context->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S,
- (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
- hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 :
- ilog2((unsigned int)hr_qp->rq.wqe_cnt));
- roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0);
-
roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
V2_QPC_BYTE_16_PD_S, to_hr_pd(ibqp->pd)->pdn);
roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
@@ -3638,7 +4061,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
}
dmac = (u8 *)attr->ah_attr.roce.dmac;
- context->wqe_sge_ba = (u32)(wqe_sge_ba >> 3);
+ context->wqe_sge_ba = cpu_to_le32(wqe_sge_ba >> 3);
qpc_mask->wqe_sge_ba = 0;
/*
@@ -3694,7 +4117,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M,
V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, 0);
- context->rq_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT);
+ context->rq_cur_blk_addr = cpu_to_le32(mtts[0] >> PAGE_ADDR_SHIFT);
qpc_mask->rq_cur_blk_addr = 0;
roce_set_field(context->byte_92_srq_info,
@@ -3705,7 +4128,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_M,
V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_S, 0);
- context->rq_nxt_blk_addr = (u32)(mtts[1] >> PAGE_ADDR_SHIFT);
+ context->rq_nxt_blk_addr = cpu_to_le32(mtts[1] >> PAGE_ADDR_SHIFT);
qpc_mask->rq_nxt_blk_addr = 0;
roce_set_field(context->byte_104_rq_sge,
@@ -3720,7 +4143,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
V2_QPC_BYTE_132_TRRL_BA_S, dma_handle_3 >> 4);
roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_BA_M,
V2_QPC_BYTE_132_TRRL_BA_S, 0);
- context->trrl_ba = (u32)(dma_handle_3 >> (16 + 4));
+ context->trrl_ba = cpu_to_le32(dma_handle_3 >> (16 + 4));
qpc_mask->trrl_ba = 0;
roce_set_field(context->byte_140_raq, V2_QPC_BYTE_140_TRRL_BA_M,
V2_QPC_BYTE_140_TRRL_BA_S,
@@ -3728,7 +4151,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_TRRL_BA_M,
V2_QPC_BYTE_140_TRRL_BA_S, 0);
- context->irrl_ba = (u32)(dma_handle_2 >> 6);
+ context->irrl_ba = cpu_to_le32(dma_handle_2 >> 6);
qpc_mask->irrl_ba = 0;
roce_set_field(context->byte_208_irrl, V2_QPC_BYTE_208_IRRL_BA_M,
V2_QPC_BYTE_208_IRRL_BA_S,
@@ -3764,13 +4187,11 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
/* Configure GID index */
port_num = rdma_ah_get_port_num(&attr->ah_attr);
roce_set_field(context->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SGID_IDX_M,
- V2_QPC_BYTE_20_SGID_IDX_S,
+ V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S,
hns_get_gid_index(hr_dev, port_num - 1,
grh->sgid_index));
roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SGID_IDX_M,
- V2_QPC_BYTE_20_SGID_IDX_S, 0);
+ V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, 0);
memcpy(&(context->dmac), dmac, sizeof(u32));
roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_DMAC_M,
V2_QPC_BYTE_52_DMAC_S, *((u16 *)(&dmac[4])));
@@ -3876,7 +4297,7 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
* we should set all bits of the relevant fields in context mask to
* 0 at the same time, else set them to 0x1.
*/
- context->sq_cur_blk_addr = (u32)(sq_cur_blk >> PAGE_ADDR_SHIFT);
+ context->sq_cur_blk_addr = cpu_to_le32(sq_cur_blk >> PAGE_ADDR_SHIFT);
roce_set_field(context->byte_168_irrl_idx,
V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M,
V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S,
@@ -3888,8 +4309,8 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
context->sq_cur_sge_blk_addr = ((ibqp->qp_type == IB_QPT_GSI) ||
hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ?
- ((u32)(sge_cur_blk >>
- PAGE_ADDR_SHIFT)) : 0;
+ cpu_to_le32(sge_cur_blk >>
+ PAGE_ADDR_SHIFT) : 0;
roce_set_field(context->byte_184_irrl_idx,
V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M,
V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S,
@@ -3902,7 +4323,8 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M,
V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S, 0);
- context->rx_sq_cur_blk_addr = (u32)(sq_cur_blk >> PAGE_ADDR_SHIFT);
+ context->rx_sq_cur_blk_addr =
+ cpu_to_le32(sq_cur_blk >> PAGE_ADDR_SHIFT);
roce_set_field(context->byte_232_irrl_sge,
V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_M,
V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_S,
@@ -3974,30 +4396,119 @@ static inline bool hns_roce_v2_check_qp_stat(enum ib_qp_state cur_state,
}
-static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
- const struct ib_qp_attr *attr,
- int attr_mask, enum ib_qp_state cur_state,
- enum ib_qp_state new_state)
+static int hns_roce_v2_set_path(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
{
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct hns_roce_v2_qp_context *context;
- struct hns_roce_v2_qp_context *qpc_mask;
- struct device *dev = hr_dev->dev;
- int ret = -EINVAL;
+ const struct ib_gid_attr *gid_attr = NULL;
+ int is_roce_protocol;
+ u16 vlan_id = 0xffff;
+ bool is_udp = false;
+ u8 ib_port;
+ u8 hr_port;
+ int ret;
- context = kcalloc(2, sizeof(*context), GFP_ATOMIC);
- if (!context)
- return -ENOMEM;
+ ib_port = (attr_mask & IB_QP_PORT) ? attr->port_num : hr_qp->port + 1;
+ hr_port = ib_port - 1;
+ is_roce_protocol = rdma_cap_eth_ah(&hr_dev->ib_dev, ib_port) &&
+ rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH;
+
+ if (is_roce_protocol) {
+ gid_attr = attr->ah_attr.grh.sgid_attr;
+ ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL);
+ if (ret)
+ return ret;
+
+ if (gid_attr)
+ is_udp = (gid_attr->gid_type ==
+ IB_GID_TYPE_ROCE_UDP_ENCAP);
+ }
+
+ if (vlan_id < VLAN_N_VID) {
+ roce_set_bit(context->byte_76_srqn_op_en,
+ V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1);
+ roce_set_bit(qpc_mask->byte_76_srqn_op_en,
+ V2_QPC_BYTE_76_RQ_VLAN_EN_S, 0);
+ roce_set_bit(context->byte_168_irrl_idx,
+ V2_QPC_BYTE_168_SQ_VLAN_EN_S, 1);
+ roce_set_bit(qpc_mask->byte_168_irrl_idx,
+ V2_QPC_BYTE_168_SQ_VLAN_EN_S, 0);
+ }
+
+ roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M,
+ V2_QPC_BYTE_24_VLAN_ID_S, vlan_id);
+ roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M,
+ V2_QPC_BYTE_24_VLAN_ID_S, 0);
+
+ if (grh->sgid_index >= hr_dev->caps.gid_table_len[hr_port]) {
+ dev_err(hr_dev->dev, "sgid_index(%u) too large. max is %d\n",
+ grh->sgid_index, hr_dev->caps.gid_table_len[hr_port]);
+ return -EINVAL;
+ }
+
+ if (attr->ah_attr.type != RDMA_AH_ATTR_TYPE_ROCE) {
+ dev_err(hr_dev->dev, "ah attr is not RDMA roce type\n");
+ return -EINVAL;
+ }
+
+ roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_UDPSPN_M,
+ V2_QPC_BYTE_52_UDPSPN_S,
+ is_udp ? 0x12b7 : 0);
+
+ roce_set_field(qpc_mask->byte_52_udpspn_dmac, V2_QPC_BYTE_52_UDPSPN_M,
+ V2_QPC_BYTE_52_UDPSPN_S, 0);
+
+ roce_set_field(context->byte_20_smac_sgid_idx,
+ V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S,
+ grh->sgid_index);
+
+ roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
+ V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, 0);
+
+ roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M,
+ V2_QPC_BYTE_24_HOP_LIMIT_S, grh->hop_limit);
+ roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M,
+ V2_QPC_BYTE_24_HOP_LIMIT_S, 0);
+
+ if (hr_dev->pci_dev->revision == 0x21 && is_udp)
+ roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
+ V2_QPC_BYTE_24_TC_S, grh->traffic_class >> 2);
+ else
+ roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
+ V2_QPC_BYTE_24_TC_S, grh->traffic_class);
+ roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
+ V2_QPC_BYTE_24_TC_S, 0);
+ roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
+ V2_QPC_BYTE_28_FL_S, grh->flow_label);
+ roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
+ V2_QPC_BYTE_28_FL_S, 0);
+ memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
+ memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw));
+ roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
+ V2_QPC_BYTE_28_SL_S, rdma_ah_get_sl(&attr->ah_attr));
+ roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
+ V2_QPC_BYTE_28_SL_S, 0);
+ hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
+
+ return 0;
+}
+
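
For illustration only (not part of the patch): in hns_roce_v2_set_path()
above, HIP08 revision 0x21 programs grh->traffic_class >> 2 into the TC
field for RoCEv2 (UDP-encapsulated) traffic. Dropping the low two bits
turns the 8-bit IP traffic class into its 6-bit DSCP portion (the low two
bits carry ECN), presumably because that is the value the hardware field
expects. A tiny sketch of that split:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t traffic_class = 0xb8;		/* example GRH traffic class */
	uint8_t dscp = traffic_class >> 2;	/* upper 6 bits */
	uint8_t ecn  = traffic_class & 0x3;	/* lower 2 bits */

	printf("tc=0x%02x -> dscp=0x%02x ecn=0x%x\n",
	       (unsigned)traffic_class, (unsigned)dscp, (unsigned)ecn);
	return 0;
}
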
+static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask,
+ enum ib_qp_state cur_state,
+ enum ib_qp_state new_state,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ int ret = 0;
- qpc_mask = context + 1;
- /*
- * In v2 engine, software pass context and context mask to hardware
- * when modifying qp. If software need modify some fields in context,
- * we should set all bits of the relevant fields in context mask to
- * 0 at the same time, else set them to 0x1.
- */
- memset(qpc_mask, 0xff, sizeof(*qpc_mask));
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
memset(qpc_mask, 0, sizeof(*qpc_mask));
modify_qp_reset_to_init(ibqp, attr, attr_mask, context,
@@ -4019,134 +4530,30 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
/* Nothing */
;
} else {
- dev_err(dev, "Illegal state for QP!\n");
+ dev_err(hr_dev->dev, "Illegal state for QP!\n");
ret = -EINVAL;
goto out;
}
- /* When QP state is err, SQ and RQ WQE should be flushed */
- if (new_state == IB_QPS_ERR) {
- roce_set_field(context->byte_160_sq_ci_pi,
- V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
- V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S,
- hr_qp->sq.head);
- roce_set_field(qpc_mask->byte_160_sq_ci_pi,
- V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
- V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0);
+out:
+ return ret;
+}
- if (!ibqp->srq) {
- roce_set_field(context->byte_84_rq_ci_pi,
- V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
- V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S,
- hr_qp->rq.head);
- roce_set_field(qpc_mask->byte_84_rq_ci_pi,
- V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
- V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0);
- }
- }
+static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ int ret = 0;
if (attr_mask & IB_QP_AV) {
- const struct ib_global_route *grh =
- rdma_ah_read_grh(&attr->ah_attr);
- const struct ib_gid_attr *gid_attr = NULL;
- int is_roce_protocol;
- u16 vlan = 0xffff;
- u8 ib_port;
- u8 hr_port;
-
- ib_port = (attr_mask & IB_QP_PORT) ? attr->port_num :
- hr_qp->port + 1;
- hr_port = ib_port - 1;
- is_roce_protocol = rdma_cap_eth_ah(&hr_dev->ib_dev, ib_port) &&
- rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH;
-
- if (is_roce_protocol) {
- gid_attr = attr->ah_attr.grh.sgid_attr;
- ret = rdma_read_gid_l2_fields(gid_attr, &vlan, NULL);
- if (ret)
- goto out;
- }
-
- if (vlan < VLAN_CFI_MASK) {
- roce_set_bit(context->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1);
- roce_set_bit(qpc_mask->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_RQ_VLAN_EN_S, 0);
- roce_set_bit(context->byte_168_irrl_idx,
- V2_QPC_BYTE_168_SQ_VLAN_EN_S, 1);
- roce_set_bit(qpc_mask->byte_168_irrl_idx,
- V2_QPC_BYTE_168_SQ_VLAN_EN_S, 0);
- }
-
- roce_set_field(context->byte_24_mtu_tc,
- V2_QPC_BYTE_24_VLAN_ID_M,
- V2_QPC_BYTE_24_VLAN_ID_S, vlan);
- roce_set_field(qpc_mask->byte_24_mtu_tc,
- V2_QPC_BYTE_24_VLAN_ID_M,
- V2_QPC_BYTE_24_VLAN_ID_S, 0);
-
- if (grh->sgid_index >= hr_dev->caps.gid_table_len[hr_port]) {
- dev_err(hr_dev->dev,
- "sgid_index(%u) too large. max is %d\n",
- grh->sgid_index,
- hr_dev->caps.gid_table_len[hr_port]);
- ret = -EINVAL;
- goto out;
- }
-
- if (attr->ah_attr.type != RDMA_AH_ATTR_TYPE_ROCE) {
- dev_err(hr_dev->dev, "ah attr is not RDMA roce type\n");
- ret = -EINVAL;
- goto out;
- }
-
- roce_set_field(context->byte_52_udpspn_dmac,
- V2_QPC_BYTE_52_UDPSPN_M, V2_QPC_BYTE_52_UDPSPN_S,
- (gid_attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) ?
- 0 : 0x12b7);
-
- roce_set_field(qpc_mask->byte_52_udpspn_dmac,
- V2_QPC_BYTE_52_UDPSPN_M,
- V2_QPC_BYTE_52_UDPSPN_S, 0);
-
- roce_set_field(context->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SGID_IDX_M,
- V2_QPC_BYTE_20_SGID_IDX_S, grh->sgid_index);
-
- roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SGID_IDX_M,
- V2_QPC_BYTE_20_SGID_IDX_S, 0);
-
- roce_set_field(context->byte_24_mtu_tc,
- V2_QPC_BYTE_24_HOP_LIMIT_M,
- V2_QPC_BYTE_24_HOP_LIMIT_S, grh->hop_limit);
- roce_set_field(qpc_mask->byte_24_mtu_tc,
- V2_QPC_BYTE_24_HOP_LIMIT_M,
- V2_QPC_BYTE_24_HOP_LIMIT_S, 0);
-
- if (hr_dev->pci_dev->revision == 0x21 &&
- gid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
- roce_set_field(context->byte_24_mtu_tc,
- V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S,
- grh->traffic_class >> 2);
- else
- roce_set_field(context->byte_24_mtu_tc,
- V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S,
- grh->traffic_class);
- roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
- V2_QPC_BYTE_24_TC_S, 0);
- roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
- V2_QPC_BYTE_28_FL_S, grh->flow_label);
- roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
- V2_QPC_BYTE_28_FL_S, 0);
- memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
- memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw));
- roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
- V2_QPC_BYTE_28_SL_S,
- rdma_ah_get_sl(&attr->ah_attr));
- roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
- V2_QPC_BYTE_28_SL_S, 0);
- hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
+ ret = hns_roce_v2_set_path(ibqp, attr, attr_mask, context,
+ qpc_mask);
+ if (ret)
+ return ret;
}
if (attr_mask & IB_QP_TIMEOUT) {
@@ -4158,7 +4565,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
V2_QPC_BYTE_28_AT_M, V2_QPC_BYTE_28_AT_S,
0);
} else {
- dev_warn(dev, "Local ACK timeout shall be 0 to 30.\n");
+ dev_warn(hr_dev->dev,
+ "Local ACK timeout shall be 0 to 30.\n");
}
}
@@ -4173,8 +4581,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
roce_set_field(context->byte_212_lsn,
V2_QPC_BYTE_212_RETRY_CNT_M,
- V2_QPC_BYTE_212_RETRY_CNT_S,
- attr->retry_cnt);
+ V2_QPC_BYTE_212_RETRY_CNT_S, attr->retry_cnt);
roce_set_field(qpc_mask->byte_212_lsn,
V2_QPC_BYTE_212_RETRY_CNT_M,
V2_QPC_BYTE_212_RETRY_CNT_S, 0);
@@ -4196,6 +4603,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
V2_QPC_BYTE_244_RNR_CNT_S, 0);
}
+ /* RC&UC&UD required attr */
if (attr_mask & IB_QP_SQ_PSN) {
roce_set_field(context->byte_172_sq_psn,
V2_QPC_BYTE_172_SQ_CUR_PSN_M,
@@ -4290,11 +4698,85 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
}
if (attr_mask & IB_QP_QKEY) {
- context->qkey_xrcd = attr->qkey;
+ context->qkey_xrcd = cpu_to_le32(attr->qkey);
qpc_mask->qkey_xrcd = 0;
hr_qp->qkey = attr->qkey;
}
+ return ret;
+}
+
+static void hns_roce_v2_record_opt_fields(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+
+ if (attr_mask & IB_QP_ACCESS_FLAGS)
+ hr_qp->atomic_rd_en = attr->qp_access_flags;
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ hr_qp->resp_depth = attr->max_dest_rd_atomic;
+ if (attr_mask & IB_QP_PORT) {
+ hr_qp->port = attr->port_num - 1;
+ hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
+ }
+}
+
+static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask, enum ib_qp_state cur_state,
+ enum ib_qp_state new_state)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct hns_roce_v2_qp_context ctx[2];
+ struct hns_roce_v2_qp_context *context = ctx;
+ struct hns_roce_v2_qp_context *qpc_mask = ctx + 1;
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ /*
+ * In the v2 engine, software passes the context and the context mask
+ * to hardware when modifying a QP. For each field that software wants
+ * to modify, all bits of that field in the context mask must be
+ * cleared to 0; the mask bits of untouched fields stay set to 1.
+ */
+ memset(context, 0, sizeof(*context));
+ memset(qpc_mask, 0xff, sizeof(*qpc_mask));
+ ret = hns_roce_v2_set_abs_fields(ibqp, attr, attr_mask, cur_state,
+ new_state, context, qpc_mask);
+ if (ret)
+ goto out;
+
+ /* When QP state is err, SQ and RQ WQE should be flushed */
+ if (new_state == IB_QPS_ERR) {
+ roce_set_field(context->byte_160_sq_ci_pi,
+ V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
+ V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S,
+ hr_qp->sq.head);
+ roce_set_field(qpc_mask->byte_160_sq_ci_pi,
+ V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
+ V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0);
+
+ if (!ibqp->srq) {
+ roce_set_field(context->byte_84_rq_ci_pi,
+ V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
+ V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S,
+ hr_qp->rq.head);
+ roce_set_field(qpc_mask->byte_84_rq_ci_pi,
+ V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
+ V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0);
+ }
+ }
+
+ /* Configure the optional fields */
+ ret = hns_roce_v2_set_opt_fields(ibqp, attr, attr_mask, context,
+ qpc_mask);
+ if (ret)
+ goto out;
+
roce_set_bit(context->byte_108_rx_reqepsn, V2_QPC_BYTE_108_INV_CREDIT_S,
ibqp->srq ? 1 : 0);
roce_set_bit(qpc_mask->byte_108_rx_reqepsn,
@@ -4307,8 +4789,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
V2_QPC_BYTE_60_QP_ST_S, 0);
/* SW pass context to HW */
- ret = hns_roce_v2_qp_modify(hr_dev, cur_state, new_state,
- context, hr_qp);
+ ret = hns_roce_v2_qp_modify(hr_dev, ctx, hr_qp);
if (ret) {
dev_err(dev, "hns_roce_qp_modify failed(%d)\n", ret);
goto out;
@@ -4316,15 +4797,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
hr_qp->state = new_state;
- if (attr_mask & IB_QP_ACCESS_FLAGS)
- hr_qp->atomic_rd_en = attr->qp_access_flags;
-
- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
- hr_qp->resp_depth = attr->max_dest_rd_atomic;
- if (attr_mask & IB_QP_PORT) {
- hr_qp->port = attr->port_num - 1;
- hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
- }
+ hns_roce_v2_record_opt_fields(ibqp, attr, attr_mask);
if (new_state == IB_QPS_RESET && !ibqp->uobject) {
hns_roce_v2_cq_clean(to_hr_cq(ibqp->recv_cq), hr_qp->qpn,
@@ -4337,14 +4810,12 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
hr_qp->rq.tail = 0;
hr_qp->sq.head = 0;
hr_qp->sq.tail = 0;
- hr_qp->sq_next_wqe = 0;
hr_qp->next_sge = 0;
if (hr_qp->rq.wqe_cnt)
*hr_qp->rdb.db_record = 0;
}
out:
- kfree(context);
return ret;
}
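
For illustration only (not part of the patch): as the comment at the top
of the reworked hns_roce_v2_modify_qp() notes, the QPC is handed to
hardware as a value/mask pair. qpc_mask starts out as all ones, and each
field being modified is written into the context while its bits in the
mask are cleared. The merge rule shown below (masked bits keep the old
value, cleared bits take the new one) is an assumption about how the
hardware consumes the pair, used here only to show the convention; the
field macro and names are illustrative:

#include <stdint.h>
#include <stdio.h>

/* Illustrative equivalent of a roce_set_field() mask/shift pair. */
#define FIELD_M		0x00ff0000u
#define FIELD_S		16

static void set_field(uint32_t *word, uint32_t mask, int shift, uint32_t val)
{
	*word = (*word & ~mask) | ((val << shift) & mask);
}

int main(void)
{
	uint32_t context = 0;		/* new values for changed fields  */
	uint32_t qpc_mask = ~0u;	/* all ones: "keep everything"    */
	uint32_t old = 0xdeadbeef;	/* what hardware currently holds  */

	/* Modify one field: write it into the context and clear it in
	 * the mask, mirroring each roce_set_field() pair in the driver.
	 */
	set_field(&context, FIELD_M, FIELD_S, 0x5a);
	set_field(&qpc_mask, FIELD_M, FIELD_S, 0);

	/* Assumed hardware-side merge of old state with the pair. */
	printf("result = 0x%08x\n",
	       (unsigned)((old & qpc_mask) | (context & ~qpc_mask)));
	return 0;
}
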
@@ -4395,16 +4866,12 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct hns_roce_v2_qp_context *context;
+ struct hns_roce_v2_qp_context context = {};
struct device *dev = hr_dev->dev;
int tmp_qp_state;
int state;
int ret;
- context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
- return -ENOMEM;
-
memset(qp_attr, 0, sizeof(*qp_attr));
memset(qp_init_attr, 0, sizeof(*qp_init_attr));
@@ -4416,14 +4883,14 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
goto done;
}
- ret = hns_roce_v2_query_qpc(hr_dev, hr_qp, context);
+ ret = hns_roce_v2_query_qpc(hr_dev, hr_qp, &context);
if (ret) {
dev_err(dev, "query qpc error\n");
ret = -EINVAL;
goto out;
}
- state = roce_get_field(context->byte_60_qpst_tempid,
+ state = roce_get_field(context.byte_60_qpst_tempid,
V2_QPC_BYTE_60_QP_ST_M, V2_QPC_BYTE_60_QP_ST_S);
tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state);
if (tmp_qp_state == -1) {
@@ -4433,7 +4900,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
}
hr_qp->state = (u8)tmp_qp_state;
qp_attr->qp_state = (enum ib_qp_state)hr_qp->state;
- qp_attr->path_mtu = (enum ib_mtu)roce_get_field(context->byte_24_mtu_tc,
+ qp_attr->path_mtu = (enum ib_mtu)roce_get_field(context.byte_24_mtu_tc,
V2_QPC_BYTE_24_MTU_M,
V2_QPC_BYTE_24_MTU_S);
qp_attr->path_mig_state = IB_MIG_ARMED;
@@ -4441,20 +4908,20 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
if (hr_qp->ibqp.qp_type == IB_QPT_UD)
qp_attr->qkey = V2_QKEY_VAL;
- qp_attr->rq_psn = roce_get_field(context->byte_108_rx_reqepsn,
+ qp_attr->rq_psn = roce_get_field(context.byte_108_rx_reqepsn,
V2_QPC_BYTE_108_RX_REQ_EPSN_M,
V2_QPC_BYTE_108_RX_REQ_EPSN_S);
- qp_attr->sq_psn = (u32)roce_get_field(context->byte_172_sq_psn,
+ qp_attr->sq_psn = (u32)roce_get_field(context.byte_172_sq_psn,
V2_QPC_BYTE_172_SQ_CUR_PSN_M,
V2_QPC_BYTE_172_SQ_CUR_PSN_S);
- qp_attr->dest_qp_num = (u8)roce_get_field(context->byte_56_dqpn_err,
+ qp_attr->dest_qp_num = (u8)roce_get_field(context.byte_56_dqpn_err,
V2_QPC_BYTE_56_DQPN_M,
V2_QPC_BYTE_56_DQPN_S);
- qp_attr->qp_access_flags = ((roce_get_bit(context->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_RRE_S)) << V2_QP_RWE_S) |
- ((roce_get_bit(context->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_RWE_S)) << V2_QP_RRE_S) |
- ((roce_get_bit(context->byte_76_srqn_op_en,
+ qp_attr->qp_access_flags = ((roce_get_bit(context.byte_76_srqn_op_en,
+ V2_QPC_BYTE_76_RRE_S)) << V2_QP_RRE_S) |
+ ((roce_get_bit(context.byte_76_srqn_op_en,
+ V2_QPC_BYTE_76_RWE_S)) << V2_QP_RWE_S) |
+ ((roce_get_bit(context.byte_76_srqn_op_en,
V2_QPC_BYTE_76_ATE_S)) << V2_QP_ATE_S);
if (hr_qp->ibqp.qp_type == IB_QPT_RC ||
@@ -4463,43 +4930,43 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
rdma_ah_retrieve_grh(&qp_attr->ah_attr);
rdma_ah_set_sl(&qp_attr->ah_attr,
- roce_get_field(context->byte_28_at_fl,
+ roce_get_field(context.byte_28_at_fl,
V2_QPC_BYTE_28_SL_M,
V2_QPC_BYTE_28_SL_S));
- grh->flow_label = roce_get_field(context->byte_28_at_fl,
+ grh->flow_label = roce_get_field(context.byte_28_at_fl,
V2_QPC_BYTE_28_FL_M,
V2_QPC_BYTE_28_FL_S);
- grh->sgid_index = roce_get_field(context->byte_20_smac_sgid_idx,
+ grh->sgid_index = roce_get_field(context.byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_SGID_IDX_M,
V2_QPC_BYTE_20_SGID_IDX_S);
- grh->hop_limit = roce_get_field(context->byte_24_mtu_tc,
+ grh->hop_limit = roce_get_field(context.byte_24_mtu_tc,
V2_QPC_BYTE_24_HOP_LIMIT_M,
V2_QPC_BYTE_24_HOP_LIMIT_S);
- grh->traffic_class = roce_get_field(context->byte_24_mtu_tc,
+ grh->traffic_class = roce_get_field(context.byte_24_mtu_tc,
V2_QPC_BYTE_24_TC_M,
V2_QPC_BYTE_24_TC_S);
- memcpy(grh->dgid.raw, context->dgid, sizeof(grh->dgid.raw));
+ memcpy(grh->dgid.raw, context.dgid, sizeof(grh->dgid.raw));
}
qp_attr->port_num = hr_qp->port + 1;
qp_attr->sq_draining = 0;
- qp_attr->max_rd_atomic = 1 << roce_get_field(context->byte_208_irrl,
+ qp_attr->max_rd_atomic = 1 << roce_get_field(context.byte_208_irrl,
V2_QPC_BYTE_208_SR_MAX_M,
V2_QPC_BYTE_208_SR_MAX_S);
- qp_attr->max_dest_rd_atomic = 1 << roce_get_field(context->byte_140_raq,
+ qp_attr->max_dest_rd_atomic = 1 << roce_get_field(context.byte_140_raq,
V2_QPC_BYTE_140_RR_MAX_M,
V2_QPC_BYTE_140_RR_MAX_S);
- qp_attr->min_rnr_timer = (u8)roce_get_field(context->byte_80_rnr_rx_cqn,
+ qp_attr->min_rnr_timer = (u8)roce_get_field(context.byte_80_rnr_rx_cqn,
V2_QPC_BYTE_80_MIN_RNR_TIME_M,
V2_QPC_BYTE_80_MIN_RNR_TIME_S);
- qp_attr->timeout = (u8)roce_get_field(context->byte_28_at_fl,
+ qp_attr->timeout = (u8)roce_get_field(context.byte_28_at_fl,
V2_QPC_BYTE_28_AT_M,
V2_QPC_BYTE_28_AT_S);
- qp_attr->retry_cnt = roce_get_field(context->byte_212_lsn,
+ qp_attr->retry_cnt = roce_get_field(context.byte_212_lsn,
V2_QPC_BYTE_212_RETRY_CNT_M,
V2_QPC_BYTE_212_RETRY_CNT_S);
- qp_attr->rnr_retry = context->rq_rnr_timer;
+ qp_attr->rnr_retry = le32_to_cpu(context.rq_rnr_timer);
done:
qp_attr->cur_qp_state = qp_attr->qp_state;
@@ -4518,7 +4985,6 @@ done:
out:
mutex_unlock(&hr_qp->mutex);
- kfree(context);
return ret;
}
@@ -4527,35 +4993,44 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
struct ib_udata *udata)
{
struct hns_roce_cq *send_cq, *recv_cq;
- struct device *dev = hr_dev->dev;
- int ret;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ unsigned long flags;
+ int ret = 0;
if (hr_qp->ibqp.qp_type == IB_QPT_RC && hr_qp->state != IB_QPS_RESET) {
/* Modify qp to reset before destroying qp */
ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0,
hr_qp->state, IB_QPS_RESET);
- if (ret) {
- dev_err(dev, "modify QP %06lx to ERR failed.\n",
- hr_qp->qpn);
- return ret;
- }
+ if (ret)
+ ibdev_err(ibdev, "modify QP to Reset failed.\n");
}
- send_cq = to_hr_cq(hr_qp->ibqp.send_cq);
- recv_cq = to_hr_cq(hr_qp->ibqp.recv_cq);
+ send_cq = hr_qp->ibqp.send_cq ? to_hr_cq(hr_qp->ibqp.send_cq) : NULL;
+ recv_cq = hr_qp->ibqp.recv_cq ? to_hr_cq(hr_qp->ibqp.recv_cq) : NULL;
+ spin_lock_irqsave(&hr_dev->qp_list_lock, flags);
hns_roce_lock_cqs(send_cq, recv_cq);
+ list_del(&hr_qp->node);
+ list_del(&hr_qp->sq_node);
+ list_del(&hr_qp->rq_node);
+
if (!udata) {
- __hns_roce_v2_cq_clean(recv_cq, hr_qp->qpn, hr_qp->ibqp.srq ?
- to_hr_srq(hr_qp->ibqp.srq) : NULL);
- if (send_cq != recv_cq)
+ if (recv_cq)
+ __hns_roce_v2_cq_clean(recv_cq, hr_qp->qpn,
+ (hr_qp->ibqp.srq ?
+ to_hr_srq(hr_qp->ibqp.srq) :
+ NULL));
+
+ if (send_cq && send_cq != recv_cq)
__hns_roce_v2_cq_clean(send_cq, hr_qp->qpn, NULL);
+
}
hns_roce_qp_remove(hr_dev, hr_qp);
hns_roce_unlock_cqs(send_cq, recv_cq);
+ spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags);
hns_roce_qp_free(hr_dev, hr_qp);
@@ -4594,7 +5069,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
kfree(hr_qp->rq_inl_buf.wqe_list);
}
- return 0;
+ return ret;
}
static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
@@ -4604,15 +5079,11 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
int ret;
ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata);
- if (ret) {
- dev_err(hr_dev->dev, "Destroy qp failed(%d)\n", ret);
- return ret;
- }
+ if (ret)
+ ibdev_err(&hr_dev->ib_dev, "Destroy qp 0x%06lx failed(%d)\n",
+ hr_qp->qpn, ret);
- if (hr_qp->ibqp.qp_type == IB_QPT_GSI)
- kfree(hr_to_hr_sqp(hr_qp));
- else
- kfree(hr_qp);
+ kfree(hr_qp);
return 0;
}
@@ -4829,10 +5300,7 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
{
struct hns_roce_dev *hr_dev = eq->hr_dev;
- u32 doorbell[2];
-
- doorbell[0] = 0;
- doorbell[1] = 0;
+ __le32 doorbell[2] = {};
if (eq->type_flag == HNS_ROCE_AEQ) {
roce_set_field(doorbell[0], HNS_ROCE_V2_EQ_DB_CMD_M,
@@ -4904,7 +5372,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
struct hns_roce_eq *eq)
{
struct device *dev = hr_dev->dev;
- struct hns_roce_aeqe *aeqe;
+ struct hns_roce_aeqe *aeqe = next_aeqe_sw_v2(eq);
int aeqe_found = 0;
int event_type;
int sub_type;
@@ -4912,8 +5380,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
u32 qpn;
u32 cqn;
- while ((aeqe = next_aeqe_sw_v2(eq))) {
-
+ while (aeqe) {
/* Make sure we read AEQ entry after we have checked the
* ownership bit
*/
@@ -4977,11 +5444,12 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
++eq->cons_index;
aeqe_found = 1;
- if (eq->cons_index > (2 * eq->entries - 1)) {
- dev_warn(dev, "cons_index overflow, set back to 0.\n");
+ if (eq->cons_index > (2 * eq->entries - 1))
eq->cons_index = 0;
- }
+
hns_roce_v2_init_irq_work(hr_dev, eq, qpn, cqn);
+
+ aeqe = next_aeqe_sw_v2(eq);
}
set_eq_cons_index_v2(eq);
@@ -5034,19 +5502,17 @@ static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
struct hns_roce_eq *eq)
{
struct device *dev = hr_dev->dev;
- struct hns_roce_ceqe *ceqe;
+ struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq);
int ceqe_found = 0;
u32 cqn;
- while ((ceqe = next_ceqe_sw_v2(eq))) {
-
+ while (ceqe) {
/* Make sure we read CEQ entry after we have checked the
* ownership bit
*/
dma_rmb();
- cqn = roce_get_field(ceqe->comp,
- HNS_ROCE_V2_CEQE_COMP_CQN_M,
+ cqn = roce_get_field(ceqe->comp, HNS_ROCE_V2_CEQE_COMP_CQN_M,
HNS_ROCE_V2_CEQE_COMP_CQN_S);
hns_roce_cq_completion(hr_dev, cqn);
@@ -5054,10 +5520,12 @@ static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
++eq->cons_index;
ceqe_found = 1;
- if (eq->cons_index > (2 * eq->entries - 1)) {
+ if (eq->cons_index > (EQ_DEPTH_COEFF * eq->entries - 1)) {
dev_warn(dev, "cons_index overflow, set back to 0.\n");
eq->cons_index = 0;
}
+
+ ceqe = next_ceqe_sw_v2(eq);
}
set_eq_cons_index_v2(eq);
@@ -5093,14 +5561,14 @@ static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id)
int_st = roce_read(hr_dev, ROCEE_VF_ABN_INT_ST_REG);
int_en = roce_read(hr_dev, ROCEE_VF_ABN_INT_EN_REG);
- if (roce_get_bit(int_st, HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S)) {
+ if (int_st & BIT(HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S)) {
struct pci_dev *pdev = hr_dev->pci_dev;
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
const struct hnae3_ae_ops *ops = ae_dev->ops;
dev_err(dev, "AEQ overflow!\n");
- roce_set_bit(int_st, HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S, 1);
+ int_st |= 1 << HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S;
roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st);
/* Set reset level for reset_event() */
@@ -5110,27 +5578,27 @@ static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id)
if (ops->reset_event)
ops->reset_event(pdev, NULL);
- roce_set_bit(int_en, HNS_ROCE_V2_VF_ABN_INT_EN_S, 1);
+ int_en |= 1 << HNS_ROCE_V2_VF_ABN_INT_EN_S;
roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en);
int_work = 1;
- } else if (roce_get_bit(int_st, HNS_ROCE_V2_VF_INT_ST_BUS_ERR_S)) {
+ } else if (int_st & BIT(HNS_ROCE_V2_VF_INT_ST_BUS_ERR_S)) {
dev_err(dev, "BUS ERR!\n");
- roce_set_bit(int_st, HNS_ROCE_V2_VF_INT_ST_BUS_ERR_S, 1);
+ int_st |= 1 << HNS_ROCE_V2_VF_INT_ST_BUS_ERR_S;
roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st);
- roce_set_bit(int_en, HNS_ROCE_V2_VF_ABN_INT_EN_S, 1);
+ int_en |= 1 << HNS_ROCE_V2_VF_ABN_INT_EN_S;
roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en);
int_work = 1;
- } else if (roce_get_bit(int_st, HNS_ROCE_V2_VF_INT_ST_OTHER_ERR_S)) {
+ } else if (int_st & BIT(HNS_ROCE_V2_VF_INT_ST_OTHER_ERR_S)) {
dev_err(dev, "OTHER ERR!\n");
- roce_set_bit(int_st, HNS_ROCE_V2_VF_INT_ST_OTHER_ERR_S, 1);
+ int_st |= 1 << HNS_ROCE_V2_VF_INT_ST_OTHER_ERR_S;
roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st);
- roce_set_bit(int_en, HNS_ROCE_V2_VF_ABN_INT_EN_S, 1);
+ int_en |= 1 << HNS_ROCE_V2_VF_ABN_INT_EN_S;
roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en);
int_work = 1;
@@ -5202,14 +5670,12 @@ static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev,
buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
- /* hop_num = 0 */
if (mhop_num == HNS_ROCE_HOP_NUM_0) {
dma_free_coherent(dev, (unsigned int)(eq->entries *
eq->eqe_size), eq->bt_l0, eq->l0_dma);
return;
}
- /* hop_num = 1 or hop = 2 */
dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
if (mhop_num == 1) {
for (i = 0; i < eq->l0_last_num; i++) {
@@ -5268,9 +5734,9 @@ static void hns_roce_v2_free_eq(struct hns_roce_dev *hr_dev,
return;
}
- if (eq->buf_list)
- dma_free_coherent(hr_dev->dev, buf_chk_sz,
- eq->buf_list->buf, eq->buf_list->map);
+ dma_free_coherent(hr_dev->dev, buf_chk_sz, eq->buf_list->buf,
+ eq->buf_list->map);
+ kfree(eq->buf_list);
}
static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
@@ -5299,126 +5765,98 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
eq->eqe_ba = eq->l0_dma;
/* set eqc state */
- roce_set_field(eqc->byte_4,
- HNS_ROCE_EQC_EQ_ST_M,
- HNS_ROCE_EQC_EQ_ST_S,
+ roce_set_field(eqc->byte_4, HNS_ROCE_EQC_EQ_ST_M, HNS_ROCE_EQC_EQ_ST_S,
HNS_ROCE_V2_EQ_STATE_VALID);
/* set eqe hop num */
- roce_set_field(eqc->byte_4,
- HNS_ROCE_EQC_HOP_NUM_M,
+ roce_set_field(eqc->byte_4, HNS_ROCE_EQC_HOP_NUM_M,
HNS_ROCE_EQC_HOP_NUM_S, eq->hop_num);
/* set eqc over_ignore */
- roce_set_field(eqc->byte_4,
- HNS_ROCE_EQC_OVER_IGNORE_M,
+ roce_set_field(eqc->byte_4, HNS_ROCE_EQC_OVER_IGNORE_M,
HNS_ROCE_EQC_OVER_IGNORE_S, eq->over_ignore);
/* set eqc coalesce */
- roce_set_field(eqc->byte_4,
- HNS_ROCE_EQC_COALESCE_M,
+ roce_set_field(eqc->byte_4, HNS_ROCE_EQC_COALESCE_M,
HNS_ROCE_EQC_COALESCE_S, eq->coalesce);
/* set eqc arm_state */
- roce_set_field(eqc->byte_4,
- HNS_ROCE_EQC_ARM_ST_M,
+ roce_set_field(eqc->byte_4, HNS_ROCE_EQC_ARM_ST_M,
HNS_ROCE_EQC_ARM_ST_S, eq->arm_st);
/* set eqn */
- roce_set_field(eqc->byte_4,
- HNS_ROCE_EQC_EQN_M,
- HNS_ROCE_EQC_EQN_S, eq->eqn);
+ roce_set_field(eqc->byte_4, HNS_ROCE_EQC_EQN_M, HNS_ROCE_EQC_EQN_S,
+ eq->eqn);
/* set eqe_cnt */
- roce_set_field(eqc->byte_4,
- HNS_ROCE_EQC_EQE_CNT_M,
- HNS_ROCE_EQC_EQE_CNT_S,
- HNS_ROCE_EQ_INIT_EQE_CNT);
+ roce_set_field(eqc->byte_4, HNS_ROCE_EQC_EQE_CNT_M,
+ HNS_ROCE_EQC_EQE_CNT_S, HNS_ROCE_EQ_INIT_EQE_CNT);
/* set eqe_ba_pg_sz */
- roce_set_field(eqc->byte_8,
- HNS_ROCE_EQC_BA_PG_SZ_M,
+ roce_set_field(eqc->byte_8, HNS_ROCE_EQC_BA_PG_SZ_M,
HNS_ROCE_EQC_BA_PG_SZ_S,
eq->eqe_ba_pg_sz + PG_SHIFT_OFFSET);
/* set eqe_buf_pg_sz */
- roce_set_field(eqc->byte_8,
- HNS_ROCE_EQC_BUF_PG_SZ_M,
+ roce_set_field(eqc->byte_8, HNS_ROCE_EQC_BUF_PG_SZ_M,
HNS_ROCE_EQC_BUF_PG_SZ_S,
eq->eqe_buf_pg_sz + PG_SHIFT_OFFSET);
/* set eq_producer_idx */
- roce_set_field(eqc->byte_8,
- HNS_ROCE_EQC_PROD_INDX_M,
- HNS_ROCE_EQC_PROD_INDX_S,
- HNS_ROCE_EQ_INIT_PROD_IDX);
+ roce_set_field(eqc->byte_8, HNS_ROCE_EQC_PROD_INDX_M,
+ HNS_ROCE_EQC_PROD_INDX_S, HNS_ROCE_EQ_INIT_PROD_IDX);
/* set eq_max_cnt */
- roce_set_field(eqc->byte_12,
- HNS_ROCE_EQC_MAX_CNT_M,
+ roce_set_field(eqc->byte_12, HNS_ROCE_EQC_MAX_CNT_M,
HNS_ROCE_EQC_MAX_CNT_S, eq->eq_max_cnt);
/* set eq_period */
- roce_set_field(eqc->byte_12,
- HNS_ROCE_EQC_PERIOD_M,
+ roce_set_field(eqc->byte_12, HNS_ROCE_EQC_PERIOD_M,
HNS_ROCE_EQC_PERIOD_S, eq->eq_period);
/* set eqe_report_timer */
- roce_set_field(eqc->eqe_report_timer,
- HNS_ROCE_EQC_REPORT_TIMER_M,
+ roce_set_field(eqc->eqe_report_timer, HNS_ROCE_EQC_REPORT_TIMER_M,
HNS_ROCE_EQC_REPORT_TIMER_S,
HNS_ROCE_EQ_INIT_REPORT_TIMER);
/* set eqe_ba [34:3] */
- roce_set_field(eqc->eqe_ba0,
- HNS_ROCE_EQC_EQE_BA_L_M,
+ roce_set_field(eqc->eqe_ba0, HNS_ROCE_EQC_EQE_BA_L_M,
HNS_ROCE_EQC_EQE_BA_L_S, eq->eqe_ba >> 3);
/* set eqe_ba [64:35] */
- roce_set_field(eqc->eqe_ba1,
- HNS_ROCE_EQC_EQE_BA_H_M,
+ roce_set_field(eqc->eqe_ba1, HNS_ROCE_EQC_EQE_BA_H_M,
HNS_ROCE_EQC_EQE_BA_H_S, eq->eqe_ba >> 35);
/* set eq shift */
- roce_set_field(eqc->byte_28,
- HNS_ROCE_EQC_SHIFT_M,
- HNS_ROCE_EQC_SHIFT_S, eq->shift);
+ roce_set_field(eqc->byte_28, HNS_ROCE_EQC_SHIFT_M, HNS_ROCE_EQC_SHIFT_S,
+ eq->shift);
/* set eq MSI_IDX */
- roce_set_field(eqc->byte_28,
- HNS_ROCE_EQC_MSI_INDX_M,
- HNS_ROCE_EQC_MSI_INDX_S,
- HNS_ROCE_EQ_INIT_MSI_IDX);
+ roce_set_field(eqc->byte_28, HNS_ROCE_EQC_MSI_INDX_M,
+ HNS_ROCE_EQC_MSI_INDX_S, HNS_ROCE_EQ_INIT_MSI_IDX);
/* set cur_eqe_ba [27:12] */
- roce_set_field(eqc->byte_28,
- HNS_ROCE_EQC_CUR_EQE_BA_L_M,
+ roce_set_field(eqc->byte_28, HNS_ROCE_EQC_CUR_EQE_BA_L_M,
HNS_ROCE_EQC_CUR_EQE_BA_L_S, eq->cur_eqe_ba >> 12);
/* set cur_eqe_ba [59:28] */
- roce_set_field(eqc->byte_32,
- HNS_ROCE_EQC_CUR_EQE_BA_M_M,
+ roce_set_field(eqc->byte_32, HNS_ROCE_EQC_CUR_EQE_BA_M_M,
HNS_ROCE_EQC_CUR_EQE_BA_M_S, eq->cur_eqe_ba >> 28);
/* set cur_eqe_ba [63:60] */
- roce_set_field(eqc->byte_36,
- HNS_ROCE_EQC_CUR_EQE_BA_H_M,
+ roce_set_field(eqc->byte_36, HNS_ROCE_EQC_CUR_EQE_BA_H_M,
HNS_ROCE_EQC_CUR_EQE_BA_H_S, eq->cur_eqe_ba >> 60);
/* set eq consumer idx */
- roce_set_field(eqc->byte_36,
- HNS_ROCE_EQC_CONS_INDX_M,
- HNS_ROCE_EQC_CONS_INDX_S,
- HNS_ROCE_EQ_INIT_CONS_IDX);
+ roce_set_field(eqc->byte_36, HNS_ROCE_EQC_CONS_INDX_M,
+ HNS_ROCE_EQC_CONS_INDX_S, HNS_ROCE_EQ_INIT_CONS_IDX);
/* set nxt_eqe_ba[43:12] */
- roce_set_field(eqc->nxt_eqe_ba0,
- HNS_ROCE_EQC_NXT_EQE_BA_L_M,
+ roce_set_field(eqc->nxt_eqe_ba0, HNS_ROCE_EQC_NXT_EQE_BA_L_M,
HNS_ROCE_EQC_NXT_EQE_BA_L_S, eq->nxt_eqe_ba >> 12);
/* set nxt_eqe_ba[63:44] */
- roce_set_field(eqc->nxt_eqe_ba1,
- HNS_ROCE_EQC_NXT_EQE_BA_H_M,
+ roce_set_field(eqc->nxt_eqe_ba1, HNS_ROCE_EQC_NXT_EQE_BA_H_M,
HNS_ROCE_EQC_NXT_EQE_BA_H_S, eq->nxt_eqe_ba >> 44);
}
@@ -5449,7 +5887,6 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
buf_chk_sz);
bt_num = DIV_ROUND_UP(ba_num, bt_chk_sz / BA_BYTE_LEN);
- /* hop_num = 0 */
if (mhop_num == HNS_ROCE_HOP_NUM_0) {
if (eq->entries > buf_chk_sz / eq->eqe_size) {
dev_err(dev, "eq entries %d is larger than buf_pg_sz!",
@@ -5515,7 +5952,8 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
break;
}
eq->cur_eqe_ba = eq->buf_dma[0];
- eq->nxt_eqe_ba = eq->buf_dma[1];
+ if (ba_num > 1)
+ eq->nxt_eqe_ba = eq->buf_dma[1];
} else if (mhop_num == 2) {
/* alloc L1 BT and buf */
@@ -5556,7 +5994,8 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
break;
}
eq->cur_eqe_ba = eq->buf_dma[0];
- eq->nxt_eqe_ba = eq->buf_dma[1];
+ if (ba_num > 1)
+ eq->nxt_eqe_ba = eq->buf_dma[1];
}
eq->l0_last_num = i + 1;
@@ -5699,6 +6138,93 @@ free_cmd_mbox:
return ret;
}
+static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num,
+ int comp_num, int aeq_num, int other_num)
+{
+ struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
+ int i, j;
+ int ret;
+
+ for (i = 0; i < irq_num; i++) {
+ hr_dev->irq_names[i] = kzalloc(HNS_ROCE_INT_NAME_LEN,
+ GFP_KERNEL);
+ if (!hr_dev->irq_names[i]) {
+ ret = -ENOMEM;
+ goto err_kzalloc_failed;
+ }
+ }
+
+ /* irq contains: abnormal + AEQ + CEQ */
+ for (j = 0; j < other_num; j++)
+ snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN,
+ "hns-abn-%d", j);
+
+ for (j = other_num; j < (other_num + aeq_num); j++)
+ snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN,
+ "hns-aeq-%d", j - other_num);
+
+ for (j = (other_num + aeq_num); j < irq_num; j++)
+ snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN,
+ "hns-ceq-%d", j - other_num - aeq_num);
+
+ for (j = 0; j < irq_num; j++) {
+ if (j < other_num)
+ ret = request_irq(hr_dev->irq[j],
+ hns_roce_v2_msix_interrupt_abn,
+ 0, hr_dev->irq_names[j], hr_dev);
+
+ else if (j < (other_num + comp_num))
+ ret = request_irq(eq_table->eq[j - other_num].irq,
+ hns_roce_v2_msix_interrupt_eq,
+ 0, hr_dev->irq_names[j + aeq_num],
+ &eq_table->eq[j - other_num]);
+ else
+ ret = request_irq(eq_table->eq[j - other_num].irq,
+ hns_roce_v2_msix_interrupt_eq,
+ 0, hr_dev->irq_names[j - comp_num],
+ &eq_table->eq[j - other_num]);
+ if (ret) {
+ dev_err(hr_dev->dev, "Request irq error!\n");
+ goto err_request_failed;
+ }
+ }
+
+ return 0;
+
+err_request_failed:
+ for (j -= 1; j >= 0; j--)
+ if (j < other_num)
+ free_irq(hr_dev->irq[j], hr_dev);
+ else
+ free_irq(eq_table->eq[j - other_num].irq,
+ &eq_table->eq[j - other_num]);
+
+err_kzalloc_failed:
+ for (i -= 1; i >= 0; i--)
+ kfree(hr_dev->irq_names[i]);
+
+ return ret;
+}
+
+static void __hns_roce_free_irq(struct hns_roce_dev *hr_dev)
+{
+ int irq_num;
+ int eq_num;
+ int i;
+
+ eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
+ irq_num = eq_num + hr_dev->caps.num_other_vectors;
+
+ for (i = 0; i < hr_dev->caps.num_other_vectors; i++)
+ free_irq(hr_dev->irq[i], hr_dev);
+
+ for (i = 0; i < eq_num; i++)
+ free_irq(hr_dev->eq_table.eq[i].irq, &hr_dev->eq_table.eq[i]);
+
+ for (i = 0; i < irq_num; i++)
+ kfree(hr_dev->irq_names[i]);
+}
+
static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
{
struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
@@ -5710,7 +6236,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
int other_num;
int comp_num;
int aeq_num;
- int i, j, k;
+ int i;
int ret;
other_num = hr_dev->caps.num_other_vectors;
@@ -5724,27 +6250,18 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
if (!eq_table->eq)
return -ENOMEM;
- for (i = 0; i < irq_num; i++) {
- hr_dev->irq_names[i] = kzalloc(HNS_ROCE_INT_NAME_LEN,
- GFP_KERNEL);
- if (!hr_dev->irq_names[i]) {
- ret = -ENOMEM;
- goto err_failed_kzalloc;
- }
- }
-
/* create eq */
- for (j = 0; j < eq_num; j++) {
- eq = &eq_table->eq[j];
+ for (i = 0; i < eq_num; i++) {
+ eq = &eq_table->eq[i];
eq->hr_dev = hr_dev;
- eq->eqn = j;
- if (j < comp_num) {
+ eq->eqn = i;
+ if (i < comp_num) {
/* CEQ */
eq_cmd = HNS_ROCE_CMD_CREATE_CEQC;
eq->type_flag = HNS_ROCE_CEQ;
eq->entries = hr_dev->caps.ceqe_depth;
eq->eqe_size = HNS_ROCE_CEQ_ENTRY_SIZE;
- eq->irq = hr_dev->irq[j + other_num + aeq_num];
+ eq->irq = hr_dev->irq[i + other_num + aeq_num];
eq->eq_max_cnt = HNS_ROCE_CEQ_DEFAULT_BURST_NUM;
eq->eq_period = HNS_ROCE_CEQ_DEFAULT_INTERVAL;
} else {
@@ -5753,7 +6270,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
eq->type_flag = HNS_ROCE_AEQ;
eq->entries = hr_dev->caps.aeqe_depth;
eq->eqe_size = HNS_ROCE_AEQ_ENTRY_SIZE;
- eq->irq = hr_dev->irq[j - comp_num + other_num];
+ eq->irq = hr_dev->irq[i - comp_num + other_num];
eq->eq_max_cnt = HNS_ROCE_AEQ_DEFAULT_BURST_NUM;
eq->eq_period = HNS_ROCE_AEQ_DEFAULT_INTERVAL;
}
@@ -5768,40 +6285,11 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
/* enable irq */
hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_ENABLE);
- /* irq contains: abnormal + AEQ + CEQ*/
- for (k = 0; k < irq_num; k++)
- if (k < other_num)
- snprintf((char *)hr_dev->irq_names[k],
- HNS_ROCE_INT_NAME_LEN, "hns-abn-%d", k);
- else if (k < (other_num + aeq_num))
- snprintf((char *)hr_dev->irq_names[k],
- HNS_ROCE_INT_NAME_LEN, "hns-aeq-%d",
- k - other_num);
- else
- snprintf((char *)hr_dev->irq_names[k],
- HNS_ROCE_INT_NAME_LEN, "hns-ceq-%d",
- k - other_num - aeq_num);
-
- for (k = 0; k < irq_num; k++) {
- if (k < other_num)
- ret = request_irq(hr_dev->irq[k],
- hns_roce_v2_msix_interrupt_abn,
- 0, hr_dev->irq_names[k], hr_dev);
-
- else if (k < (other_num + comp_num))
- ret = request_irq(eq_table->eq[k - other_num].irq,
- hns_roce_v2_msix_interrupt_eq,
- 0, hr_dev->irq_names[k + aeq_num],
- &eq_table->eq[k - other_num]);
- else
- ret = request_irq(eq_table->eq[k - other_num].irq,
- hns_roce_v2_msix_interrupt_eq,
- 0, hr_dev->irq_names[k - comp_num],
- &eq_table->eq[k - other_num]);
- if (ret) {
- dev_err(dev, "Request irq error!\n");
- goto err_request_irq_fail;
- }
+ ret = __hns_roce_request_irq(hr_dev, irq_num, comp_num,
+ aeq_num, other_num);
+ if (ret) {
+ dev_err(dev, "Request irq failed.\n");
+ goto err_request_irq_fail;
}
hr_dev->irq_workq =
@@ -5809,26 +6297,20 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
if (!hr_dev->irq_workq) {
dev_err(dev, "Create irq workqueue failed!\n");
ret = -ENOMEM;
- goto err_request_irq_fail;
+ goto err_create_wq_fail;
}
return 0;
+err_create_wq_fail:
+ __hns_roce_free_irq(hr_dev);
+
err_request_irq_fail:
- for (k -= 1; k >= 0; k--)
- if (k < other_num)
- free_irq(hr_dev->irq[k], hr_dev);
- else
- free_irq(eq_table->eq[k - other_num].irq,
- &eq_table->eq[k - other_num]);
+ hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_DISABLE);
err_create_eq_fail:
- for (j -= 1; j >= 0; j--)
- hns_roce_v2_free_eq(hr_dev, &eq_table->eq[j]);
-
-err_failed_kzalloc:
for (i -= 1; i >= 0; i--)
- kfree(hr_dev->irq_names[i]);
+ hns_roce_v2_free_eq(hr_dev, &eq_table->eq[i]);
kfree(eq_table->eq);
return ret;
@@ -5837,30 +6319,22 @@ err_failed_kzalloc:
static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
{
struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
- int irq_num;
int eq_num;
int i;
eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
- irq_num = eq_num + hr_dev->caps.num_other_vectors;
/* Disable irq */
hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_DISABLE);
- for (i = 0; i < hr_dev->caps.num_other_vectors; i++)
- free_irq(hr_dev->irq[i], hr_dev);
+ __hns_roce_free_irq(hr_dev);
for (i = 0; i < eq_num; i++) {
hns_roce_v2_destroy_eqc(hr_dev, i);
- free_irq(eq_table->eq[i].irq, &eq_table->eq[i]);
-
hns_roce_v2_free_eq(hr_dev, &eq_table->eq[i]);
}
- for (i = 0; i < irq_num; i++)
- kfree(hr_dev->irq_names[i]);
-
kfree(eq_table->eq);
flush_workqueue(hr_dev->irq_workq);
@@ -5888,7 +6362,7 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,
hr_dev->caps.srqwqe_hop_num));
roce_set_field(srq_context->byte_4_srqn_srqst,
SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S,
- ilog2(srq->max));
+ ilog2(srq->wqe_cnt));
roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M,
SRQC_BYTE_4_SRQN_S, srq->srqn);
@@ -5904,7 +6378,7 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,
roce_set_field(srq_context->byte_24_wqe_bt_ba,
SRQC_BYTE_24_SRQ_WQE_BT_BA_M,
SRQC_BYTE_24_SRQ_WQE_BT_BA_S,
- cpu_to_le32(dma_handle_wqe >> 35));
+ dma_handle_wqe >> 35);
roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M,
SRQC_BYTE_28_PD_S, pdn);
@@ -5912,20 +6386,18 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,
SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 0 :
fls(srq->max_gs - 1));
- srq_context->idx_bt_ba = (u32)(dma_handle_idx >> 3);
- srq_context->idx_bt_ba = cpu_to_le32(srq_context->idx_bt_ba);
+ srq_context->idx_bt_ba = cpu_to_le32(dma_handle_idx >> 3);
roce_set_field(srq_context->rsv_idx_bt_ba,
SRQC_BYTE_36_SRQ_IDX_BT_BA_M,
SRQC_BYTE_36_SRQ_IDX_BT_BA_S,
- cpu_to_le32(dma_handle_idx >> 35));
+ dma_handle_idx >> 35);
- srq_context->idx_cur_blk_addr = (u32)(mtts_idx[0] >> PAGE_ADDR_SHIFT);
srq_context->idx_cur_blk_addr =
- cpu_to_le32(srq_context->idx_cur_blk_addr);
+ cpu_to_le32(mtts_idx[0] >> PAGE_ADDR_SHIFT);
roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M,
SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S,
- cpu_to_le32((mtts_idx[0]) >> (32 + PAGE_ADDR_SHIFT)));
+ mtts_idx[0] >> (32 + PAGE_ADDR_SHIFT));
roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M,
SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S,
@@ -5935,19 +6407,18 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,
roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M,
SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S,
- hr_dev->caps.idx_ba_pg_sz);
+ hr_dev->caps.idx_ba_pg_sz + PG_SHIFT_OFFSET);
roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M,
SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S,
- hr_dev->caps.idx_buf_pg_sz);
+ hr_dev->caps.idx_buf_pg_sz + PG_SHIFT_OFFSET);
- srq_context->idx_nxt_blk_addr = (u32)(mtts_idx[1] >> PAGE_ADDR_SHIFT);
srq_context->idx_nxt_blk_addr =
- cpu_to_le32(srq_context->idx_nxt_blk_addr);
+ cpu_to_le32(mtts_idx[1] >> PAGE_ADDR_SHIFT);
roce_set_field(srq_context->rsv_idxnxtblkaddr,
SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M,
SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S,
- cpu_to_le32((mtts_idx[1]) >> (32 + PAGE_ADDR_SHIFT)));
+ mtts_idx[1] >> (32 + PAGE_ADDR_SHIFT));
roce_set_field(srq_context->byte_56_xrc_cqn,
SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S,
cqn);
@@ -5977,7 +6448,7 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
int ret;
if (srq_attr_mask & IB_SRQ_LIMIT) {
- if (srq_attr->srq_limit >= srq->max)
+ if (srq_attr->srq_limit >= srq->wqe_cnt)
return -EINVAL;
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
@@ -6037,7 +6508,7 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
SRQC_BYTE_8_SRQ_LIMIT_WL_S);
attr->srq_limit = limit_wl;
- attr->max_wr = srq->max - 1;
+ attr->max_wr = srq->wqe_cnt - 1;
attr->max_sge = srq->max_gs;
memcpy(srq_context, mailbox->buf, sizeof(*srq_context));
@@ -6090,7 +6561,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
spin_lock_irqsave(&srq->lock, flags);
- ind = srq->head & (srq->max - 1);
+ ind = srq->head & (srq->wqe_cnt - 1);
for (nreq = 0; wr; ++nreq, wr = wr->next) {
if (unlikely(wr->num_sge > srq->max_gs)) {
@@ -6105,7 +6576,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
break;
}
- wqe_idx = find_empty_entry(&srq->idx_que, srq->max);
+ wqe_idx = find_empty_entry(&srq->idx_que, srq->wqe_cnt);
if (wqe_idx < 0) {
ret = -ENOMEM;
*bad_wr = wr;
@@ -6129,7 +6600,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
}
srq->wrid[wqe_idx] = wr->wr_id;
- ind = (ind + 1) & (srq->max - 1);
+ ind = (ind + 1) & (srq->wqe_cnt - 1);
}
if (likely(nreq)) {
@@ -6141,9 +6612,10 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
*/
wmb();
- srq_db.byte_4 = HNS_ROCE_V2_SRQ_DB << V2_DB_BYTE_4_CMD_S |
- (srq->srqn & V2_DB_BYTE_4_TAG_M);
- srq_db.parameter = srq->head;
+ srq_db.byte_4 =
+ cpu_to_le32(HNS_ROCE_V2_SRQ_DB << V2_DB_BYTE_4_CMD_S |
+ (srq->srqn & V2_DB_BYTE_4_TAG_M));
+ srq_db.parameter = cpu_to_le32(srq->head);
hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg_l);
@@ -6223,12 +6695,14 @@ static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = {
MODULE_DEVICE_TABLE(pci, hns_roce_hw_v2_pci_tbl);
-static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
+static void hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
struct hnae3_handle *handle)
{
struct hns_roce_v2_priv *priv = hr_dev->priv;
int i;
+ hr_dev->pci_dev = handle->pdev;
+ hr_dev->dev = &handle->pdev->dev;
hr_dev->hw = &hns_roce_hw_v2;
hr_dev->dfx = &hns_roce_dfx_hw_v2;
hr_dev->sdb_offset = ROCEE_DB_SQ_L_0_REG;
@@ -6253,8 +6727,6 @@ static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
hr_dev->reset_cnt = handle->ae_algo->ops->ae_dev_reset_cnt(handle);
priv->handle = handle;
-
- return 0;
}
static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
@@ -6272,14 +6744,7 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
goto error_failed_kzalloc;
}
- hr_dev->pci_dev = handle->pdev;
- hr_dev->dev = &handle->pdev->dev;
-
- ret = hns_roce_hw_v2_get_cfg(hr_dev, handle);
- if (ret) {
- dev_err(hr_dev->dev, "Get Configuration failed!\n");
- goto error_failed_get_cfg;
- }
+ hns_roce_hw_v2_get_cfg(hr_dev, handle);
ret = hns_roce_init(hr_dev);
if (ret) {
@@ -6309,6 +6774,10 @@ static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
return;
handle->priv = NULL;
+
+ hr_dev->state = HNS_ROCE_DEVICE_STATE_UNINIT;
+ hns_roce_handle_device_err(hr_dev);
+
hns_roce_exit(hr_dev);
kfree(hr_dev->priv);
ib_dealloc_device(&hr_dev->ib_dev);
@@ -6370,7 +6839,6 @@ static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
{
struct hns_roce_dev *hr_dev;
- struct ib_event event;
if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) {
set_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state);
@@ -6388,10 +6856,7 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
hr_dev->active = false;
hr_dev->dis_db = true;
- event.event = IB_EVENT_DEVICE_FATAL;
- event.device = &hr_dev->ib_dev;
- event.element.port_num = 1;
- ib_dispatch_event(&event);
+ hr_dev->state = HNS_ROCE_DEVICE_STATE_RST_DOWN;
return 0;
}
@@ -6433,7 +6898,7 @@ static int hns_roce_hw_v2_reset_notify_uninit(struct hnae3_handle *handle)
handle->rinfo.reset_state = HNS_ROCE_STATE_RST_UNINIT;
dev_info(&handle->pdev->dev, "In reset process RoCE client uninit.\n");
- msleep(100);
+ msleep(HNS_ROCE_V2_HW_RST_UNINT_DELAY);
__hns_roce_hw_v2_uninit_instance(handle, false);
return 0;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 478f5a5b7aa1..2a117ff6a6be 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -81,14 +81,16 @@
#define HNS_ROCE_V2_QPC_ENTRY_SZ 256
#define HNS_ROCE_V2_IRRL_ENTRY_SZ 64
#define HNS_ROCE_V2_TRRL_ENTRY_SZ 48
+#define HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ 100
#define HNS_ROCE_V2_CQC_ENTRY_SZ 64
#define HNS_ROCE_V2_SRQC_ENTRY_SZ 64
#define HNS_ROCE_V2_MTPT_ENTRY_SZ 64
#define HNS_ROCE_V2_MTT_ENTRY_SZ 64
+#define HNS_ROCE_V2_IDX_ENTRY_SZ 4
#define HNS_ROCE_V2_CQE_ENTRY_SIZE 32
#define HNS_ROCE_V2_SCCC_ENTRY_SZ 32
-#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ 4096
-#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ 4096
+#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE
+#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE
#define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000
#define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2
#define HNS_ROCE_INVALID_LKEY 0x100
@@ -96,7 +98,10 @@
#define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2
#define HNS_ROCE_V2_RSV_QPS 8
-#define HNS_ROCE_V2_HW_RST_TIMEOUT 1000
+#define HNS_ROCE_V2_HW_RST_TIMEOUT 1000
+#define HNS_ROCE_V2_HW_RST_UNINT_DELAY 100
+
+#define HNS_ROCE_V2_HW_RST_COMPLETION_WAIT 20
#define HNS_ROCE_CONTEXT_HOP_NUM 1
#define HNS_ROCE_SCCC_HOP_NUM 1
@@ -106,7 +111,12 @@
#define HNS_ROCE_PBL_HOP_NUM 2
#define HNS_ROCE_EQE_HOP_NUM 2
#define HNS_ROCE_IDX_HOP_NUM 1
+#define HNS_ROCE_SQWQE_HOP_NUM 2
+#define HNS_ROCE_EXT_SGE_HOP_NUM 1
+#define HNS_ROCE_RQWQE_HOP_NUM 2
+#define HNS_ROCE_BA_PG_SZ_SUPPORTED_256K 6
+#define HNS_ROCE_BA_PG_SZ_SUPPORTED_16K 2
#define HNS_ROCE_V2_GID_INDEX_NUM 256
#define HNS_ROCE_V2_TABLE_CHUNK_SIZE (1 << 18)
@@ -126,8 +136,6 @@
#define HNS_ROCE_CMD_FLAG_ERR_INTR BIT(HNS_ROCE_CMD_FLAG_ERR_INTR_SHIFT)
#define HNS_ROCE_CMQ_DESC_NUM_S 3
-#define HNS_ROCE_CMQ_EN_B 16
-#define HNS_ROCE_CMQ_ENABLE BIT(HNS_ROCE_CMQ_EN_B)
#define HNS_ROCE_CMQ_SCC_CLR_DONE_CNT 5
@@ -236,6 +244,7 @@ enum hns_roce_opcode_type {
HNS_ROCE_OPC_CFG_EXT_LLM = 0x8403,
HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404,
HNS_ROCE_OPC_QUERY_PF_TIMER_RES = 0x8406,
+ HNS_ROCE_OPC_QUERY_PF_CAPS_NUM = 0x8408,
HNS_ROCE_OPC_CFG_SGID_TB = 0x8500,
HNS_ROCE_OPC_CFG_SMAC_TB = 0x8501,
HNS_ROCE_OPC_POST_MB = 0x8504,
@@ -642,7 +651,7 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_76_ATE_S 27
#define V2_QPC_BYTE_76_RQIE_S 28
-
+#define V2_QPC_BYTE_76_EXT_ATE_S 29
#define V2_QPC_BYTE_76_RQ_VLAN_EN_S 30
#define V2_QPC_BYTE_80_RX_CQN_S 0
#define V2_QPC_BYTE_80_RX_CQN_M GENMASK(23, 0)
@@ -1568,6 +1577,155 @@ struct hns_roce_cfg_smac_tb {
#define CFG_SMAC_TB_VF_SMAC_H_S 0
#define CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0)
+#define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 5
+struct hns_roce_query_pf_caps_a {
+ u8 number_ports;
+ u8 local_ca_ack_delay;
+ __le16 max_sq_sg;
+ __le16 max_sq_inline;
+ __le16 max_rq_sg;
+ __le32 max_extend_sg;
+ __le16 num_qpc_timer;
+ __le16 num_cqc_timer;
+ __le16 max_srq_sges;
+ u8 num_aeq_vectors;
+ u8 num_other_vectors;
+ u8 max_sq_desc_sz;
+ u8 max_rq_desc_sz;
+ u8 max_srq_desc_sz;
+ u8 cq_entry_sz;
+};
+
+struct hns_roce_query_pf_caps_b {
+ u8 mtpt_entry_sz;
+ u8 irrl_entry_sz;
+ u8 trrl_entry_sz;
+ u8 cqc_entry_sz;
+ u8 srqc_entry_sz;
+ u8 idx_entry_sz;
+ u8 scc_ctx_entry_sz;
+ u8 max_mtu;
+ __le16 qpc_entry_sz;
+ __le16 qpc_timer_entry_sz;
+ __le16 cqc_timer_entry_sz;
+ u8 min_cqes;
+ u8 min_wqes;
+ __le32 page_size_cap;
+ u8 pkey_table_len;
+ u8 phy_num_uars;
+ u8 ctx_hop_num;
+ u8 pbl_hop_num;
+};
+
+struct hns_roce_query_pf_caps_c {
+ __le32 cap_flags_num_pds;
+ __le32 max_gid_num_cqs;
+ __le32 cq_depth;
+ __le32 num_mrws;
+ __le32 ord_num_qps;
+ __le16 sq_depth;
+ __le16 rq_depth;
+};
+
+#define V2_QUERY_PF_CAPS_C_NUM_PDS_S 0
+#define V2_QUERY_PF_CAPS_C_NUM_PDS_M GENMASK(19, 0)
+
+#define V2_QUERY_PF_CAPS_C_CAP_FLAGS_S 20
+#define V2_QUERY_PF_CAPS_C_CAP_FLAGS_M GENMASK(31, 20)
+
+#define V2_QUERY_PF_CAPS_C_NUM_CQS_S 0
+#define V2_QUERY_PF_CAPS_C_NUM_CQS_M GENMASK(19, 0)
+
+#define V2_QUERY_PF_CAPS_C_MAX_GID_S 20
+#define V2_QUERY_PF_CAPS_C_MAX_GID_M GENMASK(28, 20)
+
+#define V2_QUERY_PF_CAPS_C_CQ_DEPTH_S 0
+#define V2_QUERY_PF_CAPS_C_CQ_DEPTH_M GENMASK(22, 0)
+
+#define V2_QUERY_PF_CAPS_C_NUM_MRWS_S 0
+#define V2_QUERY_PF_CAPS_C_NUM_MRWS_M GENMASK(19, 0)
+
+#define V2_QUERY_PF_CAPS_C_NUM_QPS_S 0
+#define V2_QUERY_PF_CAPS_C_NUM_QPS_M GENMASK(19, 0)
+
+#define V2_QUERY_PF_CAPS_C_MAX_ORD_S 20
+#define V2_QUERY_PF_CAPS_C_MAX_ORD_M GENMASK(27, 20)
+
+struct hns_roce_query_pf_caps_d {
+ __le32 wq_hop_num_max_srqs;
+ __le16 srq_depth;
+ __le16 rsv;
+ __le32 num_ceqs_ceq_depth;
+ __le32 arm_st_aeq_depth;
+ __le32 num_uars_rsv_pds;
+ __le32 rsv_uars_rsv_qps;
+};
+#define V2_QUERY_PF_CAPS_D_NUM_SRQS_S 0
+#define V2_QUERY_PF_CAPS_D_NUM_SRQS_M GENMASK(20, 0)
+
+#define V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S 20
+#define V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M GENMASK(21, 20)
+
+#define V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_S 22
+#define V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_M GENMASK(23, 22)
+
+#define V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_S 24
+#define V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_M GENMASK(25, 24)
+
+#define V2_QUERY_PF_CAPS_D_CEQ_DEPTH_S 0
+#define V2_QUERY_PF_CAPS_D_CEQ_DEPTH_M GENMASK(21, 0)
+
+#define V2_QUERY_PF_CAPS_D_NUM_CEQS_S 22
+#define V2_QUERY_PF_CAPS_D_NUM_CEQS_M GENMASK(31, 22)
+
+#define V2_QUERY_PF_CAPS_D_AEQ_DEPTH_S 0
+#define V2_QUERY_PF_CAPS_D_AEQ_DEPTH_M GENMASK(21, 0)
+
+#define V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_S 22
+#define V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_M GENMASK(23, 22)
+
+#define V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_S 24
+#define V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_M GENMASK(25, 24)
+
+#define V2_QUERY_PF_CAPS_D_RSV_PDS_S 0
+#define V2_QUERY_PF_CAPS_D_RSV_PDS_M GENMASK(19, 0)
+
+#define V2_QUERY_PF_CAPS_D_NUM_UARS_S 20
+#define V2_QUERY_PF_CAPS_D_NUM_UARS_M GENMASK(27, 20)
+
+#define V2_QUERY_PF_CAPS_D_RSV_QPS_S 0
+#define V2_QUERY_PF_CAPS_D_RSV_QPS_M GENMASK(19, 0)
+
+#define V2_QUERY_PF_CAPS_D_RSV_UARS_S 20
+#define V2_QUERY_PF_CAPS_D_RSV_UARS_M GENMASK(27, 20)
+
+struct hns_roce_query_pf_caps_e {
+ __le32 chunk_size_shift_rsv_mrws;
+ __le32 rsv_cqs;
+ __le32 rsv_srqs;
+ __le32 rsv_lkey;
+ __le16 ceq_max_cnt;
+ __le16 ceq_period;
+ __le16 aeq_max_cnt;
+ __le16 aeq_period;
+};
+
+#define V2_QUERY_PF_CAPS_E_RSV_MRWS_S 0
+#define V2_QUERY_PF_CAPS_E_RSV_MRWS_M GENMASK(19, 0)
+
+#define V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_S 20
+#define V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_M GENMASK(31, 20)
+
+#define V2_QUERY_PF_CAPS_E_RSV_CQS_S 0
+#define V2_QUERY_PF_CAPS_E_RSV_CQS_M GENMASK(19, 0)
+
+#define V2_QUERY_PF_CAPS_E_RSV_SRQS_S 0
+#define V2_QUERY_PF_CAPS_E_RSV_SRQS_M GENMASK(19, 0)
+
+#define V2_QUERY_PF_CAPS_E_RSV_LKEYS_S 0
+#define V2_QUERY_PF_CAPS_E_RSV_LKEYS_M GENMASK(19, 0)
+
struct hns_roce_cmq_desc {
__le16 opcode;
__le16 flag;
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 1e4ba48f5613..d0031d559213 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -90,7 +90,7 @@ static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context)
static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context)
{
struct hns_roce_dev *hr_dev = to_hr_dev(attr->device);
- struct ib_gid_attr zattr = { };
+ struct ib_gid_attr zattr = {};
u8 port = attr->port_num - 1;
int ret;
@@ -111,7 +111,7 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port,
netdev = hr_dev->iboe.netdevs[port];
if (!netdev) {
- dev_err(dev, "port(%d) can't find netdev\n", port);
+ dev_err(dev, "Can't find netdev on port(%u)!\n", port);
return -ENODEV;
}
@@ -210,7 +210,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
props->max_pkeys = 1;
props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
- props->max_srq = hr_dev->caps.max_srqs;
+ props->max_srq = hr_dev->caps.num_srqs;
props->max_srq_wr = hr_dev->caps.max_srq_wrs;
props->max_srq_sge = hr_dev->caps.max_srq_sges;
}
@@ -253,16 +253,18 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num,
net_dev = hr_dev->iboe.netdevs[port];
if (!net_dev) {
spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
- dev_err(dev, "find netdev %d failed!\r\n", port);
+ dev_err(dev, "Find netdev %u failed!\n", port);
return -EINVAL;
}
mtu = iboe_get_mtu(net_dev->mtu);
props->active_mtu = mtu ? min(props->max_mtu, mtu) : IB_MTU_256;
- props->state = (netif_running(net_dev) && netif_carrier_ok(net_dev)) ?
- IB_PORT_ACTIVE : IB_PORT_DOWN;
- props->phys_state = (props->state == IB_PORT_ACTIVE) ?
- HNS_ROCE_PHY_LINKUP : HNS_ROCE_PHY_DISABLED;
+ props->state = netif_running(net_dev) && netif_carrier_ok(net_dev) ?
+ IB_PORT_ACTIVE :
+ IB_PORT_DOWN;
+ props->phys_state = props->state == IB_PORT_ACTIVE ?
+ IB_PORT_PHYS_STATE_LINK_UP :
+ IB_PORT_PHYS_STATE_DISABLED;
spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
@@ -300,12 +302,6 @@ static int hns_roce_modify_device(struct ib_device *ib_dev, int mask,
return 0;
}
-static int hns_roce_modify_port(struct ib_device *ib_dev, u8 port_num, int mask,
- struct ib_port_modify *props)
-{
- return 0;
-}
-
static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
struct ib_udata *udata)
{
@@ -358,7 +354,8 @@ static int hns_roce_mmap(struct ib_ucontext *context,
return rdma_user_mmap_io(context, vma,
to_hr_ucontext(context)->uar.pfn,
PAGE_SIZE,
- pgprot_noncached(vma->vm_page_prot));
+ pgprot_noncached(vma->vm_page_prot),
+ NULL);
/* vm_pgoff: 1 -- TPTR */
case 1:
@@ -371,7 +368,8 @@ static int hns_roce_mmap(struct ib_ucontext *context,
return rdma_user_mmap_io(context, vma,
hr_dev->tptr_dma_addr >> PAGE_SHIFT,
hr_dev->tptr_size,
- vma->vm_page_prot);
+ vma->vm_page_prot,
+ NULL);
default:
return -EINVAL;
@@ -422,14 +420,14 @@ static const struct ib_device_ops hns_roce_dev_ops = {
.alloc_pd = hns_roce_alloc_pd,
.alloc_ucontext = hns_roce_alloc_ucontext,
.create_ah = hns_roce_create_ah,
- .create_cq = hns_roce_ib_create_cq,
+ .create_cq = hns_roce_create_cq,
.create_qp = hns_roce_create_qp,
.dealloc_pd = hns_roce_dealloc_pd,
.dealloc_ucontext = hns_roce_dealloc_ucontext,
.del_gid = hns_roce_del_gid,
.dereg_mr = hns_roce_dereg_mr,
.destroy_ah = hns_roce_destroy_ah,
- .destroy_cq = hns_roce_ib_destroy_cq,
+ .destroy_cq = hns_roce_destroy_cq,
.disassociate_ucontext = hns_roce_disassociate_ucontext,
.fill_res_entry = hns_roce_fill_res_entry,
.get_dma_mr = hns_roce_get_dma_mr,
@@ -437,7 +435,6 @@ static const struct ib_device_ops hns_roce_dev_ops = {
.get_port_immutable = hns_roce_port_immutable,
.mmap = hns_roce_mmap,
.modify_device = hns_roce_modify_device,
- .modify_port = hns_roce_modify_port,
.modify_qp = hns_roce_modify_qp,
.query_ah = hns_roce_query_ah,
.query_device = hns_roce_query_device,
@@ -485,13 +482,13 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_dev = &hr_dev->ib_dev;
- ib_dev->node_type = RDMA_NODE_IB_CA;
- ib_dev->dev.parent = dev;
+ ib_dev->node_type = RDMA_NODE_IB_CA;
+ ib_dev->dev.parent = dev;
- ib_dev->phys_port_cnt = hr_dev->caps.num_ports;
- ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey;
- ib_dev->num_comp_vectors = hr_dev->caps.num_comp_vectors;
- ib_dev->uverbs_cmd_mask =
+ ib_dev->phys_port_cnt = hr_dev->caps.num_ports;
+ ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey;
+ ib_dev->num_comp_vectors = hr_dev->caps.num_comp_vectors;
+ ib_dev->uverbs_cmd_mask =
(1ULL << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ULL << IB_USER_VERBS_CMD_QUERY_DEVICE) |
(1ULL << IB_USER_VERBS_CMD_QUERY_PORT) |
@@ -507,8 +504,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
(1ULL << IB_USER_VERBS_CMD_QUERY_QP) |
(1ULL << IB_USER_VERBS_CMD_DESTROY_QP);
- ib_dev->uverbs_ex_cmd_mask |=
- (1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
+ ib_dev->uverbs_ex_cmd_mask |= (1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) {
ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR);
@@ -593,11 +589,13 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) {
ret = hns_roce_init_hem_table(hr_dev,
- &hr_dev->mr_table.mtt_cqe_table,
- HEM_TYPE_CQE, hr_dev->caps.mtt_entry_sz,
- hr_dev->caps.num_cqe_segs, 1);
+ &hr_dev->mr_table.mtt_cqe_table,
+ HEM_TYPE_CQE,
+ hr_dev->caps.mtt_entry_sz,
+ hr_dev->caps.num_cqe_segs, 1);
if (ret) {
- dev_err(dev, "Failed to init MTT CQE context memory, aborting.\n");
+ dev_err(dev,
+ "Failed to init CQE context memory, aborting.\n");
goto err_unmap_cqe;
}
}
@@ -637,7 +635,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
hr_dev->caps.num_qps, 1);
if (ret) {
dev_err(dev,
- "Failed to init trrl_table memory, aborting.\n");
+ "Failed to init trrl_table memory, aborting.\n");
goto err_unmap_irrl;
}
}
@@ -657,7 +655,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
hr_dev->caps.num_srqs, 1);
if (ret) {
dev_err(dev,
- "Failed to init SRQ context memory, aborting.\n");
+ "Failed to init SRQ context memory, aborting.\n");
goto err_unmap_cq;
}
}
@@ -696,33 +694,31 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
hr_dev->caps.num_qps, 1);
if (ret) {
dev_err(dev,
- "Failed to init SCC context memory, aborting.\n");
+ "Failed to init SCC context memory, aborting.\n");
goto err_unmap_idx;
}
}
if (hr_dev->caps.qpc_timer_entry_sz) {
- ret = hns_roce_init_hem_table(hr_dev,
- &hr_dev->qpc_timer_table,
+ ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qpc_timer_table,
HEM_TYPE_QPC_TIMER,
hr_dev->caps.qpc_timer_entry_sz,
hr_dev->caps.num_qpc_timer, 1);
if (ret) {
dev_err(dev,
- "Failed to init QPC timer memory, aborting.\n");
+ "Failed to init QPC timer memory, aborting.\n");
goto err_unmap_ctx;
}
}
if (hr_dev->caps.cqc_timer_entry_sz) {
- ret = hns_roce_init_hem_table(hr_dev,
- &hr_dev->cqc_timer_table,
+ ret = hns_roce_init_hem_table(hr_dev, &hr_dev->cqc_timer_table,
HEM_TYPE_CQC_TIMER,
hr_dev->caps.cqc_timer_entry_sz,
hr_dev->caps.num_cqc_timer, 1);
if (ret) {
dev_err(dev,
- "Failed to init CQC timer memory, aborting.\n");
+ "Failed to init CQC timer memory, aborting.\n");
goto err_unmap_qpc_timer;
}
}
@@ -731,8 +727,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
err_unmap_qpc_timer:
if (hr_dev->caps.qpc_timer_entry_sz)
- hns_roce_cleanup_hem_table(hr_dev,
- &hr_dev->qpc_timer_table);
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qpc_timer_table);
err_unmap_ctx:
if (hr_dev->caps.sccc_entry_sz)
@@ -867,6 +862,50 @@ err_uar_table_free:
return ret;
}
+static void check_and_get_armed_cq(struct list_head *cq_list, struct ib_cq *cq)
+{
+ struct hns_roce_cq *hr_cq = to_hr_cq(cq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&hr_cq->lock, flags);
+ if (cq->comp_handler) {
+ if (!hr_cq->is_armed) {
+ hr_cq->is_armed = 1;
+ list_add_tail(&hr_cq->node, cq_list);
+ }
+ }
+ spin_unlock_irqrestore(&hr_cq->lock, flags);
+}
+
+void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_qp *hr_qp;
+ struct hns_roce_cq *hr_cq;
+ struct list_head cq_list;
+ unsigned long flags_qp;
+ unsigned long flags;
+
+ INIT_LIST_HEAD(&cq_list);
+
+ spin_lock_irqsave(&hr_dev->qp_list_lock, flags);
+ list_for_each_entry(hr_qp, &hr_dev->qp_list, node) {
+ spin_lock_irqsave(&hr_qp->sq.lock, flags_qp);
+ if (hr_qp->sq.tail != hr_qp->sq.head)
+ check_and_get_armed_cq(&cq_list, hr_qp->ibqp.send_cq);
+ spin_unlock_irqrestore(&hr_qp->sq.lock, flags_qp);
+
+ spin_lock_irqsave(&hr_qp->rq.lock, flags_qp);
+ if ((!hr_qp->ibqp.srq) && (hr_qp->rq.tail != hr_qp->rq.head))
+ check_and_get_armed_cq(&cq_list, hr_qp->ibqp.recv_cq);
+ spin_unlock_irqrestore(&hr_qp->rq.lock, flags_qp);
+ }
+
+ list_for_each_entry(hr_cq, &cq_list, node)
+ hns_roce_cq_completion(hr_dev, hr_cq->cqn);
+
+ spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags);
+}
+
int hns_roce_init(struct hns_roce_dev *hr_dev)
{
int ret;
@@ -901,6 +940,7 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
goto error_failed_cmd_init;
}
+ /* EQ depends on poll mode, event mode depends on EQ */
ret = hr_dev->hw->init_eq(hr_dev);
if (ret) {
dev_err(dev, "eq init failed!\n");
@@ -910,8 +950,9 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
if (hr_dev->cmd_mod) {
ret = hns_roce_cmd_use_events(hr_dev);
if (ret) {
- dev_err(dev, "Switch to event-driven cmd failed!\n");
- goto error_failed_use_event;
+ dev_warn(dev,
+ "Cmd event mode failed, set back to poll!\n");
+ hns_roce_cmd_use_polling(hr_dev);
}
}
@@ -935,6 +976,9 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
}
}
+ INIT_LIST_HEAD(&hr_dev->qp_list);
+ spin_lock_init(&hr_dev->qp_list_lock);
+
ret = hns_roce_register_device(hr_dev);
if (ret)
goto error_failed_register_device;
@@ -954,8 +998,6 @@ error_failed_setup_hca:
error_failed_init_hem:
if (hr_dev->cmd_mod)
hns_roce_cmd_use_polling(hr_dev);
-
-error_failed_use_event:
hr_dev->hw->cleanup_eq(hr_dev);
error_failed_eq_table:
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 549e1a38dfe0..b9898e71655a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -48,21 +48,21 @@ unsigned long key_to_hw_index(u32 key)
return (key << 24) | (key >> 8);
}
-static int hns_roce_sw2hw_mpt(struct hns_roce_dev *hr_dev,
- struct hns_roce_cmd_mailbox *mailbox,
- unsigned long mpt_index)
+static int hns_roce_hw_create_mpt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmd_mailbox *mailbox,
+ unsigned long mpt_index)
{
return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0,
- HNS_ROCE_CMD_SW2HW_MPT,
+ HNS_ROCE_CMD_CREATE_MPT,
HNS_ROCE_CMD_TIMEOUT_MSECS);
}
-int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
- struct hns_roce_cmd_mailbox *mailbox,
- unsigned long mpt_index)
+int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmd_mailbox *mailbox,
+ unsigned long mpt_index)
{
return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? mailbox->dma : 0,
- mpt_index, !mailbox, HNS_ROCE_CMD_HW2SW_MPT,
+ mpt_index, !mailbox, HNS_ROCE_CMD_DESTROY_MPT,
HNS_ROCE_CMD_TIMEOUT_MSECS);
}
@@ -83,7 +83,7 @@ static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order,
}
}
spin_unlock(&buddy->lock);
- return -1;
+ return -EINVAL;
found:
clear_bit(*seg, buddy->bits[o]);
@@ -206,13 +206,14 @@ static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order,
}
ret = hns_roce_buddy_alloc(buddy, order, seg);
- if (ret == -1)
- return -1;
+ if (ret)
+ return ret;
- if (hns_roce_table_get_range(hr_dev, table, *seg,
- *seg + (1 << order) - 1)) {
+ ret = hns_roce_table_get_range(hr_dev, table, *seg,
+ *seg + (1 << order) - 1);
+ if (ret) {
hns_roce_buddy_free(buddy, *seg, order);
- return -1;
+ return ret;
}
return 0;
@@ -347,155 +348,207 @@ static void hns_roce_loop_free(struct hns_roce_dev *hr_dev,
mr->pbl_bt_l0 = NULL;
mr->pbl_l0_dma_addr = 0;
}
+static int pbl_1hop_alloc(struct hns_roce_dev *hr_dev, int npages,
+ struct hns_roce_mr *mr, u32 pbl_bt_sz)
+{
+ struct device *dev = hr_dev->dev;
-/* PBL multi hop addressing */
-static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
- struct hns_roce_mr *mr)
+ if (npages > pbl_bt_sz / 8) {
+ dev_err(dev, "npages %d is larger than buf_pg_sz!",
+ npages);
+ return -EINVAL;
+ }
+ mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
+ &(mr->pbl_dma_addr),
+ GFP_KERNEL);
+ if (!mr->pbl_buf)
+ return -ENOMEM;
+
+ mr->pbl_size = npages;
+ mr->pbl_ba = mr->pbl_dma_addr;
+ mr->pbl_hop_num = 1;
+ mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
+ mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
+ return 0;
+}
+
+static int pbl_2hop_alloc(struct hns_roce_dev *hr_dev, int npages,
+ struct hns_roce_mr *mr, u32 pbl_bt_sz)
{
struct device *dev = hr_dev->dev;
- int mr_alloc_done = 0;
int npages_allocated;
- int i = 0, j = 0;
- u32 pbl_bt_sz;
- u32 mhop_num;
u64 pbl_last_bt_num;
u64 pbl_bt_cnt = 0;
- u64 bt_idx;
u64 size;
+ int i;
- mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num);
- pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
- if (mhop_num == HNS_ROCE_HOP_NUM_0)
- return 0;
-
- /* hop_num = 1 */
- if (mhop_num == 1) {
- if (npages > pbl_bt_sz / 8) {
- dev_err(dev, "npages %d is larger than buf_pg_sz!",
- npages);
- return -EINVAL;
+ /* alloc L1 BT */
+ for (i = 0; i < pbl_bt_sz / 8; i++) {
+ if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
+ size = pbl_bt_sz;
+ } else {
+ npages_allocated = i * (pbl_bt_sz / 8);
+ size = (npages - npages_allocated) * 8;
}
- mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
- &(mr->pbl_dma_addr),
- GFP_KERNEL);
- if (!mr->pbl_buf)
+ mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size,
+ &(mr->pbl_l1_dma_addr[i]),
+ GFP_KERNEL);
+ if (!mr->pbl_bt_l1[i]) {
+ hns_roce_loop_free(hr_dev, mr, 1, i, 0);
return -ENOMEM;
+ }
- mr->pbl_size = npages;
- mr->pbl_ba = mr->pbl_dma_addr;
- mr->pbl_hop_num = mhop_num;
- mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
- mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
- return 0;
+ *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
+
+ pbl_bt_cnt++;
+ if (pbl_bt_cnt >= pbl_last_bt_num)
+ break;
}
- mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8,
- sizeof(*mr->pbl_l1_dma_addr),
+ mr->l0_chunk_last_num = i + 1;
+
+ return 0;
+}
+
+static int pbl_3hop_alloc(struct hns_roce_dev *hr_dev, int npages,
+ struct hns_roce_mr *mr, u32 pbl_bt_sz)
+{
+ struct device *dev = hr_dev->dev;
+ int mr_alloc_done = 0;
+ int npages_allocated;
+ u64 pbl_last_bt_num;
+ u64 pbl_bt_cnt = 0;
+ u64 bt_idx;
+ u64 size;
+ int i;
+ int j = 0;
+
+ pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
+
+ mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num,
+ sizeof(*mr->pbl_l2_dma_addr),
GFP_KERNEL);
- if (!mr->pbl_l1_dma_addr)
+ if (!mr->pbl_l2_dma_addr)
return -ENOMEM;
- mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1),
+ mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num,
+ sizeof(*mr->pbl_bt_l2),
GFP_KERNEL);
- if (!mr->pbl_bt_l1)
- goto err_kcalloc_bt_l1;
-
- if (mhop_num == 3) {
- mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num,
- sizeof(*mr->pbl_l2_dma_addr),
- GFP_KERNEL);
- if (!mr->pbl_l2_dma_addr)
- goto err_kcalloc_l2_dma;
+ if (!mr->pbl_bt_l2)
+ goto err_kcalloc_bt_l2;
+
+ /* alloc L1, L2 BT */
+ for (i = 0; i < pbl_bt_sz / 8; i++) {
+ mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz,
+ &(mr->pbl_l1_dma_addr[i]),
+ GFP_KERNEL);
+ if (!mr->pbl_bt_l1[i]) {
+ hns_roce_loop_free(hr_dev, mr, 1, i, 0);
+ goto err_dma_alloc_l0;
+ }
- mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num,
- sizeof(*mr->pbl_bt_l2),
- GFP_KERNEL);
- if (!mr->pbl_bt_l2)
- goto err_kcalloc_bt_l2;
- }
+ *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
- /* alloc L0 BT */
- mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz,
- &(mr->pbl_l0_dma_addr),
- GFP_KERNEL);
- if (!mr->pbl_bt_l0)
- goto err_dma_alloc_l0;
+ for (j = 0; j < pbl_bt_sz / 8; j++) {
+ bt_idx = i * pbl_bt_sz / 8 + j;
- if (mhop_num == 2) {
- /* alloc L1 BT */
- for (i = 0; i < pbl_bt_sz / 8; i++) {
if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
size = pbl_bt_sz;
} else {
- npages_allocated = i * (pbl_bt_sz / 8);
+ npages_allocated = bt_idx *
+ (pbl_bt_sz / 8);
size = (npages - npages_allocated) * 8;
}
- mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size,
- &(mr->pbl_l1_dma_addr[i]),
- GFP_KERNEL);
- if (!mr->pbl_bt_l1[i]) {
- hns_roce_loop_free(hr_dev, mr, 1, i, 0);
+ mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent(
+ dev, size,
+ &(mr->pbl_l2_dma_addr[bt_idx]),
+ GFP_KERNEL);
+ if (!mr->pbl_bt_l2[bt_idx]) {
+ hns_roce_loop_free(hr_dev, mr, 2, i, j);
goto err_dma_alloc_l0;
}
- *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
+ *(mr->pbl_bt_l1[i] + j) =
+ mr->pbl_l2_dma_addr[bt_idx];
pbl_bt_cnt++;
- if (pbl_bt_cnt >= pbl_last_bt_num)
+ if (pbl_bt_cnt >= pbl_last_bt_num) {
+ mr_alloc_done = 1;
break;
- }
- } else if (mhop_num == 3) {
- /* alloc L1, L2 BT */
- for (i = 0; i < pbl_bt_sz / 8; i++) {
- mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz,
- &(mr->pbl_l1_dma_addr[i]),
- GFP_KERNEL);
- if (!mr->pbl_bt_l1[i]) {
- hns_roce_loop_free(hr_dev, mr, 1, i, 0);
- goto err_dma_alloc_l0;
}
+ }
- *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
+ if (mr_alloc_done)
+ break;
+ }
- for (j = 0; j < pbl_bt_sz / 8; j++) {
- bt_idx = i * pbl_bt_sz / 8 + j;
+ mr->l0_chunk_last_num = i + 1;
+ mr->l1_chunk_last_num = j + 1;
- if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
- size = pbl_bt_sz;
- } else {
- npages_allocated = bt_idx *
- (pbl_bt_sz / 8);
- size = (npages - npages_allocated) * 8;
- }
- mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent(
- dev, size,
- &(mr->pbl_l2_dma_addr[bt_idx]),
- GFP_KERNEL);
- if (!mr->pbl_bt_l2[bt_idx]) {
- hns_roce_loop_free(hr_dev, mr, 2, i, j);
- goto err_dma_alloc_l0;
- }
- *(mr->pbl_bt_l1[i] + j) =
- mr->pbl_l2_dma_addr[bt_idx];
+ return 0;
- pbl_bt_cnt++;
- if (pbl_bt_cnt >= pbl_last_bt_num) {
- mr_alloc_done = 1;
- break;
- }
- }
+err_dma_alloc_l0:
+ kfree(mr->pbl_bt_l2);
+ mr->pbl_bt_l2 = NULL;
- if (mr_alloc_done)
- break;
- }
+err_kcalloc_bt_l2:
+ kfree(mr->pbl_l2_dma_addr);
+ mr->pbl_l2_dma_addr = NULL;
+
+ return -ENOMEM;
+}
+
+/* PBL multi hop addressing */
+static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
+ struct hns_roce_mr *mr)
+{
+ struct device *dev = hr_dev->dev;
+ u32 pbl_bt_sz;
+ u32 mhop_num;
+
+ mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num);
+ pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
+
+ if (mhop_num == HNS_ROCE_HOP_NUM_0)
+ return 0;
+
+ if (mhop_num == 1)
+ return pbl_1hop_alloc(hr_dev, npages, mr, pbl_bt_sz);
+
+ mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8,
+ sizeof(*mr->pbl_l1_dma_addr),
+ GFP_KERNEL);
+ if (!mr->pbl_l1_dma_addr)
+ return -ENOMEM;
+
+ mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1),
+ GFP_KERNEL);
+ if (!mr->pbl_bt_l1)
+ goto err_kcalloc_bt_l1;
+
+ /* alloc L0 BT */
+ mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz,
+ &(mr->pbl_l0_dma_addr),
+ GFP_KERNEL);
+ if (!mr->pbl_bt_l0)
+ goto err_kcalloc_l2_dma;
+
+ if (mhop_num == 2) {
+ if (pbl_2hop_alloc(hr_dev, npages, mr, pbl_bt_sz))
+ goto err_kcalloc_l2_dma;
+ }
+
+ if (mhop_num == 3) {
+ if (pbl_3hop_alloc(hr_dev, npages, mr, pbl_bt_sz))
+ goto err_kcalloc_l2_dma;
}
- mr->l0_chunk_last_num = i + 1;
- if (mhop_num == 3)
- mr->l1_chunk_last_num = j + 1;
mr->pbl_size = npages;
mr->pbl_ba = mr->pbl_l0_dma_addr;
@@ -505,14 +558,6 @@ static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
return 0;
-err_dma_alloc_l0:
- kfree(mr->pbl_bt_l2);
- mr->pbl_bt_l2 = NULL;
-
-err_kcalloc_bt_l2:
- kfree(mr->pbl_l2_dma_addr);
- mr->pbl_l2_dma_addr = NULL;
-
err_kcalloc_l2_dma:
kfree(mr->pbl_bt_l1);
mr->pbl_bt_l1 = NULL;
@@ -534,7 +579,7 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
/* Allocate a key for mr from mr_table */
ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
- if (ret == -1)
+ if (ret)
return -ENOMEM;
mr->iova = iova; /* MR va starting addr */
@@ -663,10 +708,11 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
int ret;
if (mr->enabled) {
- ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key)
- & (hr_dev->caps.num_mtpts - 1));
+ ret = hns_roce_hw_destroy_mpt(hr_dev, NULL,
+ key_to_hw_index(mr->key) &
+ (hr_dev->caps.num_mtpts - 1));
if (ret)
- dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret);
+ dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret);
}
if (mr->size != ~0ULL) {
@@ -719,10 +765,10 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
goto err_page;
}
- ret = hns_roce_sw2hw_mpt(hr_dev, mailbox,
- mtpt_idx & (hr_dev->caps.num_mtpts - 1));
+ ret = hns_roce_hw_create_mpt(hr_dev, mailbox,
+ mtpt_idx & (hr_dev->caps.num_mtpts - 1));
if (ret) {
- dev_err(dev, "SW2HW_MPT failed (%d)\n", ret);
+ dev_err(dev, "CREATE_MPT failed (%d)\n", ret);
goto err_page;
}
@@ -1018,8 +1064,8 @@ int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) {
if (page_addr & ((1 << mtt->page_shift) - 1)) {
dev_err(dev,
- "page_addr 0x%llx is not page_shift %d alignment!\n",
- page_addr, mtt->page_shift);
+ "page_addr is not page_shift %d alignment!\n",
+ mtt->page_shift);
ret = -EINVAL;
goto out;
}
@@ -1099,7 +1145,7 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (!mr)
return ERR_PTR(-ENOMEM);
- mr->umem = ib_umem_get(udata, start, length, access_flags, 0);
+ mr->umem = ib_umem_get(pd->device, start, length, access_flags);
if (IS_ERR(mr->umem)) {
ret = PTR_ERR(mr->umem);
goto err_free;
@@ -1161,6 +1207,83 @@ err_free:
return ERR_PTR(ret);
}
+static int rereg_mr_trans(struct ib_mr *ibmr, int flags,
+ u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct hns_roce_cmd_mailbox *mailbox,
+ u32 pdn, struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
+ struct hns_roce_mr *mr = to_hr_mr(ibmr);
+ struct device *dev = hr_dev->dev;
+ int npages;
+ int ret;
+
+ if (mr->size != ~0ULL) {
+ npages = ib_umem_page_count(mr->umem);
+
+ if (hr_dev->caps.pbl_hop_num)
+ hns_roce_mhop_free(hr_dev, mr);
+ else
+ dma_free_coherent(dev, npages * 8,
+ mr->pbl_buf, mr->pbl_dma_addr);
+ }
+ ib_umem_release(mr->umem);
+
+ mr->umem = ib_umem_get(ibmr->device, start, length, mr_access_flags);
+ if (IS_ERR(mr->umem)) {
+ ret = PTR_ERR(mr->umem);
+ mr->umem = NULL;
+ return -ENOMEM;
+ }
+ npages = ib_umem_page_count(mr->umem);
+
+ if (hr_dev->caps.pbl_hop_num) {
+ ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
+ if (ret)
+ goto release_umem;
+ } else {
+ mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
+ &(mr->pbl_dma_addr),
+ GFP_KERNEL);
+ if (!mr->pbl_buf) {
+ ret = -ENOMEM;
+ goto release_umem;
+ }
+ }
+
+ ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
+ mr_access_flags, virt_addr,
+ length, mailbox->buf);
+ if (ret)
+ goto release_umem;
+
+ ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
+ if (ret) {
+ if (mr->size != ~0ULL) {
+ npages = ib_umem_page_count(mr->umem);
+
+ if (hr_dev->caps.pbl_hop_num)
+ hns_roce_mhop_free(hr_dev, mr);
+ else
+ dma_free_coherent(dev, npages * 8,
+ mr->pbl_buf,
+ mr->pbl_dma_addr);
+ }
+
+ goto release_umem;
+ }
+
+ return 0;
+
+release_umem:
+ ib_umem_release(mr->umem);
+ return ret;
+}
+
int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
struct ib_udata *udata)
@@ -1171,7 +1294,6 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
struct device *dev = hr_dev->dev;
unsigned long mtpt_idx;
u32 pdn = 0;
- int npages;
int ret;
if (!mr->enabled)
@@ -1188,9 +1310,9 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
if (ret)
goto free_cmd_mbox;
- ret = hns_roce_hw2sw_mpt(hr_dev, NULL, mtpt_idx);
+ ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, mtpt_idx);
if (ret)
- dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret);
+ dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret);
mr->enabled = 0;
@@ -1198,73 +1320,25 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
pdn = to_hr_pd(pd)->pdn;
if (flags & IB_MR_REREG_TRANS) {
- if (mr->size != ~0ULL) {
- npages = ib_umem_page_count(mr->umem);
-
- if (hr_dev->caps.pbl_hop_num)
- hns_roce_mhop_free(hr_dev, mr);
- else
- dma_free_coherent(dev, npages * 8, mr->pbl_buf,
- mr->pbl_dma_addr);
- }
- ib_umem_release(mr->umem);
-
- mr->umem =
- ib_umem_get(udata, start, length, mr_access_flags, 0);
- if (IS_ERR(mr->umem)) {
- ret = PTR_ERR(mr->umem);
- mr->umem = NULL;
+ ret = rereg_mr_trans(ibmr, flags,
+ start, length,
+ virt_addr, mr_access_flags,
+ mailbox, pdn, udata);
+ if (ret)
goto free_cmd_mbox;
- }
- npages = ib_umem_page_count(mr->umem);
-
- if (hr_dev->caps.pbl_hop_num) {
- ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
- if (ret)
- goto release_umem;
- } else {
- mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
- &(mr->pbl_dma_addr),
- GFP_KERNEL);
- if (!mr->pbl_buf) {
- ret = -ENOMEM;
- goto release_umem;
- }
- }
- }
-
- ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
- mr_access_flags, virt_addr,
- length, mailbox->buf);
- if (ret) {
- if (flags & IB_MR_REREG_TRANS)
- goto release_umem;
- else
+ } else {
+ ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
+ mr_access_flags, virt_addr,
+ length, mailbox->buf);
+ if (ret)
goto free_cmd_mbox;
}
- if (flags & IB_MR_REREG_TRANS) {
- ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
- if (ret) {
- if (mr->size != ~0ULL) {
- npages = ib_umem_page_count(mr->umem);
-
- if (hr_dev->caps.pbl_hop_num)
- hns_roce_mhop_free(hr_dev, mr);
- else
- dma_free_coherent(dev, npages * 8,
- mr->pbl_buf,
- mr->pbl_dma_addr);
- }
-
- goto release_umem;
- }
- }
-
- ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, mtpt_idx);
+ ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx);
if (ret) {
- dev_err(dev, "SW2HW_MPT failed (%d)\n", ret);
- goto release_umem;
+ dev_err(dev, "CREATE_MPT failed (%d)\n", ret);
+ ib_umem_release(mr->umem);
+ goto free_cmd_mbox;
}
mr->enabled = 1;
@@ -1275,9 +1349,6 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
return 0;
-release_umem:
- ib_umem_release(mr->umem);
-
free_cmd_mbox:
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
@@ -1357,7 +1428,7 @@ static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
{
struct hns_roce_mr *mr = to_hr_mr(ibmr);
- mr->pbl_buf[mr->npages++] = cpu_to_le64(addr);
+ mr->pbl_buf[mr->npages++] = addr;
return 0;
}
@@ -1379,10 +1450,11 @@ static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
int ret;
if (mw->enabled) {
- ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mw->rkey)
- & (hr_dev->caps.num_mtpts - 1));
+ ret = hns_roce_hw_destroy_mpt(hr_dev, NULL,
+ key_to_hw_index(mw->rkey) &
+ (hr_dev->caps.num_mtpts - 1));
if (ret)
- dev_warn(dev, "MW HW2SW_MPT failed (%d)\n", ret);
+ dev_warn(dev, "MW DESTROY_MPT failed (%d)\n", ret);
hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
key_to_hw_index(mw->rkey));
@@ -1418,10 +1490,10 @@ static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev,
goto err_page;
}
- ret = hns_roce_sw2hw_mpt(hr_dev, mailbox,
- mtpt_idx & (hr_dev->caps.num_mtpts - 1));
+ ret = hns_roce_hw_create_mpt(hr_dev, mailbox,
+ mtpt_idx & (hr_dev->caps.num_mtpts - 1));
if (ret) {
- dev_err(dev, "MW sw2hw_mpt failed (%d)\n", ret);
+ dev_err(dev, "MW CREATE_MPT failed (%d)\n", ret);
goto err_page;
}
@@ -1528,10 +1600,9 @@ static int hns_roce_write_mtr(struct hns_roce_dev *hr_dev,
/* Save page addr, low 12 bits : 0 */
for (i = 0; i < count; i++) {
if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
- mtts[i] = cpu_to_le64(bufs[npage] >>
- PAGE_ADDR_SHIFT);
+ mtts[i] = bufs[npage] >> PAGE_ADDR_SHIFT;
else
- mtts[i] = cpu_to_le64(bufs[npage]);
+ mtts[i] = bufs[npage];
npage++;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index 912b89b4da34..780c780fdb22 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -96,7 +96,7 @@ int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
/* Using bitmap to manager UAR index */
ret = hns_roce_bitmap_alloc(&hr_dev->uar_table.bitmap, &uar->logic_idx);
- if (ret == -1)
+ if (ret)
return -ENOMEM;
if (uar->logic_idx > 0 && hr_dev->caps.phy_num_uars > 1)
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index e0424029b058..3257ad11be48 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -318,38 +318,55 @@ static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev,
* hr_qp->rq.max_gs);
}
- cap->max_recv_wr = hr_qp->rq.max_post = hr_qp->rq.wqe_cnt;
+ cap->max_recv_wr = hr_qp->rq.wqe_cnt;
cap->max_recv_sge = hr_qp->rq.max_gs;
return 0;
}
+static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev,
+ struct ib_qp_cap *cap,
+ struct hns_roce_ib_create_qp *ucmd)
+{
+ u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz);
+ u8 max_sq_stride = ilog2(roundup_sq_stride);
+
+ /* Sanity check SQ size before proceeding */
+ if (ucmd->log_sq_stride > max_sq_stride ||
+ ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) {
+ ibdev_err(&hr_dev->ib_dev, "check SQ size error!\n");
+ return -EINVAL;
+ }
+
+ if (cap->max_send_sge > hr_dev->caps.max_sq_sg) {
+ ibdev_err(&hr_dev->ib_dev, "SQ sge error! max_send_sge=%d\n",
+ cap->max_send_sge);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
struct ib_qp_cap *cap,
struct hns_roce_qp *hr_qp,
struct hns_roce_ib_create_qp *ucmd)
{
- u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz);
- u8 max_sq_stride = ilog2(roundup_sq_stride);
u32 ex_sge_num;
u32 page_size;
u32 max_cnt;
+ int ret;
- /* Sanity check SQ size before proceeding */
- if ((u32)(1 << ucmd->log_sq_bb_count) > hr_dev->caps.max_wqes ||
- ucmd->log_sq_stride > max_sq_stride ||
- ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) {
- dev_err(hr_dev->dev, "check SQ size error!\n");
+ if (check_shl_overflow(1, ucmd->log_sq_bb_count, &hr_qp->sq.wqe_cnt) ||
+ hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes)
return -EINVAL;
- }
- if (cap->max_send_sge > hr_dev->caps.max_sq_sg) {
- dev_err(hr_dev->dev, "SQ sge error! max_send_sge=%d\n",
- cap->max_send_sge);
- return -EINVAL;
+ ret = check_sq_size_with_integrity(hr_dev, cap, ucmd);
+ if (ret) {
+ ibdev_err(&hr_dev->ib_dev, "Sanity check sq size failed\n");
+ return ret;
}
- hr_qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count;
hr_qp->sq.wqe_shift = ucmd->log_sq_stride;
max_cnt = max(1U, cap->max_send_sge);
@@ -376,40 +393,38 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
/* Get buf size, SQ and RQ are aligned to page_size */
if (hr_dev->caps.max_sq_sg <= 2) {
- hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt <<
+ hr_qp->buff_size = round_up((hr_qp->rq.wqe_cnt <<
hr_qp->rq.wqe_shift), PAGE_SIZE) +
- HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt <<
+ round_up((hr_qp->sq.wqe_cnt <<
hr_qp->sq.wqe_shift), PAGE_SIZE);
hr_qp->sq.offset = 0;
- hr_qp->rq.offset = HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt <<
+ hr_qp->rq.offset = round_up((hr_qp->sq.wqe_cnt <<
hr_qp->sq.wqe_shift), PAGE_SIZE);
} else {
page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
hr_qp->sge.sge_cnt = ex_sge_num ?
max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num) : 0;
- hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt <<
+ hr_qp->buff_size = round_up((hr_qp->rq.wqe_cnt <<
hr_qp->rq.wqe_shift), page_size) +
- HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt <<
+ round_up((hr_qp->sge.sge_cnt <<
hr_qp->sge.sge_shift), page_size) +
- HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt <<
+ round_up((hr_qp->sq.wqe_cnt <<
hr_qp->sq.wqe_shift), page_size);
hr_qp->sq.offset = 0;
if (ex_sge_num) {
- hr_qp->sge.offset = HNS_ROCE_ALOGN_UP(
- (hr_qp->sq.wqe_cnt <<
- hr_qp->sq.wqe_shift),
- page_size);
+ hr_qp->sge.offset = round_up((hr_qp->sq.wqe_cnt <<
+ hr_qp->sq.wqe_shift),
+ page_size);
hr_qp->rq.offset = hr_qp->sge.offset +
- HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt <<
- hr_qp->sge.sge_shift),
- page_size);
+ round_up((hr_qp->sge.sge_cnt <<
+ hr_qp->sge.sge_shift),
+ page_size);
} else {
- hr_qp->rq.offset = HNS_ROCE_ALOGN_UP(
- (hr_qp->sq.wqe_cnt <<
- hr_qp->sq.wqe_shift),
- page_size);
+ hr_qp->rq.offset = round_up((hr_qp->sq.wqe_cnt <<
+ hr_qp->sq.wqe_shift),
+ page_size);
}
}
@@ -501,6 +516,35 @@ static int calc_wqe_bt_page_shift(struct hns_roce_dev *hr_dev,
return bt_pg_shift - PAGE_SHIFT;
}
+static int set_extend_sge_param(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp)
+{
+ struct device *dev = hr_dev->dev;
+
+ if (hr_qp->sq.max_gs > 2) {
+ hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
+ (hr_qp->sq.max_gs - 2));
+ hr_qp->sge.sge_shift = 4;
+ }
+
+ /* ud sqwqe's sge use extend sge */
+ if (hr_dev->caps.max_sq_sg > 2 && hr_qp->ibqp.qp_type == IB_QPT_GSI) {
+ hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
+ hr_qp->sq.max_gs);
+ hr_qp->sge.sge_shift = 4;
+ }
+
+ if ((hr_qp->sq.max_gs > 2) && hr_dev->pci_dev->revision == 0x20) {
+ if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
+ dev_err(dev, "The extended sge cnt error! sge_cnt=%d\n",
+ hr_qp->sge.sge_cnt);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
struct ib_qp_cap *cap,
struct hns_roce_qp *hr_qp)
@@ -509,6 +553,7 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
u32 page_size;
u32 max_cnt;
int size;
+ int ret;
if (cap->max_send_wr > hr_dev->caps.max_wqes ||
cap->max_send_sge > hr_dev->caps.max_sq_sg ||
@@ -518,8 +563,6 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
}
hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz);
- hr_qp->sq_max_wqes_per_wr = 1;
- hr_qp->sq_spare_wqes = 0;
if (hr_dev->caps.min_wqes)
max_cnt = max(cap->max_send_wr, hr_dev->caps.min_wqes);
@@ -539,48 +582,31 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
else
hr_qp->sq.max_gs = max_cnt;
- if (hr_qp->sq.max_gs > 2) {
- hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
- (hr_qp->sq.max_gs - 2));
- hr_qp->sge.sge_shift = 4;
- }
-
- /* ud sqwqe's sge use extend sge */
- if (hr_dev->caps.max_sq_sg > 2 && hr_qp->ibqp.qp_type == IB_QPT_GSI) {
- hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
- hr_qp->sq.max_gs);
- hr_qp->sge.sge_shift = 4;
- }
-
- if ((hr_qp->sq.max_gs > 2) && hr_dev->pci_dev->revision == 0x20) {
- if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
- dev_err(dev, "The extended sge cnt error! sge_cnt=%d\n",
- hr_qp->sge.sge_cnt);
- return -EINVAL;
- }
+ ret = set_extend_sge_param(hr_dev, hr_qp);
+ if (ret) {
+ dev_err(dev, "failed to set extend sge parameters\n");
+ return ret;
}
/* Get buf size, SQ and RQ are aligned to PAGE_SIZE */
page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
hr_qp->sq.offset = 0;
- size = HNS_ROCE_ALOGN_UP(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift,
- page_size);
+ size = round_up(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, page_size);
if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) {
hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift),
- (u32)hr_qp->sge.sge_cnt);
+ (u32)hr_qp->sge.sge_cnt);
hr_qp->sge.offset = size;
- size += HNS_ROCE_ALOGN_UP(hr_qp->sge.sge_cnt <<
- hr_qp->sge.sge_shift, page_size);
+ size += round_up(hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift,
+ page_size);
}
hr_qp->rq.offset = size;
- size += HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift),
- page_size);
+ size += round_up((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), page_size);
hr_qp->buff_size = size;
/* Get the number of send WRs and SGEs */
- cap->max_send_wr = hr_qp->sq.max_post = hr_qp->sq.wqe_cnt;
+ cap->max_send_wr = hr_qp->sq.wqe_cnt;
cap->max_send_sge = hr_qp->sq.max_gs;
/* We don't support inline sends for kernel QPs (yet) */
@@ -607,13 +633,80 @@ static int hns_roce_qp_has_rq(struct ib_qp_init_attr *attr)
return 1;
}
+static int alloc_rq_inline_buf(struct hns_roce_qp *hr_qp,
+ struct ib_qp_init_attr *init_attr)
+{
+ u32 max_recv_sge = init_attr->cap.max_recv_sge;
+ struct hns_roce_rinl_wqe *wqe_list;
+ u32 wqe_cnt = hr_qp->rq.wqe_cnt;
+ int i;
+
+ /* allocate recv inline buf */
+ wqe_list = kcalloc(wqe_cnt, sizeof(struct hns_roce_rinl_wqe),
+ GFP_KERNEL);
+
+ if (!wqe_list)
+ goto err;
+
+ /* Allocate a contiguous buffer for all the inline sges we need */
+ wqe_list[0].sg_list = kcalloc(wqe_cnt, (max_recv_sge *
+ sizeof(struct hns_roce_rinl_sge)),
+ GFP_KERNEL);
+ if (!wqe_list[0].sg_list)
+ goto err_wqe_list;
+
+ /* Assign buffers of sg_list to each inline wqe */
+ for (i = 1; i < wqe_cnt; i++)
+ wqe_list[i].sg_list = &wqe_list[0].sg_list[i * max_recv_sge];
+
+ hr_qp->rq_inl_buf.wqe_list = wqe_list;
+ hr_qp->rq_inl_buf.wqe_cnt = wqe_cnt;
+
+ return 0;
+
+err_wqe_list:
+ kfree(wqe_list);
+
+err:
+ return -ENOMEM;
+}
+
+static void free_rq_inline_buf(struct hns_roce_qp *hr_qp)
+{
+ kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
+ kfree(hr_qp->rq_inl_buf.wqe_list);
+}
+
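
alloc_rq_inline_buf() performs a single allocation for every WQE's inline SGE array and then hands each WQE a slice of it, so teardown is just the two kfree() calls in free_rq_inline_buf(). A hedged userspace analogue of the slicing, with simplified types and made-up sizes:

#include <stdio.h>
#include <stdlib.h>

struct sge { unsigned long addr; unsigned int len; };	/* simplified */
struct rinl_wqe { struct sge *sg_list; };

int main(void)
{
	unsigned int wqe_cnt = 4, max_recv_sge = 3;	/* assumed sizes */
	struct rinl_wqe *wqes = calloc(wqe_cnt, sizeof(*wqes));
	unsigned int i;

	if (!wqes)
		return 1;

	/* one backing buffer, sliced per WQE */
	wqes[0].sg_list = calloc(wqe_cnt * max_recv_sge, sizeof(struct sge));
	if (!wqes[0].sg_list) {
		free(wqes);
		return 1;
	}
	for (i = 1; i < wqe_cnt; i++)
		wqes[i].sg_list = &wqes[0].sg_list[i * max_recv_sge];

	printf("wqe[2] sg_list offset = %ld entries\n",
	       (long)(wqes[2].sg_list - wqes[0].sg_list));

	free(wqes[0].sg_list);
	free(wqes);
	return 0;
}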
+static void add_qp_to_list(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct ib_cq *send_cq, struct ib_cq *recv_cq)
+{
+ struct hns_roce_cq *hr_send_cq, *hr_recv_cq;
+ unsigned long flags;
+
+ hr_send_cq = send_cq ? to_hr_cq(send_cq) : NULL;
+ hr_recv_cq = recv_cq ? to_hr_cq(recv_cq) : NULL;
+
+ spin_lock_irqsave(&hr_dev->qp_list_lock, flags);
+ hns_roce_lock_cqs(hr_send_cq, hr_recv_cq);
+
+ list_add_tail(&hr_qp->node, &hr_dev->qp_list);
+ if (hr_send_cq)
+ list_add_tail(&hr_qp->sq_node, &hr_send_cq->sq_list);
+ if (hr_recv_cq)
+ list_add_tail(&hr_qp->rq_node, &hr_recv_cq->rq_list);
+
+ hns_roce_unlock_cqs(hr_send_cq, hr_recv_cq);
+ spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags);
+}
+
static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
struct ib_pd *ib_pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata, unsigned long sqpn,
struct hns_roce_qp *hr_qp)
{
- dma_addr_t *buf_list[ARRAY_SIZE(hr_qp->regions)] = { 0 };
+ dma_addr_t *buf_list[ARRAY_SIZE(hr_qp->regions)] = { NULL };
struct device *dev = hr_dev->dev;
struct hns_roce_ib_create_qp ucmd;
struct hns_roce_ib_create_qp_resp resp = {};
@@ -635,9 +728,9 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
hr_qp->ibqp.qp_type = init_attr->qp_type;
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
- hr_qp->sq_signal_bits = cpu_to_le32(IB_SIGNAL_ALL_WR);
+ hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR;
else
- hr_qp->sq_signal_bits = cpu_to_le32(IB_SIGNAL_REQ_WR);
+ hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR;
ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, udata,
hns_roce_qp_has_rq(init_attr), hr_qp);
@@ -648,33 +741,11 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
hns_roce_qp_has_rq(init_attr)) {
- /* allocate recv inline buf */
- hr_qp->rq_inl_buf.wqe_list = kcalloc(hr_qp->rq.wqe_cnt,
- sizeof(struct hns_roce_rinl_wqe),
- GFP_KERNEL);
- if (!hr_qp->rq_inl_buf.wqe_list) {
- ret = -ENOMEM;
+ ret = alloc_rq_inline_buf(hr_qp, init_attr);
+ if (ret) {
+ dev_err(dev, "allocate receive inline buffer failed\n");
goto err_out;
}
-
- hr_qp->rq_inl_buf.wqe_cnt = hr_qp->rq.wqe_cnt;
-
- /* Firstly, allocate a list of sge space buffer */
- hr_qp->rq_inl_buf.wqe_list[0].sg_list =
- kcalloc(hr_qp->rq_inl_buf.wqe_cnt,
- init_attr->cap.max_recv_sge *
- sizeof(struct hns_roce_rinl_sge),
- GFP_KERNEL);
- if (!hr_qp->rq_inl_buf.wqe_list[0].sg_list) {
- ret = -ENOMEM;
- goto err_wqe_list;
- }
-
- for (i = 1; i < hr_qp->rq_inl_buf.wqe_cnt; i++)
- /* Secondly, reallocate the buffer */
- hr_qp->rq_inl_buf.wqe_list[i].sg_list =
- &hr_qp->rq_inl_buf.wqe_list[0].sg_list[i *
- init_attr->cap.max_recv_sge];
}
page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
@@ -682,22 +753,22 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
dev_err(dev, "ib_copy_from_udata error for create qp\n");
ret = -EFAULT;
- goto err_rq_sge_list;
+ goto err_alloc_rq_inline_buf;
}
ret = hns_roce_set_user_sq_size(hr_dev, &init_attr->cap, hr_qp,
&ucmd);
if (ret) {
dev_err(dev, "hns_roce_set_user_sq_size error for create qp\n");
- goto err_rq_sge_list;
+ goto err_alloc_rq_inline_buf;
}
- hr_qp->umem = ib_umem_get(udata, ucmd.buf_addr,
- hr_qp->buff_size, 0, 0);
+ hr_qp->umem = ib_umem_get(ib_pd->device, ucmd.buf_addr,
+ hr_qp->buff_size, 0);
if (IS_ERR(hr_qp->umem)) {
dev_err(dev, "ib_umem_get error for create qp\n");
ret = PTR_ERR(hr_qp->umem);
- goto err_rq_sge_list;
+ goto err_alloc_rq_inline_buf;
}
hr_qp->region_cnt = split_wqe_buf_region(hr_dev, hr_qp,
hr_qp->regions, ARRAY_SIZE(hr_qp->regions),
@@ -758,13 +829,13 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
dev_err(dev, "init_attr->create_flags error!\n");
ret = -EINVAL;
- goto err_rq_sge_list;
+ goto err_alloc_rq_inline_buf;
}
if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) {
dev_err(dev, "init_attr->create_flags error!\n");
ret = -EINVAL;
- goto err_rq_sge_list;
+ goto err_alloc_rq_inline_buf;
}
/* Set SQ size */
@@ -772,7 +843,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
hr_qp);
if (ret) {
dev_err(dev, "hns_roce_set_kernel_sq_size error!\n");
- goto err_rq_sge_list;
+ goto err_alloc_rq_inline_buf;
}
/* QP doorbell register address */
@@ -786,7 +857,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
ret = hns_roce_alloc_db(hr_dev, &hr_qp->rdb, 0);
if (ret) {
dev_err(dev, "rq record doorbell alloc failed!\n");
- goto err_rq_sge_list;
+ goto err_alloc_rq_inline_buf;
}
*hr_qp->rdb.db_record = 0;
hr_qp->rdb_en = 1;
@@ -826,11 +897,18 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
hr_qp->sq.wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64),
GFP_KERNEL);
- hr_qp->rq.wrid = kcalloc(hr_qp->rq.wqe_cnt, sizeof(u64),
- GFP_KERNEL);
- if (!hr_qp->sq.wrid || !hr_qp->rq.wrid) {
+ if (ZERO_OR_NULL_PTR(hr_qp->sq.wrid)) {
ret = -ENOMEM;
- goto err_wrid;
+ goto err_get_bufs;
+ }
+
+ if (hr_qp->rq.wqe_cnt) {
+ hr_qp->rq.wrid = kcalloc(hr_qp->rq.wqe_cnt, sizeof(u64),
+ GFP_KERNEL);
+ if (ZERO_OR_NULL_PTR(hr_qp->rq.wrid)) {
+ ret = -ENOMEM;
+ goto err_sq_wrid;
+ }
}
}
@@ -875,7 +953,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
if (sqpn)
hr_qp->doorbell_qpn = 1;
else
- hr_qp->doorbell_qpn = cpu_to_le64(hr_qp->qpn);
+ hr_qp->doorbell_qpn = (u32)hr_qp->qpn;
if (udata) {
ret = ib_copy_to_udata(udata, &resp,
@@ -891,6 +969,9 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
}
hr_qp->event = hns_roce_ib_qp_event;
+
+ add_qp_to_list(hr_dev, hr_qp, init_attr->send_cq, init_attr->recv_cq);
+
hns_roce_free_buf_list(buf_list, hr_qp->region_cnt);
return 0;
@@ -916,8 +997,8 @@ err_wrid:
hns_roce_qp_has_rq(init_attr))
hns_roce_db_unmap_user(uctx, &hr_qp->rdb);
} else {
- kfree(hr_qp->sq.wrid);
- kfree(hr_qp->rq.wrid);
+ if (hr_qp->rq.wqe_cnt)
+ kfree(hr_qp->rq.wrid);
}
err_sq_dbmap:
@@ -928,6 +1009,10 @@ err_sq_dbmap:
hns_roce_qp_has_sq(init_attr))
hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
+err_sq_wrid:
+ if (!udata)
+ kfree(hr_qp->sq.wrid);
+
err_get_bufs:
hns_roce_free_buf_list(buf_list, hr_qp->region_cnt);
@@ -941,13 +1026,10 @@ err_db:
(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB))
hns_roce_free_db(hr_dev, &hr_qp->rdb);
-err_rq_sge_list:
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE)
- kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
-
-err_wqe_list:
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE)
- kfree(hr_qp->rq_inl_buf.wqe_list);
+err_alloc_rq_inline_buf:
+ if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
+ hns_roce_qp_has_rq(init_attr))
+ free_rq_inline_buf(hr_qp);
err_out:
return ret;
@@ -958,8 +1040,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
- struct device *dev = hr_dev->dev;
- struct hns_roce_sqp *hr_sqp;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_qp *hr_qp;
int ret;
@@ -972,7 +1053,8 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, 0,
hr_qp);
if (ret) {
- dev_err(dev, "Create RC QP failed\n");
+ ibdev_err(ibdev, "Create QP 0x%06lx failed(%d)\n",
+ hr_qp->qpn, ret);
kfree(hr_qp);
return ERR_PTR(ret);
}
@@ -984,15 +1066,14 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
case IB_QPT_GSI: {
/* Userspace is not allowed to create special QPs: */
if (udata) {
- dev_err(dev, "not support usr space GSI\n");
+ ibdev_err(ibdev, "not support usr space GSI\n");
return ERR_PTR(-EINVAL);
}
- hr_sqp = kzalloc(sizeof(*hr_sqp), GFP_KERNEL);
- if (!hr_sqp)
+ hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
+ if (!hr_qp)
return ERR_PTR(-ENOMEM);
- hr_qp = &hr_sqp->hr_qp;
hr_qp->port = init_attr->port_num - 1;
hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
@@ -1006,15 +1087,16 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata,
hr_qp->ibqp.qp_num, hr_qp);
if (ret) {
- dev_err(dev, "Create GSI QP failed!\n");
- kfree(hr_sqp);
+ ibdev_err(ibdev, "Create GSI QP failed!\n");
+ kfree(hr_qp);
return ERR_PTR(ret);
}
break;
}
default:{
- dev_err(dev, "not support QP type %d\n", init_attr->qp_type);
+ ibdev_err(ibdev, "not support QP type %d\n",
+ init_attr->qp_type);
return ERR_PTR(-EINVAL);
}
}
@@ -1040,23 +1122,88 @@ int to_hr_qp_type(int qp_type)
return transport_type;
}
+static int check_mtu_validate(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct ib_qp_attr *attr, int attr_mask)
+{
+ enum ib_mtu active_mtu;
+ int p;
+
+ p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port;
+ active_mtu = iboe_get_mtu(hr_dev->iboe.netdevs[p]->mtu);
+
+ if ((hr_dev->caps.max_mtu >= IB_MTU_2048 &&
+ attr->path_mtu > hr_dev->caps.max_mtu) ||
+ attr->path_mtu < IB_MTU_256 || attr->path_mtu > active_mtu) {
+ ibdev_err(&hr_dev->ib_dev,
+ "attr path_mtu(%d)invalid while modify qp",
+ "attr path_mtu(%d) invalid while modifying qp",
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
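
check_mtu_validate() collapses the old per-capability comparisons into one range check against the device maximum and the port's currently active MTU. A sketch of just that predicate, not the driver code, using the standard IB MTU enum values:

#include <stdio.h>

/* IB path MTU enum values as used by the verbs API */
enum { MTU_256 = 1, MTU_512, MTU_1024, MTU_2048, MTU_4096 };

static int path_mtu_ok(int max_mtu, int active_mtu, int path_mtu)
{
	if (max_mtu >= MTU_2048 && path_mtu > max_mtu)
		return 0;
	if (path_mtu < MTU_256 || path_mtu > active_mtu)
		return 0;
	return 1;
}

int main(void)
{
	printf("%d\n", path_mtu_ok(MTU_4096, MTU_2048, MTU_4096)); /* 0: above active MTU */
	printf("%d\n", path_mtu_ok(MTU_4096, MTU_4096, MTU_1024)); /* 1: within range */
	return 0;
}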
+static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ int p;
+
+ if ((attr_mask & IB_QP_PORT) &&
+ (attr->port_num == 0 || attr->port_num > hr_dev->caps.num_ports)) {
+ ibdev_err(&hr_dev->ib_dev,
+ "attr port_num invalid.attr->port_num=%d\n",
+ attr->port_num);
+ return -EINVAL;
+ }
+
+ if (attr_mask & IB_QP_PKEY_INDEX) {
+ p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port;
+ if (attr->pkey_index >= hr_dev->caps.pkey_table_len[p]) {
+ ibdev_err(&hr_dev->ib_dev,
+ "attr pkey_index invalid.attr->pkey_index=%d\n",
+ attr->pkey_index);
+ return -EINVAL;
+ }
+ }
+
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
+ attr->max_rd_atomic > hr_dev->caps.max_qp_init_rdma) {
+ ibdev_err(&hr_dev->ib_dev,
+ "attr max_rd_atomic invalid.attr->max_rd_atomic=%d\n",
+ attr->max_rd_atomic);
+ return -EINVAL;
+ }
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
+ attr->max_dest_rd_atomic > hr_dev->caps.max_qp_dest_rdma) {
+ ibdev_err(&hr_dev->ib_dev,
+ "attr max_dest_rd_atomic invalid.attr->max_dest_rd_atomic=%d\n",
+ attr->max_dest_rd_atomic);
+ return -EINVAL;
+ }
+
+ if (attr_mask & IB_QP_PATH_MTU)
+ return check_mtu_validate(hr_dev, hr_qp, attr, attr_mask);
+
+ return 0;
+}
+
int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
enum ib_qp_state cur_state, new_state;
- struct device *dev = hr_dev->dev;
int ret = -EINVAL;
- int p;
- enum ib_mtu active_mtu;
mutex_lock(&hr_qp->mutex);
cur_state = attr_mask & IB_QP_CUR_STATE ?
attr->cur_qp_state : (enum ib_qp_state)hr_qp->state;
- new_state = attr_mask & IB_QP_STATE ?
- attr->qp_state : cur_state;
+ new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (ibqp->uobject &&
(attr_mask & IB_QP_STATE) && new_state == IB_QPS_ERR) {
@@ -1066,67 +1213,27 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (hr_qp->rdb_en == 1)
hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr);
} else {
- dev_warn(dev, "flush cqe is not supported in userspace!\n");
+ ibdev_warn(&hr_dev->ib_dev,
+ "flush cqe is not supported in userspace!\n");
goto out;
}
}
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
attr_mask)) {
- dev_err(dev, "ib_modify_qp_is_ok failed\n");
- goto out;
- }
-
- if ((attr_mask & IB_QP_PORT) &&
- (attr->port_num == 0 || attr->port_num > hr_dev->caps.num_ports)) {
- dev_err(dev, "attr port_num invalid.attr->port_num=%d\n",
- attr->port_num);
+ ibdev_err(&hr_dev->ib_dev, "ib_modify_qp_is_ok failed\n");
goto out;
}
- if (attr_mask & IB_QP_PKEY_INDEX) {
- p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port;
- if (attr->pkey_index >= hr_dev->caps.pkey_table_len[p]) {
- dev_err(dev, "attr pkey_index invalid.attr->pkey_index=%d\n",
- attr->pkey_index);
- goto out;
- }
- }
-
- if (attr_mask & IB_QP_PATH_MTU) {
- p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port;
- active_mtu = iboe_get_mtu(hr_dev->iboe.netdevs[p]->mtu);
-
- if ((hr_dev->caps.max_mtu == IB_MTU_4096 &&
- attr->path_mtu > IB_MTU_4096) ||
- (hr_dev->caps.max_mtu == IB_MTU_2048 &&
- attr->path_mtu > IB_MTU_2048) ||
- attr->path_mtu < IB_MTU_256 ||
- attr->path_mtu > active_mtu) {
- dev_err(dev, "attr path_mtu(%d)invalid while modify qp",
- attr->path_mtu);
- goto out;
- }
- }
-
- if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
- attr->max_rd_atomic > hr_dev->caps.max_qp_init_rdma) {
- dev_err(dev, "attr max_rd_atomic invalid.attr->max_rd_atomic=%d\n",
- attr->max_rd_atomic);
- goto out;
- }
-
- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
- attr->max_dest_rd_atomic > hr_dev->caps.max_qp_dest_rdma) {
- dev_err(dev, "attr max_dest_rd_atomic invalid.attr->max_dest_rd_atomic=%d\n",
- attr->max_dest_rd_atomic);
+ ret = hns_roce_check_qp_attr(ibqp, attr, attr_mask);
+ if (ret)
goto out;
- }
if (cur_state == new_state && cur_state == IB_QPS_RESET) {
if (hr_dev->caps.min_wqes) {
ret = -EPERM;
- dev_err(dev, "cur_state=%d new_state=%d\n", cur_state,
+ ibdev_err(&hr_dev->ib_dev,
+ "cur_state=%d new_state=%d\n", cur_state,
new_state);
} else {
ret = 0;
@@ -1147,7 +1254,16 @@ out:
void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq)
__acquires(&send_cq->lock) __acquires(&recv_cq->lock)
{
- if (send_cq == recv_cq) {
+ if (unlikely(send_cq == NULL && recv_cq == NULL)) {
+ __acquire(&send_cq->lock);
+ __acquire(&recv_cq->lock);
+ } else if (unlikely(send_cq != NULL && recv_cq == NULL)) {
+ spin_lock_irq(&send_cq->lock);
+ __acquire(&recv_cq->lock);
+ } else if (unlikely(send_cq == NULL && recv_cq != NULL)) {
+ spin_lock_irq(&recv_cq->lock);
+ __acquire(&send_cq->lock);
+ } else if (send_cq == recv_cq) {
spin_lock_irq(&send_cq->lock);
__acquire(&recv_cq->lock);
} else if (send_cq->cqn < recv_cq->cqn) {
@@ -1163,7 +1279,16 @@ void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq,
struct hns_roce_cq *recv_cq) __releases(&send_cq->lock)
__releases(&recv_cq->lock)
{
- if (send_cq == recv_cq) {
+ if (unlikely(send_cq == NULL && recv_cq == NULL)) {
+ __release(&recv_cq->lock);
+ __release(&send_cq->lock);
+ } else if (unlikely(send_cq != NULL && recv_cq == NULL)) {
+ __release(&recv_cq->lock);
+ spin_unlock(&send_cq->lock);
+ } else if (unlikely(send_cq == NULL && recv_cq != NULL)) {
+ __release(&send_cq->lock);
+ spin_unlock(&recv_cq->lock);
+ } else if (send_cq == recv_cq) {
__release(&recv_cq->lock);
spin_unlock_irq(&send_cq->lock);
} else if (send_cq->cqn < recv_cq->cqn) {
@@ -1204,7 +1329,7 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq,
u32 cur;
cur = hr_wq->head - hr_wq->tail;
- if (likely(cur + nreq < hr_wq->max_post))
+ if (likely(cur + nreq < hr_wq->wqe_cnt))
return false;
hr_cq = to_hr_cq(ib_cq);
@@ -1212,7 +1337,7 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq,
cur = hr_wq->head - hr_wq->tail;
spin_unlock(&hr_cq->lock);
- return cur + nreq >= hr_wq->max_post;
+ return cur + nreq >= hr_wq->wqe_cnt;
}
int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
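
The hns_roce_lock_cqs()/hns_roce_unlock_cqs() changes above tolerate NULL CQs while keeping the lower-numbered CQ locked first, so two paths that share CQs cannot deadlock against each other. A minimal sketch of that ordering rule with pthread mutexes standing in for the CQ spinlocks (everything here is hypothetical, not the driver code):

#include <pthread.h>
#include <stdio.h>

struct cq { unsigned int cqn; pthread_mutex_t lock; };

/* Lock two CQs in a stable order (lower cqn first); either may be NULL. */
static void lock_cq_pair(struct cq *a, struct cq *b)
{
	if (a && b && a != b) {
		if (a->cqn > b->cqn) { struct cq *t = a; a = b; b = t; }
		pthread_mutex_lock(&a->lock);
		pthread_mutex_lock(&b->lock);
	} else if (a) {
		pthread_mutex_lock(&a->lock);
	} else if (b) {
		pthread_mutex_lock(&b->lock);
	}
}

int main(void)
{
	struct cq c1 = { 1, PTHREAD_MUTEX_INITIALIZER };
	struct cq c2 = { 2, PTHREAD_MUTEX_INITIALIZER };

	lock_cq_pair(&c2, &c1);	/* always takes c1.lock before c2.lock */
	printf("locked both\n");
	pthread_mutex_unlock(&c2.lock);
	pthread_mutex_unlock(&c1.lock);
	return 0;
}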
diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c
index 0a31d0a3d657..06871731ac43 100644
--- a/drivers/infiniband/hw/hns/hns_roce_restrack.c
+++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c
@@ -98,11 +98,15 @@ static int hns_roce_fill_res_cq_entry(struct sk_buff *msg,
goto err;
table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
- if (!table_attr)
+ if (!table_attr) {
+ ret = -EMSGSIZE;
goto err;
+ }
- if (hns_roce_fill_cq(msg, context))
+ if (hns_roce_fill_cq(msg, context)) {
+ ret = -EMSGSIZE;
goto err_cancel_table;
+ }
nla_nest_end(msg, table_attr);
kfree(context);
@@ -113,7 +117,7 @@ err_cancel_table:
nla_nest_cancel(msg, table_attr);
err:
kfree(context);
- return -EMSGSIZE;
+ return ret;
}
int hns_roce_fill_res_entry(struct sk_buff *msg,
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 38bb548eaa6d..c6d5f06f9cde 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -59,21 +59,21 @@ static void hns_roce_ib_srq_event(struct hns_roce_srq *srq,
}
}
-static int hns_roce_sw2hw_srq(struct hns_roce_dev *dev,
- struct hns_roce_cmd_mailbox *mailbox,
- unsigned long srq_num)
+static int hns_roce_hw_create_srq(struct hns_roce_dev *dev,
+ struct hns_roce_cmd_mailbox *mailbox,
+ unsigned long srq_num)
{
return hns_roce_cmd_mbox(dev, mailbox->dma, 0, srq_num, 0,
- HNS_ROCE_CMD_SW2HW_SRQ,
+ HNS_ROCE_CMD_CREATE_SRQ,
HNS_ROCE_CMD_TIMEOUT_MSECS);
}
-static int hns_roce_hw2sw_srq(struct hns_roce_dev *dev,
- struct hns_roce_cmd_mailbox *mailbox,
- unsigned long srq_num)
+static int hns_roce_hw_destroy_srq(struct hns_roce_dev *dev,
+ struct hns_roce_cmd_mailbox *mailbox,
+ unsigned long srq_num)
{
return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, srq_num,
- mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_SRQ,
+ mailbox ? 0 : 1, HNS_ROCE_CMD_DESTROY_SRQ,
HNS_ROCE_CMD_TIMEOUT_MSECS);
}
@@ -95,8 +95,7 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn,
srq->mtt.first_seg,
&dma_handle_wqe);
if (!mtts_wqe) {
- dev_err(hr_dev->dev,
- "SRQ alloc.Failed to find srq buf addr.\n");
+ dev_err(hr_dev->dev, "Failed to find mtt for srq buf.\n");
return -EINVAL;
}
@@ -106,13 +105,14 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn,
&dma_handle_idx);
if (!mtts_idx) {
dev_err(hr_dev->dev,
- "SRQ alloc.Failed to find idx que buf addr.\n");
+ "Failed to find mtt for srq idx queue buf.\n");
return -EINVAL;
}
ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn);
- if (ret == -1) {
- dev_err(hr_dev->dev, "SRQ alloc.Failed to alloc index.\n");
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "Failed to alloc a bit from srq bitmap.\n");
return -ENOMEM;
}
@@ -134,7 +134,7 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn,
mtts_wqe, mtts_idx, dma_handle_wqe,
dma_handle_idx);
- ret = hns_roce_sw2hw_srq(hr_dev, mailbox, srq->srqn);
+ ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn);
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
if (ret)
goto err_xa;
@@ -160,9 +160,9 @@ static void hns_roce_srq_free(struct hns_roce_dev *hr_dev,
struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
int ret;
- ret = hns_roce_hw2sw_srq(hr_dev, NULL, srq->srqn);
+ ret = hns_roce_hw_destroy_srq(hr_dev, NULL, srq->srqn);
if (ret)
- dev_err(hr_dev->dev, "HW2SW_SRQ failed (%d) for CQN %06lx\n",
+ dev_err(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n",
ret, srq->srqn);
xa_erase(&srq_table->xa, srq->srqn);
@@ -175,13 +175,88 @@ static void hns_roce_srq_free(struct hns_roce_dev *hr_dev,
hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR);
}
+static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata,
+ int srq_buf_size)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
+ struct hns_roce_ib_create_srq ucmd;
+ struct hns_roce_buf *buf;
+ int ret;
+
+ if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
+ return -EFAULT;
+
+ srq->umem =
+ ib_umem_get(srq->ibsrq.device, ucmd.buf_addr, srq_buf_size, 0);
+ if (IS_ERR(srq->umem))
+ return PTR_ERR(srq->umem);
+
+ buf = &srq->buf;
+ buf->npages = (ib_umem_page_count(srq->umem) +
+ (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) /
+ (1 << hr_dev->caps.srqwqe_buf_pg_sz);
+ buf->page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
+ ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift,
+ &srq->mtt);
+ if (ret)
+ goto err_user_buf;
+
+ ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->mtt, srq->umem);
+ if (ret)
+ goto err_user_srq_mtt;
+
+ /* config index queue BA */
+ srq->idx_que.umem = ib_umem_get(srq->ibsrq.device, ucmd.que_addr,
+ srq->idx_que.buf_size, 0);
+ if (IS_ERR(srq->idx_que.umem)) {
+ dev_err(hr_dev->dev, "ib_umem_get error for index queue\n");
+ ret = PTR_ERR(srq->idx_que.umem);
+ goto err_user_srq_mtt;
+ }
+
+ buf = &srq->idx_que.idx_buf;
+ buf->npages = DIV_ROUND_UP(ib_umem_page_count(srq->idx_que.umem),
+ 1 << hr_dev->caps.idx_buf_pg_sz);
+ buf->page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
+ ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift,
+ &srq->idx_que.mtt);
+ if (ret) {
+ dev_err(hr_dev->dev, "hns_roce_mtt_init error for idx que\n");
+ goto err_user_idx_mtt;
+ }
+
+ ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->idx_que.mtt,
+ srq->idx_que.umem);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "hns_roce_ib_umem_write_mtt error for idx que\n");
+ goto err_user_idx_buf;
+ }
+
+ return 0;
+
+err_user_idx_buf:
+ hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
+
+err_user_idx_mtt:
+ ib_umem_release(srq->idx_que.umem);
+
+err_user_srq_mtt:
+ hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
+
+err_user_buf:
+ ib_umem_release(srq->umem);
+
+ return ret;
+}
+
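
In create_user_srq(), the open-coded expression for the WQE buffer page count and the DIV_ROUND_UP() used for the index queue are the same ceiling division: umem pages grouped into larger driver pages. A short sketch of that calculation alone, with made-up page counts:

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int umem_pages = 9;	/* assumed 4K pages reported for the umem */
	unsigned int pg_sz_shift = 2;	/* driver pages are 4K << 2 = 16K */
	unsigned int per_drv_page = 1u << pg_sz_shift;

	/* open-coded form used for the WQE buffer ... */
	unsigned int npages_a = (umem_pages + per_drv_page - 1) / per_drv_page;
	/* ... and the DIV_ROUND_UP() form used for the index queue */
	unsigned int npages_b = DIV_ROUND_UP(umem_pages, per_drv_page);

	printf("%u %u\n", npages_a, npages_b);	/* both print 3 */
	return 0;
}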
static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq,
u32 page_shift)
{
struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
struct hns_roce_idx_que *idx_que = &srq->idx_que;
- idx_que->bitmap = bitmap_zalloc(srq->max, GFP_KERNEL);
+ idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL);
if (!idx_que->bitmap)
return -ENOMEM;
@@ -196,8 +271,95 @@ static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq,
return 0;
}
+static int create_kernel_srq(struct hns_roce_srq *srq, int srq_buf_size)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
+ u32 page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
+ int ret;
+
+ if (hns_roce_buf_alloc(hr_dev, srq_buf_size, (1 << page_shift) * 2,
+ &srq->buf, page_shift))
+ return -ENOMEM;
+
+ srq->head = 0;
+ srq->tail = srq->wqe_cnt - 1;
+
+ ret = hns_roce_mtt_init(hr_dev, srq->buf.npages, srq->buf.page_shift,
+ &srq->mtt);
+ if (ret)
+ goto err_kernel_buf;
+
+ ret = hns_roce_buf_write_mtt(hr_dev, &srq->mtt, &srq->buf);
+ if (ret)
+ goto err_kernel_srq_mtt;
+
+ page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
+ ret = hns_roce_create_idx_que(srq->ibsrq.pd, srq, page_shift);
+ if (ret) {
+ dev_err(hr_dev->dev, "Create idx queue fail(%d)!\n", ret);
+ goto err_kernel_srq_mtt;
+ }
+
+ /* Init mtt table for idx_que */
+ ret = hns_roce_mtt_init(hr_dev, srq->idx_que.idx_buf.npages,
+ srq->idx_que.idx_buf.page_shift,
+ &srq->idx_que.mtt);
+ if (ret)
+ goto err_kernel_create_idx;
+
+ /* Write buffer address into the mtt table */
+ ret = hns_roce_buf_write_mtt(hr_dev, &srq->idx_que.mtt,
+ &srq->idx_que.idx_buf);
+ if (ret)
+ goto err_kernel_idx_buf;
+
+ srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL);
+ if (!srq->wrid) {
+ ret = -ENOMEM;
+ goto err_kernel_idx_buf;
+ }
+
+ return 0;
+
+err_kernel_idx_buf:
+ hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
+
+err_kernel_create_idx:
+ hns_roce_buf_free(hr_dev, srq->idx_que.buf_size,
+ &srq->idx_que.idx_buf);
+ kfree(srq->idx_que.bitmap);
+
+err_kernel_srq_mtt:
+ hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
+
+err_kernel_buf:
+ hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf);
+
+ return ret;
+}
+
+static void destroy_user_srq(struct hns_roce_dev *hr_dev,
+ struct hns_roce_srq *srq)
+{
+ hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
+ ib_umem_release(srq->idx_que.umem);
+ hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
+ ib_umem_release(srq->umem);
+}
+
+static void destroy_kernel_srq(struct hns_roce_dev *hr_dev,
+ struct hns_roce_srq *srq, int srq_buf_size)
+{
+ kvfree(srq->wrid);
+ hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
+ hns_roce_buf_free(hr_dev, srq->idx_que.buf_size, &srq->idx_que.idx_buf);
+ kfree(srq->idx_que.bitmap);
+ hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
+ hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf);
+}
+
int hns_roce_create_srq(struct ib_srq *ib_srq,
- struct ib_srq_init_attr *srq_init_attr,
+ struct ib_srq_init_attr *init_attr,
struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device);
@@ -205,148 +367,47 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
struct hns_roce_srq *srq = to_hr_srq(ib_srq);
int srq_desc_size;
int srq_buf_size;
- u32 page_shift;
int ret = 0;
- u32 npages;
u32 cqn;
/* Check the actual SRQ wqe and SRQ sge num */
- if (srq_init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs ||
- srq_init_attr->attr.max_sge > hr_dev->caps.max_srq_sges)
+ if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs ||
+ init_attr->attr.max_sge > hr_dev->caps.max_srq_sges)
return -EINVAL;
mutex_init(&srq->mutex);
spin_lock_init(&srq->lock);
- srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1);
- srq->max_gs = srq_init_attr->attr.max_sge;
+ srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1);
+ srq->max_gs = init_attr->attr.max_sge;
- srq_desc_size = max(16, 16 * srq->max_gs);
+ srq_desc_size = roundup_pow_of_two(max(16, 16 * srq->max_gs));
srq->wqe_shift = ilog2(srq_desc_size);
- srq_buf_size = srq->max * srq_desc_size;
+ srq_buf_size = srq->wqe_cnt * srq_desc_size;
srq->idx_que.entry_sz = HNS_ROCE_IDX_QUE_ENTRY_SZ;
- srq->idx_que.buf_size = srq->max * srq->idx_que.entry_sz;
+ srq->idx_que.buf_size = srq->wqe_cnt * srq->idx_que.entry_sz;
srq->mtt.mtt_type = MTT_TYPE_SRQWQE;
srq->idx_que.mtt.mtt_type = MTT_TYPE_IDX;
if (udata) {
- struct hns_roce_ib_create_srq ucmd;
-
- if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
- return -EFAULT;
-
- srq->umem =
- ib_umem_get(udata, ucmd.buf_addr, srq_buf_size, 0, 0);
- if (IS_ERR(srq->umem))
- return PTR_ERR(srq->umem);
-
- if (hr_dev->caps.srqwqe_buf_pg_sz) {
- npages = (ib_umem_page_count(srq->umem) +
- (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) /
- (1 << hr_dev->caps.srqwqe_buf_pg_sz);
- page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
- ret = hns_roce_mtt_init(hr_dev, npages,
- page_shift,
- &srq->mtt);
- } else
- ret = hns_roce_mtt_init(hr_dev,
- ib_umem_page_count(srq->umem),
- PAGE_SHIFT, &srq->mtt);
- if (ret)
- goto err_buf;
-
- ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->mtt, srq->umem);
- if (ret)
- goto err_srq_mtt;
-
- /* config index queue BA */
- srq->idx_que.umem = ib_umem_get(udata, ucmd.que_addr,
- srq->idx_que.buf_size, 0, 0);
- if (IS_ERR(srq->idx_que.umem)) {
- dev_err(hr_dev->dev,
- "ib_umem_get error for index queue\n");
- ret = PTR_ERR(srq->idx_que.umem);
- goto err_srq_mtt;
- }
-
- if (hr_dev->caps.idx_buf_pg_sz) {
- npages = (ib_umem_page_count(srq->idx_que.umem) +
- (1 << hr_dev->caps.idx_buf_pg_sz) - 1) /
- (1 << hr_dev->caps.idx_buf_pg_sz);
- page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
- ret = hns_roce_mtt_init(hr_dev, npages,
- page_shift, &srq->idx_que.mtt);
- } else {
- ret = hns_roce_mtt_init(
- hr_dev, ib_umem_page_count(srq->idx_que.umem),
- PAGE_SHIFT, &srq->idx_que.mtt);
- }
-
- if (ret) {
- dev_err(hr_dev->dev,
- "hns_roce_mtt_init error for idx que\n");
- goto err_idx_mtt;
- }
-
- ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->idx_que.mtt,
- srq->idx_que.umem);
+ ret = create_user_srq(srq, udata, srq_buf_size);
if (ret) {
- dev_err(hr_dev->dev,
- "hns_roce_ib_umem_write_mtt error for idx que\n");
- goto err_idx_buf;
+ dev_err(hr_dev->dev, "Create user srq failed\n");
+ goto err_srq;
}
} else {
- page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
- if (hns_roce_buf_alloc(hr_dev, srq_buf_size,
- (1 << page_shift) * 2, &srq->buf,
- page_shift))
- return -ENOMEM;
-
- srq->head = 0;
- srq->tail = srq->max - 1;
-
- ret = hns_roce_mtt_init(hr_dev, srq->buf.npages,
- srq->buf.page_shift, &srq->mtt);
- if (ret)
- goto err_buf;
-
- ret = hns_roce_buf_write_mtt(hr_dev, &srq->mtt, &srq->buf);
- if (ret)
- goto err_srq_mtt;
-
- page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
- ret = hns_roce_create_idx_que(ib_srq->pd, srq, page_shift);
+ ret = create_kernel_srq(srq, srq_buf_size);
if (ret) {
- dev_err(hr_dev->dev, "Create idx queue fail(%d)!\n",
- ret);
- goto err_srq_mtt;
- }
-
- /* Init mtt table for idx_que */
- ret = hns_roce_mtt_init(hr_dev, srq->idx_que.idx_buf.npages,
- srq->idx_que.idx_buf.page_shift,
- &srq->idx_que.mtt);
- if (ret)
- goto err_create_idx;
-
- /* Write buffer address into the mtt table */
- ret = hns_roce_buf_write_mtt(hr_dev, &srq->idx_que.mtt,
- &srq->idx_que.idx_buf);
- if (ret)
- goto err_idx_buf;
-
- srq->wrid = kvmalloc_array(srq->max, sizeof(u64), GFP_KERNEL);
- if (!srq->wrid) {
- ret = -ENOMEM;
- goto err_idx_buf;
+ dev_err(hr_dev->dev, "Create kernel srq failed\n");
+ goto err_srq;
}
}
- cqn = ib_srq_has_cq(srq_init_attr->srq_type) ?
- to_hr_cq(srq_init_attr->ext.cq)->cqn : 0;
+ cqn = ib_srq_has_cq(init_attr->srq_type) ?
+ to_hr_cq(init_attr->ext.cq)->cqn : 0;
srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG;
@@ -356,7 +417,6 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
goto err_wrid;
srq->event = hns_roce_ib_srq_event;
- srq->ibsrq.ext.xrc.srq_num = srq->srqn;
resp.srqn = srq->srqn;
if (udata) {
@@ -373,27 +433,12 @@ err_srqc_alloc:
hns_roce_srq_free(hr_dev, srq);
err_wrid:
- kvfree(srq->wrid);
-
-err_idx_buf:
- hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
-
-err_idx_mtt:
- ib_umem_release(srq->idx_que.umem);
-
-err_create_idx:
- hns_roce_buf_free(hr_dev, srq->idx_que.buf_size,
- &srq->idx_que.idx_buf);
- bitmap_free(srq->idx_que.bitmap);
-
-err_srq_mtt:
- hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
-
-err_buf:
- ib_umem_release(srq->umem);
- if (!udata)
- hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf);
+ if (udata)
+ destroy_user_srq(hr_dev, srq);
+ else
+ destroy_kernel_srq(hr_dev, srq, srq_buf_size);
+err_srq:
return ret;
}
@@ -409,7 +454,7 @@ void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
} else {
kvfree(srq->wrid);
- hns_roce_buf_free(hr_dev, srq->max << srq->wqe_shift,
+ hns_roce_buf_free(hr_dev, srq->wqe_cnt << srq->wqe_shift,
&srq->buf);
}
ib_umem_release(srq->idx_que.umem);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 2d6a378e8560..bb78d3280acc 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -2079,9 +2079,9 @@ static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev,
dst = i40iw_get_dst_ipv6(&src_addr, &dst_addr);
if (!dst || dst->error) {
if (dst) {
- dst_release(dst);
i40iw_pr_err("ip6_route_output returned dst->error = %d\n",
dst->error);
+ dst_release(dst);
}
return rc;
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index d44cf33df81a..238614370927 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -1225,6 +1225,8 @@ static void i40iw_add_ipv4_addr(struct i40iw_device *iwdev)
const struct in_ifaddr *ifa;
idev = in_dev_get(dev);
+ if (!idev)
+ continue;
in_dev_for_each_ifa_rtnl(ifa, idev) {
i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM,
"IP=%pI4, vlan_id=%d, MAC=%pM\n", &ifa->ifa_address,
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index d169a8031375..c335de91508f 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -97,18 +97,7 @@ static int i40iw_query_port(struct ib_device *ibdev,
u8 port,
struct ib_port_attr *props)
{
- struct i40iw_device *iwdev = to_iwdev(ibdev);
- struct net_device *netdev = iwdev->netdev;
-
- /* props being zeroed by the caller, avoid zeroing it here */
- props->max_mtu = IB_MTU_4096;
- props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
-
props->lid = 1;
- if (netif_carrier_ok(iwdev->netdev))
- props->state = IB_PORT_ACTIVE;
- else
- props->state = IB_PORT_DOWN;
props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
props->gid_tbl_len = 1;
@@ -180,8 +169,7 @@ static void i40iw_dealloc_ucontext(struct ib_ucontext *context)
static int i40iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
struct i40iw_ucontext *ucontext;
- u64 db_addr_offset;
- u64 push_offset;
+ u64 db_addr_offset, push_offset, pfn;
ucontext = to_ucontext(context);
if (ucontext->iwdev->sc_dev.is_pf) {
@@ -200,7 +188,6 @@ static int i40iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
if (vma->vm_pgoff == (db_addr_offset >> PAGE_SHIFT)) {
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- vma->vm_private_data = ucontext;
} else {
if ((vma->vm_pgoff - (push_offset >> PAGE_SHIFT)) % 2)
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
@@ -208,12 +195,12 @@ static int i40iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
}
- if (io_remap_pfn_range(vma, vma->vm_start,
- vma->vm_pgoff + (pci_resource_start(ucontext->iwdev->ldev->pcidev, 0) >> PAGE_SHIFT),
- PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
+ pfn = vma->vm_pgoff +
+ (pci_resource_start(ucontext->iwdev->ldev->pcidev, 0) >>
+ PAGE_SHIFT);
- return 0;
+ return rdma_user_mmap_io(context, vma, pfn, PAGE_SIZE,
+ vma->vm_page_prot, NULL);
}
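
i40iw_mmap() now derives a page-frame number from the BAR's physical start plus the page offset requested by mmap() and hands it to rdma_user_mmap_io() instead of calling io_remap_pfn_range() directly. A sketch of just the pfn arithmetic, with a hypothetical BAR address:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12

int main(void)
{
	uint64_t bar_start = 0xf8000000ULL;	/* hypothetical BAR 0 physical address */
	uint64_t vm_pgoff  = 3;			/* page offset requested by mmap() */

	uint64_t pfn = vm_pgoff + (bar_start >> PAGE_SHIFT);

	printf("pfn=0x%llx -> phys=0x%llx\n",
	       (unsigned long long)pfn,
	       (unsigned long long)(pfn << PAGE_SHIFT));
	return 0;
}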
/**
@@ -1769,12 +1756,15 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
int ret;
int pg_shift;
+ if (!udata)
+ return ERR_PTR(-EOPNOTSUPP);
+
if (iwdev->closing)
return ERR_PTR(-ENODEV);
if (length > I40IW_MAX_MR_SIZE)
return ERR_PTR(-EINVAL);
- region = ib_umem_get(udata, start, length, acc, 0);
+ region = ib_umem_get(pd->device, start, length, acc);
if (IS_ERR(region))
return (struct ib_mr *)region;
@@ -2784,6 +2774,10 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev)
return -ENOMEM;
iwibdev = iwdev->iwibdev;
rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group);
+ ret = ib_device_set_netdev(&iwibdev->ibdev, iwdev->netdev, 1);
+ if (ret)
+ goto error;
+
ret = ib_register_device(&iwibdev->ibdev, "i40iw%d");
if (ret)
goto error;
diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
index ecd6cadd529a..b591861934b3 100644
--- a/drivers/infiniband/hw/mlx4/cm.c
+++ b/drivers/infiniband/hw/mlx4/cm.c
@@ -186,23 +186,6 @@ out:
kfree(ent);
}
-static void id_map_find_del(struct ib_device *ibdev, int pv_cm_id)
-{
- struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
- struct rb_root *sl_id_map = &sriov->sl_id_map;
- struct id_map_entry *ent, *found_ent;
-
- spin_lock(&sriov->id_map_lock);
- ent = xa_erase(&sriov->pv_id_table, pv_cm_id);
- if (!ent)
- goto out;
- found_ent = id_map_find_by_sl_id(ibdev, ent->slave_id, ent->sl_cm_id);
- if (found_ent && found_ent == ent)
- rb_erase(&found_ent->node, sl_id_map);
-out:
- spin_unlock(&sriov->id_map_lock);
-}
-
static void sl_id_map_add(struct ib_device *ibdev, struct id_map_entry *new)
{
struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
@@ -294,7 +277,7 @@ static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id)
spin_lock(&sriov->id_map_lock);
spin_lock_irqsave(&sriov->going_down_lock, flags);
/*make sure that there is no schedule inside the scheduled work.*/
- if (!sriov->is_going_down) {
+ if (!sriov->is_going_down && !id->scheduled_delete) {
id->scheduled_delete = 1;
schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
}
@@ -341,9 +324,6 @@ cont:
if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
schedule_delayed(ibdev, id);
- else if (mad->mad_hdr.attr_id == CM_DREP_ATTR_ID)
- id_map_find_del(ibdev, pv_cm_id);
-
return 0;
}
@@ -382,12 +362,9 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
*slave = id->slave_id;
set_remote_comm_id(mad, id->sl_cm_id);
- if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
+ if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_REJ_ATTR_ID)
schedule_delayed(ibdev, id);
- else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID ||
- mad->mad_hdr.attr_id == CM_DREP_ATTR_ID) {
- id_map_find_del(ibdev, (int) pv_cm_id);
- }
return 0;
}
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index a7d238d312f0..f8b936b76dcd 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -144,8 +144,8 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_udata *udata,
int shift;
int n;
- *umem = ib_umem_get(udata, buf_addr, cqe * cqe_size,
- IB_ACCESS_LOCAL_WRITE, 1);
+ *umem = ib_umem_get(&dev->ib_dev, buf_addr, cqe * cqe_size,
+ IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(*umem))
return PTR_ERR(*umem);
@@ -568,18 +568,13 @@ static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe,
wc->vendor_err = cqe->vendor_err_syndrome;
}
-static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum)
+static int mlx4_ib_ipoib_csum_ok(__be16 status, u8 badfcs_enc, __be16 checksum)
{
- return ((status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
- MLX4_CQE_STATUS_IPV4F |
- MLX4_CQE_STATUS_IPV4OPT |
- MLX4_CQE_STATUS_IPV6 |
- MLX4_CQE_STATUS_IPOK)) ==
- cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
- MLX4_CQE_STATUS_IPOK)) &&
- (status & cpu_to_be16(MLX4_CQE_STATUS_UDP |
- MLX4_CQE_STATUS_TCP)) &&
- checksum == cpu_to_be16(0xffff);
+ return ((badfcs_enc & MLX4_CQE_STATUS_L4_CSUM) ||
+ ((status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
+ (status & cpu_to_be16(MLX4_CQE_STATUS_TCP |
+ MLX4_CQE_STATUS_UDP)) &&
+ (checksum == cpu_to_be16(0xffff))));
}
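
The rewritten mlx4_ib_ipoib_csum_ok() accepts a packet when either the hardware's L4 checksum-OK bit is set or the older IPOK + TCP/UDP + 0xffff checksum combination holds. A sketch of that predicate with made-up flag values (the real bit definitions live in the mlx4 CQE headers):

#include <stdio.h>
#include <stdint.h>

/* Hypothetical flag values for illustration only */
#define STATUS_IPOK	0x1000
#define STATUS_TCP	0x0010
#define STATUS_UDP	0x0020
#define BADFCS_L4_CSUM	0x04

static int csum_ok(uint16_t status, uint8_t badfcs_enc, uint16_t checksum)
{
	return (badfcs_enc & BADFCS_L4_CSUM) ||
	       ((status & STATUS_IPOK) &&
		(status & (STATUS_TCP | STATUS_UDP)) &&
		checksum == 0xffff);
}

int main(void)
{
	printf("%d\n", csum_ok(0, BADFCS_L4_CSUM, 0));			/* 1: HW L4 bit set */
	printf("%d\n", csum_ok(STATUS_IPOK | STATUS_TCP, 0, 0xffff));	/* 1: legacy combination */
	printf("%d\n", csum_ok(STATUS_IPOK, 0, 0xffff));		/* 0: not TCP/UDP */
	return 0;
}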
static void use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
@@ -855,6 +850,7 @@ repoll:
wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0;
wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status,
+ cqe->badfcs_enc,
cqe->checksum) ? IB_WC_IP_CSUM_OK : 0;
if (is_eth) {
wc->slid = 0;
diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c
index 0f390351cef0..d41f03ccb0e1 100644
--- a/drivers/infiniband/hw/mlx4/doorbell.c
+++ b/drivers/infiniband/hw/mlx4/doorbell.c
@@ -64,7 +64,8 @@ int mlx4_ib_db_map_user(struct ib_udata *udata, unsigned long virt,
page->user_virt = (virt & PAGE_MASK);
page->refcnt = 0;
- page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0);
+ page->umem = ib_umem_get(context->ibucontext.device, virt & PAGE_MASK,
+ PAGE_SIZE, 0);
if (IS_ERR(page->umem)) {
err = PTR_ERR(page->umem);
kfree(page);
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 68c951491a08..abe68708d6d6 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -966,7 +966,6 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
}
mutex_unlock(&dev->counters_table[port_num - 1].mutex);
if (stats_avail) {
- memset(out_mad->data, 0, sizeof out_mad->data);
switch (counter_stats.counter_mode & 0xf) {
case 0:
edit_counter(&counter_stats,
@@ -984,38 +983,31 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in, size_t in_mad_size,
- struct ib_mad_hdr *out, size_t *out_mad_size,
- u16 *out_mad_pkey_index)
+ const struct ib_mad *in, struct ib_mad *out,
+ size_t *out_mad_size, u16 *out_mad_pkey_index)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
- const struct ib_mad *in_mad = (const struct ib_mad *)in;
- struct ib_mad *out_mad = (struct ib_mad *)out;
enum rdma_link_layer link = rdma_port_get_link_layer(ibdev, port_num);
- if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
- *out_mad_size != sizeof(*out_mad)))
- return IB_MAD_RESULT_FAILURE;
-
/* iboe_process_mad() which uses the HCA flow-counters to implement IB PMA
* queries, should be called only by VFs and for that specific purpose
*/
if (link == IB_LINK_LAYER_INFINIBAND) {
if (mlx4_is_slave(dev->dev) &&
- (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
- (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS ||
- in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT ||
- in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO)))
- return iboe_process_mad(ibdev, mad_flags, port_num, in_wc,
- in_grh, in_mad, out_mad);
+ (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
+ (in->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS ||
+ in->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT ||
+ in->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO)))
+ return iboe_process_mad(ibdev, mad_flags, port_num,
+ in_wc, in_grh, in, out);
- return ib_process_mad(ibdev, mad_flags, port_num, in_wc,
- in_grh, in_mad, out_mad);
+ return ib_process_mad(ibdev, mad_flags, port_num, in_wc, in_grh,
+ in, out);
}
if (link == IB_LINK_LAYER_ETHERNET)
return iboe_process_mad(ibdev, mad_flags, port_num, in_wc,
- in_grh, in_mad, out_mad);
+ in_grh, in, out);
return -EINVAL;
}
@@ -1677,8 +1669,6 @@ tx_err:
tx_buf_size, DMA_TO_DEVICE);
kfree(tun_qp->tx_ring[i].buf.addr);
}
- kfree(tun_qp->tx_ring);
- tun_qp->tx_ring = NULL;
i = MLX4_NUM_TUNNEL_BUFS;
err:
while (i > 0) {
@@ -1687,6 +1677,8 @@ err:
rx_buf_size, DMA_FROM_DEVICE);
kfree(tun_qp->ring[i].addr);
}
+ kfree(tun_qp->tx_ring);
+ tun_qp->tx_ring = NULL;
kfree(tun_qp->ring);
tun_qp->ring = NULL;
return -ENOMEM;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 8790101facb7..2f5d9b181848 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -246,6 +246,13 @@ static int mlx4_ib_update_gids(struct gid_entry *gids,
return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
}
+static void free_gid_entry(struct gid_entry *entry)
+{
+ memset(&entry->gid, 0, sizeof(entry->gid));
+ kfree(entry->ctx);
+ entry->ctx = NULL;
+}
+
static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
{
struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
@@ -256,6 +263,8 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
int hw_update = 0;
int i;
struct gid_entry *gids = NULL;
+ u16 vlan_id = 0xffff;
+ u8 mac[ETH_ALEN];
if (!rdma_cap_roce_gid_table(attr->device, attr->port_num))
return -EINVAL;
@@ -266,12 +275,16 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
if (!context)
return -EINVAL;
+ ret = rdma_read_gid_l2_fields(attr, &vlan_id, &mac[0]);
+ if (ret)
+ return ret;
port_gid_table = &iboe->gids[attr->port_num - 1];
spin_lock_bh(&iboe->lock);
for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
if (!memcmp(&port_gid_table->gids[i].gid,
&attr->gid, sizeof(attr->gid)) &&
- port_gid_table->gids[i].gid_type == attr->gid_type) {
+ port_gid_table->gids[i].gid_type == attr->gid_type &&
+ port_gid_table->gids[i].vlan_id == vlan_id) {
found = i;
break;
}
@@ -291,6 +304,7 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
memcpy(&port_gid_table->gids[free].gid,
&attr->gid, sizeof(attr->gid));
port_gid_table->gids[free].gid_type = attr->gid_type;
+ port_gid_table->gids[free].vlan_id = vlan_id;
port_gid_table->gids[free].ctx->real_index = free;
port_gid_table->gids[free].ctx->refcount = 1;
hw_update = 1;
@@ -306,6 +320,8 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
GFP_ATOMIC);
if (!gids) {
ret = -ENOMEM;
+ *context = NULL;
+ free_gid_entry(&port_gid_table->gids[free]);
} else {
for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
@@ -317,6 +333,12 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
if (!ret && hw_update) {
ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num);
+ if (ret) {
+ spin_lock_bh(&iboe->lock);
+ *context = NULL;
+ free_gid_entry(&port_gid_table->gids[free]);
+ spin_unlock_bh(&iboe->lock);
+ }
kfree(gids);
}
@@ -346,10 +368,7 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context)
if (!ctx->refcount) {
unsigned int real_index = ctx->real_index;
- memset(&port_gid_table->gids[real_index].gid, 0,
- sizeof(port_gid_table->gids[real_index].gid));
- kfree(port_gid_table->gids[real_index].ctx);
- port_gid_table->gids[real_index].ctx = NULL;
+ free_gid_entry(&port_gid_table->gids[real_index]);
hw_update = 1;
}
}
@@ -734,7 +753,8 @@ out:
static u8 state_to_phys_state(enum ib_port_state state)
{
- return state == IB_PORT_ACTIVE ? 5 : 3;
+ return state == IB_PORT_ACTIVE ?
+ IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
}
static int eth_link_query_port(struct ib_device *ibdev, u8 port,
@@ -1145,7 +1165,8 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
return rdma_user_mmap_io(context, vma,
to_mucontext(context)->uar.pfn,
PAGE_SIZE,
- pgprot_noncached(vma->vm_page_prot));
+ pgprot_noncached(vma->vm_page_prot),
+ NULL);
case 1:
if (dev->dev->caps.bf_reg_size == 0)
@@ -1154,7 +1175,8 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
context, vma,
to_mucontext(context)->uar.pfn +
dev->dev->caps.num_uars,
- PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot));
+ PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot),
+ NULL);
case 3: {
struct mlx4_clock_params params;
@@ -1170,7 +1192,8 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
params.bar) +
params.offset) >>
PAGE_SHIFT,
- PAGE_SIZE, pgprot_noncached(vma->vm_page_prot));
+ PAGE_SIZE, pgprot_noncached(vma->vm_page_prot),
+ NULL);
}
default:
@@ -3007,16 +3030,17 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
ibdev->ib_active = false;
flush_workqueue(wq);
- mlx4_ib_close_sriov(ibdev);
- mlx4_ib_mad_cleanup(ibdev);
- ib_unregister_device(&ibdev->ib_dev);
- mlx4_ib_diag_cleanup(ibdev);
if (ibdev->iboe.nb.notifier_call) {
if (unregister_netdevice_notifier(&ibdev->iboe.nb))
pr_warn("failure unregistering notifier\n");
ibdev->iboe.nb.notifier_call = NULL;
}
+ mlx4_ib_close_sriov(ibdev);
+ mlx4_ib_mad_cleanup(ibdev);
+ ib_unregister_device(&ibdev->ib_dev);
+ mlx4_ib_diag_cleanup(ibdev);
+
mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
ibdev->steer_qpn_count);
kfree(ibdev->ib_uc_qpns_bitmap);
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index eb53bb4c0c91..d188573187fa 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -508,6 +508,7 @@ struct gid_entry {
union ib_gid gid;
enum ib_gid_type gid_type;
struct gid_cache_context *ctx;
+ u16 vlan_id;
};
struct mlx4_port_gid_table {
@@ -786,11 +787,10 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const void *in_mad, void *response_mad);
-int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in, size_t in_mad_size,
- struct ib_mad_hdr *out, size_t *out_mad_size,
- u16 *out_mad_pkey_index);
+ const struct ib_mad *in, struct ib_mad *out,
+ size_t *out_mad_size, u16 *out_mad_pkey_index);
int mlx4_ib_mad_init(struct mlx4_ib_dev *dev);
void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 753479285ce9..b0121c90c561 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -367,7 +367,7 @@ end:
return block_shift;
}
-static struct ib_umem *mlx4_get_umem_mr(struct ib_udata *udata, u64 start,
+static struct ib_umem *mlx4_get_umem_mr(struct ib_device *device, u64 start,
u64 length, int access_flags)
{
/*
@@ -377,6 +377,7 @@ static struct ib_umem *mlx4_get_umem_mr(struct ib_udata *udata, u64 start,
* again
*/
if (!ib_access_writable(access_flags)) {
+ unsigned long untagged_start = untagged_addr(start);
struct vm_area_struct *vma;
down_read(&current->mm->mmap_sem);
@@ -385,9 +386,9 @@ static struct ib_umem *mlx4_get_umem_mr(struct ib_udata *udata, u64 start,
* cover the memory, but for now it requires a single vma to
* entirely cover the MR to support RO mappings.
*/
- vma = find_vma(current->mm, start);
- if (vma && vma->vm_end >= start + length &&
- vma->vm_start <= start) {
+ vma = find_vma(current->mm, untagged_start);
+ if (vma && vma->vm_end >= untagged_start + length &&
+ vma->vm_start <= untagged_start) {
if (vma->vm_flags & VM_WRITE)
access_flags |= IB_ACCESS_LOCAL_WRITE;
} else {
@@ -397,7 +398,7 @@ static struct ib_umem *mlx4_get_umem_mr(struct ib_udata *udata, u64 start,
up_read(&current->mm->mmap_sem);
}
- return ib_umem_get(udata, start, length, access_flags, 0);
+ return ib_umem_get(device, start, length, access_flags);
}
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
@@ -414,7 +415,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (!mr)
return ERR_PTR(-ENOMEM);
- mr->umem = mlx4_get_umem_mr(udata, start, length, access_flags);
+ mr->umem = mlx4_get_umem_mr(pd->device, start, length, access_flags);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
goto err_free;
@@ -503,7 +504,7 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
ib_umem_release(mmr->umem);
- mmr->umem = mlx4_get_umem_mr(udata, start, length,
+ mmr->umem = mlx4_get_umem_mr(mr->device, start, length,
mr_access_flags);
if (IS_ERR(mmr->umem)) {
err = PTR_ERR(mmr->umem);
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 82aff2f2fdc2..26425dd2d960 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -325,7 +325,7 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
}
static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
- bool is_user, int has_rq, struct mlx4_ib_qp *qp,
+ bool is_user, bool has_rq, struct mlx4_ib_qp *qp,
u32 inl_recv_sz)
{
/* Sanity check RQ size before proceeding */
@@ -506,10 +506,10 @@ static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
kfree(qp->sqp_proxy_rcv);
}
-static int qp_has_rq(struct ib_qp_init_attr *attr)
+static bool qp_has_rq(struct ib_qp_init_attr *attr)
{
if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT)
- return 0;
+ return false;
return !attr->srq;
}
@@ -849,18 +849,149 @@ static void mlx4_ib_release_wqn(struct mlx4_ib_ucontext *context,
* reused for further WQN allocations.
* The next created WQ will allocate a new range.
*/
- range->dirty = 1;
+ range->dirty = true;
}
mutex_unlock(&context->wqn_ranges_mutex);
}
-static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
- enum mlx4_ib_source_type src,
- struct ib_qp_init_attr *init_attr,
+static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata, struct mlx4_ib_qp *qp)
+{
+ struct mlx4_ib_dev *dev = to_mdev(pd->device);
+ int qpn;
+ int err;
+ struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mlx4_ib_ucontext, ibucontext);
+ struct mlx4_ib_cq *mcq;
+ unsigned long flags;
+ int range_size;
+ struct mlx4_ib_create_wq wq;
+ size_t copy_len;
+ int shift;
+ int n;
+
+ qp->mlx4_ib_qp_type = MLX4_IB_QPT_RAW_PACKET;
+
+ mutex_init(&qp->mutex);
+ spin_lock_init(&qp->sq.lock);
+ spin_lock_init(&qp->rq.lock);
+ INIT_LIST_HEAD(&qp->gid_list);
+ INIT_LIST_HEAD(&qp->steering_rules);
+
+ qp->state = IB_QPS_RESET;
+
+ copy_len = min(sizeof(struct mlx4_ib_create_wq), udata->inlen);
+
+ if (ib_copy_from_udata(&wq, udata, copy_len)) {
+ err = -EFAULT;
+ goto err;
+ }
+
+ if (wq.comp_mask || wq.reserved[0] || wq.reserved[1] ||
+ wq.reserved[2]) {
+ pr_debug("user command isn't supported\n");
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+
+ if (wq.log_range_size > ilog2(dev->dev->caps.max_rss_tbl_sz)) {
+ pr_debug("WQN range size must be equal or smaller than %d\n",
+ dev->dev->caps.max_rss_tbl_sz);
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+ range_size = 1 << wq.log_range_size;
+
+ if (init_attr->create_flags & IB_QP_CREATE_SCATTER_FCS)
+ qp->flags |= MLX4_IB_QP_SCATTER_FCS;
+
+ err = set_rq_size(dev, &init_attr->cap, true, true, qp, qp->inl_recv_sz);
+ if (err)
+ goto err;
+
+ qp->sq_no_prefetch = 1;
+ qp->sq.wqe_cnt = 1;
+ qp->sq.wqe_shift = MLX4_IB_MIN_SQ_STRIDE;
+ qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
+ (qp->sq.wqe_cnt << qp->sq.wqe_shift);
+
+ qp->umem = ib_umem_get(pd->device, wq.buf_addr, qp->buf_size, 0);
+ if (IS_ERR(qp->umem)) {
+ err = PTR_ERR(qp->umem);
+ goto err;
+ }
+
+ n = ib_umem_page_count(qp->umem);
+ shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n);
+ err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
+
+ if (err)
+ goto err_buf;
+
+ err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
+ if (err)
+ goto err_mtt;
+
+ err = mlx4_ib_db_map_user(udata, wq.db_addr, &qp->db);
+ if (err)
+ goto err_mtt;
+ qp->mqp.usage = MLX4_RES_USAGE_USER_VERBS;
+
+ err = mlx4_ib_alloc_wqn(context, qp, range_size, &qpn);
+ if (err)
+ goto err_wrid;
+
+ err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
+ if (err)
+ goto err_qpn;
+
+ /*
+ * Hardware wants QPN written in big-endian order (after
+ * shifting) for send doorbell. Precompute this value to save
+ * a little bit when posting sends.
+ */
+ qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
+
+ qp->mqp.event = mlx4_ib_wq_event;
+
+ spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+ mlx4_ib_lock_cqs(to_mcq(init_attr->send_cq),
+ to_mcq(init_attr->recv_cq));
+ /* Maintain device to QPs access, needed for further handling
+ * via reset flow
+ */
+ list_add_tail(&qp->qps_list, &dev->qp_list);
+ /* Maintain CQ to QPs access, needed for further handling
+ * via reset flow
+ */
+ mcq = to_mcq(init_attr->send_cq);
+ list_add_tail(&qp->cq_send_list, &mcq->send_qp_list);
+ mcq = to_mcq(init_attr->recv_cq);
+ list_add_tail(&qp->cq_recv_list, &mcq->recv_qp_list);
+ mlx4_ib_unlock_cqs(to_mcq(init_attr->send_cq),
+ to_mcq(init_attr->recv_cq));
+ spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
+ return 0;
+
+err_qpn:
+ mlx4_ib_release_wqn(context, qp, 0);
+err_wrid:
+ mlx4_ib_db_unmap_user(context, &qp->db);
+
+err_mtt:
+ mlx4_mtt_cleanup(dev->dev, &qp->mtt);
+err_buf:
+ ib_umem_release(qp->umem);
+err:
+ return err;
+}
+
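
create_rq() above follows the usual kernel pattern of unwinding with a ladder of goto labels in reverse order of acquisition, so each failure path releases exactly what was set up before it. A self-contained sketch of that pattern (the acquire/release helpers are stand-ins, not the mlx4 calls):

#include <stdio.h>

static int acquire(const char *what, int fail)
{
	if (fail) {
		fprintf(stderr, "failed to acquire %s\n", what);
		return -1;
	}
	printf("acquired %s\n", what);
	return 0;
}

static void release(const char *what)
{
	printf("released %s\n", what);
}

/* Acquire buf, then mtt, then db, then qpn; unwind in reverse on error. */
static int create(int fail_at)
{
	int err;

	err = acquire("buf", fail_at == 1);
	if (err)
		goto err;
	err = acquire("mtt", fail_at == 2);
	if (err)
		goto err_buf;
	err = acquire("db", fail_at == 3);
	if (err)
		goto err_mtt;
	err = acquire("qpn", fail_at == 4);
	if (err)
		goto err_db;
	return 0;

err_db:
	release("db");
err_mtt:
	release("mtt");
err_buf:
	release("buf");
err:
	return err;
}

int main(void)
{
	for (int i = 0; i <= 4; i++)
		printf("create(fail_at=%d) -> %d\n", i, create(i));
	return 0;
}
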
+static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
struct ib_udata *udata, int sqpn,
struct mlx4_ib_qp **caller_qp)
{
+ struct mlx4_ib_dev *dev = to_mdev(pd->device);
int qpn;
int err;
struct mlx4_ib_sqp *sqp = NULL;
@@ -870,7 +1001,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
struct mlx4_ib_cq *mcq;
unsigned long flags;
- int range_size = 0;
/* When tunneling special qps, we use a plain UD qp */
if (sqpn) {
@@ -921,15 +1051,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (!sqp)
return -ENOMEM;
qp = &sqp->qp;
- qp->pri.vid = 0xFFFF;
- qp->alt.vid = 0xFFFF;
} else {
qp = kzalloc(sizeof(struct mlx4_ib_qp), GFP_KERNEL);
if (!qp)
return -ENOMEM;
- qp->pri.vid = 0xFFFF;
- qp->alt.vid = 0xFFFF;
}
+ qp->pri.vid = 0xFFFF;
+ qp->alt.vid = 0xFFFF;
} else
qp = *caller_qp;
@@ -941,48 +1069,24 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
INIT_LIST_HEAD(&qp->gid_list);
INIT_LIST_HEAD(&qp->steering_rules);
- qp->state = IB_QPS_RESET;
+ qp->state = IB_QPS_RESET;
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
-
if (udata) {
- union {
- struct mlx4_ib_create_qp qp;
- struct mlx4_ib_create_wq wq;
- } ucmd;
+ struct mlx4_ib_create_qp ucmd;
size_t copy_len;
int shift;
int n;
- copy_len = (src == MLX4_IB_QP_SRC) ?
- sizeof(struct mlx4_ib_create_qp) :
- min(sizeof(struct mlx4_ib_create_wq), udata->inlen);
+ copy_len = sizeof(struct mlx4_ib_create_qp);
if (ib_copy_from_udata(&ucmd, udata, copy_len)) {
err = -EFAULT;
goto err;
}
- if (src == MLX4_IB_RWQ_SRC) {
- if (ucmd.wq.comp_mask || ucmd.wq.reserved[0] ||
- ucmd.wq.reserved[1] || ucmd.wq.reserved[2]) {
- pr_debug("user command isn't supported\n");
- err = -EOPNOTSUPP;
- goto err;
- }
-
- if (ucmd.wq.log_range_size >
- ilog2(dev->dev->caps.max_rss_tbl_sz)) {
- pr_debug("WQN range size must be equal or smaller than %d\n",
- dev->dev->caps.max_rss_tbl_sz);
- err = -EOPNOTSUPP;
- goto err;
- }
- range_size = 1 << ucmd.wq.log_range_size;
- } else {
- qp->inl_recv_sz = ucmd.qp.inl_recv_sz;
- }
+ qp->inl_recv_sz = ucmd.inl_recv_sz;
if (init_attr->create_flags & IB_QP_CREATE_SCATTER_FCS) {
if (!(dev->dev->caps.flags &
@@ -1000,30 +1104,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (err)
goto err;
- if (src == MLX4_IB_QP_SRC) {
- qp->sq_no_prefetch = ucmd.qp.sq_no_prefetch;
+ qp->sq_no_prefetch = ucmd.sq_no_prefetch;
- err = set_user_sq_size(dev, qp,
- (struct mlx4_ib_create_qp *)
- &ucmd);
- if (err)
- goto err;
- } else {
- qp->sq_no_prefetch = 1;
- qp->sq.wqe_cnt = 1;
- qp->sq.wqe_shift = MLX4_IB_MIN_SQ_STRIDE;
- /* Allocated buffer expects to have at least that SQ
- * size.
- */
- qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
- (qp->sq.wqe_cnt << qp->sq.wqe_shift);
- }
+ err = set_user_sq_size(dev, qp, &ucmd);
+ if (err)
+ goto err;
qp->umem =
- ib_umem_get(udata,
- (src == MLX4_IB_QP_SRC) ? ucmd.qp.buf_addr :
- ucmd.wq.buf_addr,
- qp->buf_size, 0, 0);
+ ib_umem_get(pd->device, ucmd.buf_addr, qp->buf_size, 0);
if (IS_ERR(qp->umem)) {
err = PTR_ERR(qp->umem);
goto err;
@@ -1041,11 +1129,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
goto err_mtt;
if (qp_has_rq(init_attr)) {
- err = mlx4_ib_db_map_user(udata,
- (src == MLX4_IB_QP_SRC) ?
- ucmd.qp.db_addr :
- ucmd.wq.db_addr,
- &qp->db);
+ err = mlx4_ib_db_map_user(udata, ucmd.db_addr, &qp->db);
if (err)
goto err_mtt;
}
@@ -1115,10 +1199,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
goto err_wrid;
}
}
- } else if (src == MLX4_IB_RWQ_SRC) {
- err = mlx4_ib_alloc_wqn(context, qp, range_size, &qpn);
- if (err)
- goto err_wrid;
} else {
/* Raw packet QPNs may not have bits 6,7 set in their qp_num;
* otherwise, the WQE BlueFlame setup flow wrongly causes
@@ -1157,8 +1237,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
*/
qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
- qp->mqp.event = (src == MLX4_IB_QP_SRC) ? mlx4_ib_qp_event :
- mlx4_ib_wq_event;
+ qp->mqp.event = mlx4_ib_qp_event;
if (!*caller_qp)
*caller_qp = qp;
@@ -1186,8 +1265,6 @@ err_qpn:
if (!sqpn) {
if (qp->flags & MLX4_IB_QP_NETIF)
mlx4_ib_steer_qp_free(dev, qpn, 1);
- else if (src == MLX4_IB_RWQ_SRC)
- mlx4_ib_release_wqn(context, qp, 0);
else
mlx4_qp_release_range(dev->dev, qpn, 1);
}
@@ -1518,8 +1595,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
/* fall through */
case IB_QPT_UD:
{
- err = create_qp_common(to_mdev(pd->device), pd, MLX4_IB_QP_SRC,
- init_attr, udata, 0, &qp);
+ err = create_qp_common(pd, init_attr, udata, 0, &qp);
if (err) {
kfree(qp);
return ERR_PTR(err);
@@ -1549,8 +1625,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
sqpn = get_sqp_num(to_mdev(pd->device), init_attr);
}
- err = create_qp_common(to_mdev(pd->device), pd, MLX4_IB_QP_SRC,
- init_attr, udata, sqpn, &qp);
+ err = create_qp_common(pd, init_attr, udata, sqpn, &qp);
if (err)
return ERR_PTR(err);
@@ -3010,7 +3085,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
}
if (ah->av.eth.vlan != cpu_to_be16(0xffff)) {
vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff;
- is_vlan = 1;
+ is_vlan = true;
}
}
err = ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh,
@@ -4047,8 +4122,8 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata)
{
- struct mlx4_ib_dev *dev;
- struct ib_qp_init_attr ib_qp_init_attr;
+ struct mlx4_dev *dev = to_mdev(pd->device)->dev;
+ struct ib_qp_init_attr ib_qp_init_attr = {};
struct mlx4_ib_qp *qp;
struct mlx4_ib_create_wq ucmd;
int err, required_cmd_sz;
@@ -4073,14 +4148,13 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
if (udata->outlen)
return ERR_PTR(-EOPNOTSUPP);
- dev = to_mdev(pd->device);
-
if (init_attr->wq_type != IB_WQT_RQ) {
pr_debug("unsupported wq type %d\n", init_attr->wq_type);
return ERR_PTR(-EOPNOTSUPP);
}
- if (init_attr->create_flags & ~IB_WQ_FLAGS_SCATTER_FCS) {
+ if (init_attr->create_flags & ~IB_WQ_FLAGS_SCATTER_FCS ||
+ !(dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)) {
pr_debug("unsupported create_flags %u\n",
init_attr->create_flags);
return ERR_PTR(-EOPNOTSUPP);
@@ -4093,7 +4167,6 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
qp->pri.vid = 0xFFFF;
qp->alt.vid = 0xFFFF;
- memset(&ib_qp_init_attr, 0, sizeof(ib_qp_init_attr));
ib_qp_init_attr.qp_context = init_attr->wq_context;
ib_qp_init_attr.qp_type = IB_QPT_RAW_PACKET;
ib_qp_init_attr.cap.max_recv_wr = init_attr->max_wr;
@@ -4104,8 +4177,7 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
if (init_attr->create_flags & IB_WQ_FLAGS_SCATTER_FCS)
ib_qp_init_attr.create_flags |= IB_QP_CREATE_SCATTER_FCS;
- err = create_qp_common(dev, pd, MLX4_IB_RWQ_SRC, &ib_qp_init_attr,
- udata, 0, &qp);
+ err = create_rq(pd, &ib_qp_init_attr, udata, qp);
if (err) {
kfree(qp);
return ERR_PTR(err);
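
create_rq() and mlx4_ib_create_wq() above copy the user command with a length clamped to min(sizeof(cmd), udata->inlen) and reject non-zero comp_mask/reserved fields, so older and newer userspace headers interoperate safely. A standalone sketch of that clamping idea (the struct layout and field names below are illustrative only, and a full implementation would also verify any extra trailing input bytes are zero):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct create_cmd {		/* what the kernel currently understands */
	uint64_t buf_addr;
	uint32_t comp_mask;	/* must be zero until a feature defines it */
	uint32_t reserved;
};

/* Copy at most what the caller provided and at most what we understand. */
static int parse_cmd(const void *user_buf, size_t inlen, struct create_cmd *cmd)
{
	size_t copy_len = inlen < sizeof(*cmd) ? inlen : sizeof(*cmd);

	memset(cmd, 0, sizeof(*cmd));	/* missing tail reads as zero */
	memcpy(cmd, user_buf, copy_len);

	if (cmd->comp_mask || cmd->reserved)
		return -95;		/* -EOPNOTSUPP: unknown extensions */
	return 0;
}

int main(void)
{
	struct create_cmd out;
	uint64_t short_cmd = 0x1000;	/* old userspace: passes only buf_addr */
	int ret = parse_cmd(&short_cmd, sizeof(short_cmd), &out);

	printf("parse: %d, buf_addr=0x%llx\n", ret,
	       (unsigned long long)out.buf_addr);
	return 0;
}
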
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 848db7264cc9..8f9d5035142d 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -110,7 +110,8 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq,
if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
return -EFAULT;
- srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0);
+ srq->umem =
+ ib_umem_get(ib_srq->device, ucmd.buf_addr, buf_size, 0);
if (IS_ERR(srq->umem))
return PTR_ERR(srq->umem);
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 9924be8384d8..d0a043ccbe58 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -3,7 +3,7 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq_cmd.o \
srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o \
- cong.o
+ cong.o restrack.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index 6c8645033102..4c26492ab8a3 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -157,7 +157,7 @@ int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
return -ENOMEM;
}
-int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
+void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
{
struct mlx5_core_dev *dev = dm->dev;
u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
@@ -175,145 +175,13 @@ int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
MLX5_SET(dealloc_memic_in, in, memic_size, length);
err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-
- if (!err) {
- spin_lock(&dm->lock);
- bitmap_clear(dm->memic_alloc_pages,
- start_page_idx, num_pages);
- spin_unlock(&dm->lock);
- }
-
- return err;
-}
-
-int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
- u16 uid, phys_addr_t *addr, u32 *obj_id)
-{
- struct mlx5_core_dev *dev = dm->dev;
- u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
- u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {};
- unsigned long *block_map;
- u64 icm_start_addr;
- u32 log_icm_size;
- u32 num_blocks;
- u32 max_blocks;
- u64 block_idx;
- void *sw_icm;
- int ret;
-
- MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
- MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
- MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
- MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
-
- switch (type) {
- case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
- icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
- steering_sw_icm_start_address);
- log_icm_size = MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size);
- block_map = dm->steering_sw_icm_alloc_blocks;
- break;
- case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
- icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
- header_modify_sw_icm_start_address);
- log_icm_size = MLX5_CAP_DEV_MEM(dev,
- log_header_modify_sw_icm_size);
- block_map = dm->header_modify_sw_icm_alloc_blocks;
- break;
- default:
- return -EINVAL;
- }
-
- num_blocks = (length + MLX5_SW_ICM_BLOCK_SIZE(dev) - 1) >>
- MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
- max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
- spin_lock(&dm->lock);
- block_idx = bitmap_find_next_zero_area(block_map,
- max_blocks,
- 0,
- num_blocks, 0);
-
- if (block_idx < max_blocks)
- bitmap_set(block_map,
- block_idx, num_blocks);
-
- spin_unlock(&dm->lock);
-
- if (block_idx >= max_blocks)
- return -ENOMEM;
-
- sw_icm = MLX5_ADDR_OF(create_sw_icm_in, in, sw_icm);
- icm_start_addr += block_idx << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
- MLX5_SET64(sw_icm, sw_icm, sw_icm_start_addr,
- icm_start_addr);
- MLX5_SET(sw_icm, sw_icm, log_sw_icm_size, ilog2(length));
-
- ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
- if (ret) {
- spin_lock(&dm->lock);
- bitmap_clear(block_map,
- block_idx, num_blocks);
- spin_unlock(&dm->lock);
-
- return ret;
- }
-
- *addr = icm_start_addr;
- *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
-
- return 0;
-}
-
-int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
- u16 uid, phys_addr_t addr, u32 obj_id)
-{
- struct mlx5_core_dev *dev = dm->dev;
- u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
- u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
- unsigned long *block_map;
- u32 num_blocks;
- u64 start_idx;
- int err;
-
- num_blocks = (length + MLX5_SW_ICM_BLOCK_SIZE(dev) - 1) >>
- MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
-
- switch (type) {
- case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
- start_idx =
- (addr - MLX5_CAP64_DEV_MEM(
- dev, steering_sw_icm_start_address)) >>
- MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
- block_map = dm->steering_sw_icm_alloc_blocks;
- break;
- case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
- start_idx =
- (addr -
- MLX5_CAP64_DEV_MEM(
- dev, header_modify_sw_icm_start_address)) >>
- MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
- block_map = dm->header_modify_sw_icm_alloc_blocks;
- break;
- default:
- return -EINVAL;
- }
-
- MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
- MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
- MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
- MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id);
- MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
-
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
if (err)
- return err;
+ return;
spin_lock(&dm->lock);
- bitmap_clear(block_map,
- start_idx, num_blocks);
+ bitmap_clear(dm->memic_alloc_pages,
+ start_page_idx, num_pages);
spin_unlock(&dm->lock);
-
- return 0;
}
int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out)
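
mlx5_cmd_dealloc_memic() above simply clears the pages back out of a bitmap under dm->lock, and the removed SW ICM helpers used the same allocator shape built on bitmap_find_next_zero_area()/bitmap_set(). A tiny userspace sketch of that first-fit block allocator, with a plain byte array standing in for the kernel bitmap helpers:

#include <stdio.h>

#define NPAGES 32

static unsigned char used[NPAGES];	/* 1 = page allocated */

/* Find the first run of 'count' free pages, mark it used, return its index. */
static int alloc_pages(int count)
{
	for (int start = 0; start + count <= NPAGES; start++) {
		int ok = 1;

		for (int i = start; i < start + count; i++)
			if (used[i]) {
				ok = 0;
				start = i;	/* skip past the busy page */
				break;
			}
		if (!ok)
			continue;
		for (int i = start; i < start + count; i++)
			used[i] = 1;
		return start;
	}
	return -1;	/* -ENOMEM in the kernel version */
}

static void free_pages_range(int start, int count)
{
	for (int i = start; i < start + count; i++)
		used[i] = 0;
}

int main(void)
{
	int a = alloc_pages(8);
	int b = alloc_pages(4);

	printf("a=%d b=%d\n", a, b);
	free_pages_range(a, 8);
	printf("after free, next 8-page run starts at %d\n", alloc_pages(8));
	return 0;
}
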
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 0572dcba6eae..945ebce73613 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -46,7 +46,7 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
void *in, int in_size);
int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
u64 length, u32 alignment);
-int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length);
+void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length);
void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid);
void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid);
void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid);
@@ -65,8 +65,4 @@ int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id,
u16 uid);
int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
u16 opmod, u8 port);
-int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
- u16 uid, phys_addr_t *addr, u32 *obj_id);
-int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
- u16 uid, phys_addr_t addr, u32 obj_id);
#endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 45f48cde6b9d..367a71bc5f4b 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -423,9 +423,6 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
struct mlx5_cqe64 *cqe64;
struct mlx5_core_qp *mqp;
struct mlx5_ib_wq *wq;
- struct mlx5_sig_err_cqe *sig_err_cqe;
- struct mlx5_core_mkey *mmkey;
- struct mlx5_ib_mr *mr;
uint8_t opcode;
uint32_t qpn;
u16 wqe_ctr;
@@ -519,27 +516,29 @@ repoll:
}
}
break;
- case MLX5_CQE_SIG_ERR:
- sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
+ case MLX5_CQE_SIG_ERR: {
+ struct mlx5_sig_err_cqe *sig_err_cqe =
+ (struct mlx5_sig_err_cqe *)cqe64;
+ struct mlx5_core_sig_ctx *sig;
- xa_lock(&dev->mdev->priv.mkey_table);
- mmkey = xa_load(&dev->mdev->priv.mkey_table,
+ xa_lock(&dev->sig_mrs);
+ sig = xa_load(&dev->sig_mrs,
mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
- mr = to_mibmr(mmkey);
- get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
- mr->sig->sig_err_exists = true;
- mr->sig->sigerr_count++;
+ get_sig_err_item(sig_err_cqe, &sig->err_item);
+ sig->sig_err_exists = true;
+ sig->sigerr_count++;
mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n",
- cq->mcq.cqn, mr->sig->err_item.key,
- mr->sig->err_item.err_type,
- mr->sig->err_item.sig_err_offset,
- mr->sig->err_item.expected,
- mr->sig->err_item.actual);
+ cq->mcq.cqn, sig->err_item.key,
+ sig->err_item.err_type,
+ sig->err_item.sig_err_offset,
+ sig->err_item.expected,
+ sig->err_item.actual);
- xa_unlock(&dev->mdev->priv.mkey_table);
+ xa_unlock(&dev->sig_mrs);
goto repoll;
}
+ }
return 0;
}
@@ -709,8 +708,8 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
*cqe_size = ucmd.cqe_size;
cq->buf.umem =
- ib_umem_get(udata, ucmd.buf_addr, entries * ucmd.cqe_size,
- IB_ACCESS_LOCAL_WRITE, 1);
+ ib_umem_get(&dev->ib_dev, ucmd.buf_addr,
+ entries * ucmd.cqe_size, IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(cq->buf.umem)) {
err = PTR_ERR(cq->buf.umem);
return err;
@@ -1109,9 +1108,9 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
if (ucmd.cqe_size && SIZE_MAX / ucmd.cqe_size <= entries - 1)
return -EINVAL;
- umem = ib_umem_get(udata, ucmd.buf_addr,
+ umem = ib_umem_get(&dev->ib_dev, ucmd.buf_addr,
(size_t)ucmd.cqe_size * entries,
- IB_ACCESS_LOCAL_WRITE, 1);
+ IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(umem)) {
err = PTR_ERR(umem);
return err;
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index af5bbb35c058..46e1ab771f10 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -30,7 +30,7 @@ enum devx_obj_flags {
struct devx_async_data {
struct mlx5_ib_dev *mdev;
struct list_head list;
- struct ib_uobject *fd_uobj;
+ struct devx_async_cmd_event_file *ev_file;
struct mlx5_async_work cb_work;
u16 cmd_out_len;
/* must be last field in this structure */
@@ -72,7 +72,6 @@ struct devx_event_subscription {
struct rcu_head rcu;
u64 cookie;
struct devx_async_event_file *ev_file;
- struct file *filp; /* Upon hot unplug we need a direct access to */
struct eventfd_ctx *eventfd;
};
@@ -100,6 +99,7 @@ struct devx_obj {
struct mlx5_ib_devx_mr devx_mr;
struct mlx5_core_dct core_dct;
struct mlx5_core_cq core_cq;
+ u32 flow_counter_bulk_size;
};
struct list_head event_sub; /* holds devx_event_subscription entries */
};
@@ -192,15 +192,20 @@ bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type)
}
}
-bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id)
+bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id)
{
struct devx_obj *devx_obj = obj;
u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
+
+ if (offset && offset >= devx_obj->flow_counter_bulk_size)
+ return false;
+
*counter_id = MLX5_GET(dealloc_flow_counter_in,
devx_obj->dinbox,
flow_counter_id);
+ *counter_id += offset;
return true;
}
@@ -233,6 +238,8 @@ static bool is_legacy_obj_event_num(u16 event_num)
case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
case MLX5_EVENT_TYPE_DCT_DRAINED:
case MLX5_EVENT_TYPE_COMP:
+ case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
+ case MLX5_EVENT_TYPE_XRQ_ERROR:
return true;
default:
return false;
@@ -315,8 +322,10 @@ static u16 get_event_obj_type(unsigned long event_type, struct mlx5_eqe *eqe)
case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
return eqe->data.qp_srq.type;
case MLX5_EVENT_TYPE_CQ_ERROR:
+ case MLX5_EVENT_TYPE_XRQ_ERROR:
return 0;
case MLX5_EVENT_TYPE_DCT_DRAINED:
+ case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
return MLX5_EVENT_QUEUE_TYPE_DCT;
default:
return MLX5_GET(affiliated_event_header, &eqe->data, obj_type);
@@ -542,6 +551,8 @@ static u64 devx_get_obj_id(const void *in)
break;
case MLX5_CMD_OP_ARM_XRQ:
case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
+ case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
+ case MLX5_CMD_OP_MODIFY_XRQ:
obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
MLX5_GET(arm_xrq_in, in, xrqn));
break;
@@ -776,6 +787,14 @@ static bool devx_is_obj_create_cmd(const void *in, u16 *opcode)
return true;
return false;
}
+ case MLX5_CMD_OP_CREATE_PSV:
+ {
+ u8 num_psv = MLX5_GET(create_psv_in, in, num_psv);
+
+ if (num_psv == 1)
+ return true;
+ return false;
+ }
default:
return false;
}
@@ -810,6 +829,8 @@ static bool devx_is_obj_modify_cmd(const void *in)
case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
case MLX5_CMD_OP_ARM_XRQ:
case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
+ case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
+ case MLX5_CMD_OP_MODIFY_XRQ:
return true;
case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
{
@@ -922,6 +943,7 @@ static bool devx_is_general_cmd(void *in, struct mlx5_ib_dev *dev)
case MLX5_CMD_OP_QUERY_CONG_STATUS:
case MLX5_CMD_OP_QUERY_CONG_PARAMS:
case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
+ case MLX5_CMD_OP_QUERY_LAG:
return true;
default:
return false;
@@ -1215,6 +1237,12 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
case MLX5_CMD_OP_ALLOC_XRCD:
MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
break;
+ case MLX5_CMD_OP_CREATE_PSV:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DESTROY_PSV);
+ MLX5_SET(destroy_psv_in, din, psvn,
+ MLX5_GET(create_psv_out, out, psv0_index));
+ break;
default:
/* The entry must match to one of the devx_is_obj_create_cmd */
WARN_ON(true);
@@ -1242,8 +1270,8 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj,
mkey->pd = MLX5_GET(mkc, mkc, pd);
devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size);
- return xa_err(xa_store(&dev->mdev->priv.mkey_table,
- mlx5_base_mkey(mkey->key), mkey, GFP_KERNEL));
+ return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mkey->key), mkey,
+ GFP_KERNEL));
}
static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
@@ -1275,29 +1303,6 @@ static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
return 0;
}
-static void devx_free_indirect_mkey(struct rcu_head *rcu)
-{
- kfree(container_of(rcu, struct devx_obj, devx_mr.rcu));
-}
-
-/* This function to delete from the radix tree needs to be called before
- * destroying the underlying mkey. Otherwise a race might occur in case that
- * other thread will get the same mkey before this one will be deleted,
- * in that case it will fail via inserting to the tree its own data.
- *
- * Note:
- * An error in the destroy is not expected unless there is some other indirect
- * mkey which points to this one. In a kernel cleanup flow it will be just
- * destroyed in the iterative destruction call. In a user flow, in case
- * the application didn't close in the expected order it's its own problem,
- * the mkey won't be part of the tree, in both cases the kernel is safe.
- */
-static void devx_cleanup_mkey(struct devx_obj *obj)
-{
- xa_erase(&obj->ib_dev->mdev->priv.mkey_table,
- mlx5_base_mkey(obj->devx_mr.mmkey.key));
-}
-
static void devx_cleanup_subscription(struct mlx5_ib_dev *dev,
struct devx_event_subscription *sub)
{
@@ -1339,8 +1344,16 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
int ret;
dev = mlx5_udata_to_mdev(&attrs->driver_udata);
- if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
- devx_cleanup_mkey(obj);
+ if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
+ /*
+ * The pagefault_single_data_segment() path issues commands against
+ * the mmkey; we must wait for that to stop before freeing the
+ * mkey, as another allocation could get the same mkey #.
+ */
+ xa_erase(&obj->ib_dev->odp_mkeys,
+ mlx5_base_mkey(obj->devx_mr.mmkey.key));
+ synchronize_srcu(&dev->odp_srcu);
+ }
if (obj->flags & DEVX_OBJ_FLAGS_DCT)
ret = mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct);
@@ -1359,12 +1372,6 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
devx_cleanup_subscription(dev, sub_entry);
mutex_unlock(&devx_event_table->event_xa_lock);
- if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
- call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu,
- devx_free_indirect_mkey);
- return ret;
- }
-
kfree(obj);
return ret;
}
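
The cleanup above first erases the mkey from the xarray and only then calls synchronize_srcu(), so any page-fault path still dereferencing it drains before the object is freed. The sketch below mimics that ordering in userspace with an explicit reader count standing in for the SRCU grace period (a simplified stand-in for illustration, not how SRCU itself works):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

struct obj { int key; };

static _Atomic(struct obj *) table;	/* one-slot "table" for the demo */
static atomic_int readers;		/* stand-in for an SRCU read section */

static void *reader(void *arg)
{
	(void)arg;
	atomic_fetch_add(&readers, 1);		/* "srcu_read_lock()" */
	struct obj *o = atomic_load(&table);
	if (o)
		printf("reader sees key %d\n", o->key);
	usleep(1000);				/* still using the object */
	atomic_fetch_sub(&readers, 1);		/* "srcu_read_unlock()" */
	return NULL;
}

int main(void)
{
	pthread_t t;
	struct obj *o = malloc(sizeof(*o));

	o->key = 42;
	atomic_store(&table, o);
	pthread_create(&t, NULL, reader, NULL);
	usleep(100);

	atomic_store(&table, (struct obj *)NULL);	/* 1. unpublish (xa_erase)  */
	while (atomic_load(&readers))			/* 2. wait out readers      */
		;					/*    (synchronize_srcu)    */
	free(o);					/* 3. now safe to free      */

	pthread_join(t, NULL);
	return 0;
}

Build with -pthread; a reader that starts after the unpublish step simply sees an empty table and never touches freed memory.
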
@@ -1461,6 +1468,13 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
if (err)
goto obj_free;
+ if (opcode == MLX5_CMD_OP_ALLOC_FLOW_COUNTER) {
+ u8 bulk = MLX5_GET(alloc_flow_counter_in,
+ cmd_in,
+ flow_counter_bulk);
+ obj->flow_counter_bulk_size = 128UL * bulk;
+ }
+
uobj->object = obj;
INIT_LIST_HEAD(&obj->event_sub);
obj->ib_dev = dev;
@@ -1468,26 +1482,21 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
&obj_id);
WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
- if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
- err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out);
- if (err)
- goto obj_destroy;
- }
-
err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
if (err)
- goto err_copy;
+ goto obj_destroy;
if (opcode == MLX5_CMD_OP_CREATE_GENERAL_OBJECT)
obj_type = MLX5_GET(general_obj_in_cmd_hdr, cmd_in, obj_type);
-
obj->obj_id = get_enc_obj_id(opcode | obj_type << 16, obj_id);
+ if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
+ err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out);
+ if (err)
+ goto obj_destroy;
+ }
return 0;
-err_copy:
- if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
- devx_cleanup_mkey(obj);
obj_destroy:
if (obj->flags & DEVX_OBJ_FLAGS_DCT)
mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct);
@@ -1664,21 +1673,20 @@ static void devx_query_callback(int status, struct mlx5_async_work *context)
{
struct devx_async_data *async_data =
container_of(context, struct devx_async_data, cb_work);
- struct ib_uobject *fd_uobj = async_data->fd_uobj;
- struct devx_async_cmd_event_file *ev_file;
- struct devx_async_event_queue *ev_queue;
+ struct devx_async_cmd_event_file *ev_file = async_data->ev_file;
+ struct devx_async_event_queue *ev_queue = &ev_file->ev_queue;
unsigned long flags;
- ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file,
- uobj);
- ev_queue = &ev_file->ev_queue;
-
+ /*
+ * Note that if the struct devx_async_cmd_event_file uobj begins to be
+ * destroyed it will block at mlx5_cmd_cleanup_async_ctx() until this
+ * routine returns, ensuring that it always remains valid here.
+ */
spin_lock_irqsave(&ev_queue->lock, flags);
list_add_tail(&async_data->list, &ev_queue->event_list);
spin_unlock_irqrestore(&ev_queue->lock, flags);
wake_up_interruptible(&ev_queue->poll_wait);
- fput(fd_uobj->object);
}
#define MAX_ASYNC_BYTES_IN_USE (1024 * 1024) /* 1MB */
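
devx_query_callback() now recovers its devx_async_data from the embedded mlx5_async_work with container_of() instead of holding a file reference. The offsetof-based trick can be shown standalone (the types and names below are illustrative, not the devx structures):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work { void (*fn)(struct work *w); };

struct async_data {
	int cmd_out_len;
	struct work cb_work;	/* embedded member handed to the executor */
};

static void callback(struct work *w)
{
	/* Recover the enclosing object from the embedded member. */
	struct async_data *data = container_of(w, struct async_data, cb_work);

	printf("completed, out_len=%d\n", data->cmd_out_len);
}

int main(void)
{
	struct async_data d = { .cmd_out_len = 256, .cb_work = { .fn = callback } };

	d.cb_work.fn(&d.cb_work);	/* what the async executor would do */
	return 0;
}
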
@@ -1747,9 +1755,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)(
async_data->cmd_out_len = cmd_out_len;
async_data->mdev = mdev;
- async_data->fd_uobj = fd_uobj;
+ async_data->ev_file = ev_file;
- get_file(fd_uobj->object);
MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
err = mlx5_cmd_exec_cb(&ev_file->async_ctx, cmd_in,
uverbs_attr_get_len(attrs,
@@ -1759,12 +1766,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)(
devx_query_callback, &async_data->cb_work);
if (err)
- goto cb_err;
+ goto free_async;
return 0;
-cb_err:
- fput(fd_uobj->object);
free_async:
kvfree(async_data);
sub_bytes:
@@ -2022,6 +2027,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
goto err;
list_add_tail(&event_sub->event_list, &sub_list);
+ uverbs_uobject_get(&ev_file->uobj);
if (use_eventfd) {
event_sub->eventfd =
eventfd_ctx_fdget(redirect_fd);
@@ -2035,7 +2041,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
event_sub->cookie = cookie;
event_sub->ev_file = ev_file;
- event_sub->filp = fd_uobj->object;
/* May be needed upon cleanup the devx object/subscription */
event_sub->xa_key_level1 = key_level1;
event_sub->xa_key_level2 = obj_id;
@@ -2089,7 +2094,7 @@ err:
if (event_sub->eventfd)
eventfd_ctx_put(event_sub->eventfd);
-
+ uverbs_uobject_put(&event_sub->ev_file->uobj);
kfree(event_sub);
}
@@ -2124,7 +2129,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
if (err)
return err;
- obj->umem = ib_umem_get(&attrs->driver_udata, addr, size, access, 0);
+ obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access);
if (IS_ERR(obj->umem))
return PTR_ERR(obj->umem);
@@ -2285,7 +2290,11 @@ static u32 devx_get_obj_id_from_event(unsigned long event_type, void *data)
case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
obj_id = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
break;
+ case MLX5_EVENT_TYPE_XRQ_ERROR:
+ obj_id = be32_to_cpu(eqe->data.xrq_err.type_xrqn) & 0xffffff;
+ break;
case MLX5_EVENT_TYPE_DCT_DRAINED:
+ case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
obj_id = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
break;
case MLX5_EVENT_TYPE_CQ_ERROR:
@@ -2310,7 +2319,8 @@ static int deliver_event(struct devx_event_subscription *event_sub,
if (ev_file->omit_data) {
spin_lock_irqsave(&ev_file->lock, flags);
- if (!list_empty(&event_sub->event_list)) {
+ if (!list_empty(&event_sub->event_list) ||
+ ev_file->is_destroyed) {
spin_unlock_irqrestore(&ev_file->lock, flags);
return 0;
}
@@ -2334,7 +2344,10 @@ static int deliver_event(struct devx_event_subscription *event_sub,
memcpy(event_data->hdr.out_data, data, sizeof(struct mlx5_eqe));
spin_lock_irqsave(&ev_file->lock, flags);
- list_add_tail(&event_data->list, &ev_file->event_list);
+ if (!ev_file->is_destroyed)
+ list_add_tail(&event_data->list, &ev_file->event_list);
+ else
+ kfree(event_data);
spin_unlock_irqrestore(&ev_file->lock, flags);
wake_up_interruptible(&ev_file->poll_wait);
@@ -2347,17 +2360,10 @@ static void dispatch_event_fd(struct list_head *fd_list,
struct devx_event_subscription *item;
list_for_each_entry_rcu(item, fd_list, xa_list) {
- if (!get_file_rcu(item->filp))
- continue;
-
- if (item->eventfd) {
+ if (item->eventfd)
eventfd_signal(item->eventfd, 1);
- fput(item->filp);
- continue;
- }
-
- deliver_event(item, data);
- fput(item->filp);
+ else
+ deliver_event(item, data);
}
}
@@ -2465,11 +2471,11 @@ static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
return -ERESTARTSYS;
}
- if (list_empty(&ev_queue->event_list) &&
- ev_queue->is_destroyed)
- return -EIO;
-
spin_lock_irq(&ev_queue->lock);
+ if (ev_queue->is_destroyed) {
+ spin_unlock_irq(&ev_queue->lock);
+ return -EIO;
+ }
}
event = list_entry(ev_queue->event_list.next,
@@ -2495,23 +2501,6 @@ static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
return ret;
}
-static int devx_async_cmd_event_close(struct inode *inode, struct file *filp)
-{
- struct ib_uobject *uobj = filp->private_data;
- struct devx_async_cmd_event_file *comp_ev_file = container_of(
- uobj, struct devx_async_cmd_event_file, uobj);
- struct devx_async_data *entry, *tmp;
-
- spin_lock_irq(&comp_ev_file->ev_queue.lock);
- list_for_each_entry_safe(entry, tmp,
- &comp_ev_file->ev_queue.event_list, list)
- kvfree(entry);
- spin_unlock_irq(&comp_ev_file->ev_queue.lock);
-
- uverbs_close_fd(filp);
- return 0;
-}
-
static __poll_t devx_async_cmd_event_poll(struct file *filp,
struct poll_table_struct *wait)
{
@@ -2535,7 +2524,7 @@ static const struct file_operations devx_async_cmd_event_fops = {
.owner = THIS_MODULE,
.read = devx_async_cmd_event_read,
.poll = devx_async_cmd_event_poll,
- .release = devx_async_cmd_event_close,
+ .release = uverbs_uobject_fd_release,
.llseek = no_llseek,
};
@@ -2560,10 +2549,6 @@ static ssize_t devx_async_event_read(struct file *filp, char __user *buf,
return -EOVERFLOW;
}
- if (ev_file->is_destroyed) {
- spin_unlock_irq(&ev_file->lock);
- return -EIO;
- }
while (list_empty(&ev_file->event_list)) {
spin_unlock_irq(&ev_file->lock);
@@ -2639,81 +2624,96 @@ static __poll_t devx_async_event_poll(struct file *filp,
return pollflags;
}
-static int devx_async_event_close(struct inode *inode, struct file *filp)
+static void devx_free_subscription(struct rcu_head *rcu)
{
- struct devx_async_event_file *ev_file = filp->private_data;
- struct devx_event_subscription *event_sub, *event_sub_tmp;
- struct devx_async_event_data *entry, *tmp;
- struct mlx5_ib_dev *dev = ev_file->dev;
+ struct devx_event_subscription *event_sub =
+ container_of(rcu, struct devx_event_subscription, rcu);
- mutex_lock(&dev->devx_event_table.event_xa_lock);
- /* delete the subscriptions which are related to this FD */
- list_for_each_entry_safe(event_sub, event_sub_tmp,
- &ev_file->subscribed_events_list, file_list) {
- devx_cleanup_subscription(dev, event_sub);
- if (event_sub->eventfd)
- eventfd_ctx_put(event_sub->eventfd);
-
- list_del_rcu(&event_sub->file_list);
- /* subscription may not be used by the read API any more */
- kfree_rcu(event_sub, rcu);
- }
-
- mutex_unlock(&dev->devx_event_table.event_xa_lock);
-
- /* free the pending events allocation */
- if (!ev_file->omit_data) {
- spin_lock_irq(&ev_file->lock);
- list_for_each_entry_safe(entry, tmp,
- &ev_file->event_list, list)
- kfree(entry); /* read can't come any more */
- spin_unlock_irq(&ev_file->lock);
- }
-
- uverbs_close_fd(filp);
- put_device(&dev->ib_dev.dev);
- return 0;
+ if (event_sub->eventfd)
+ eventfd_ctx_put(event_sub->eventfd);
+ uverbs_uobject_put(&event_sub->ev_file->uobj);
+ kfree(event_sub);
}
static const struct file_operations devx_async_event_fops = {
.owner = THIS_MODULE,
.read = devx_async_event_read,
.poll = devx_async_event_poll,
- .release = devx_async_event_close,
+ .release = uverbs_uobject_fd_release,
.llseek = no_llseek,
};
-static int devx_hot_unplug_async_cmd_event_file(struct ib_uobject *uobj,
- enum rdma_remove_reason why)
+static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
{
struct devx_async_cmd_event_file *comp_ev_file =
container_of(uobj, struct devx_async_cmd_event_file,
uobj);
struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
+ struct devx_async_data *entry, *tmp;
spin_lock_irq(&ev_queue->lock);
ev_queue->is_destroyed = 1;
spin_unlock_irq(&ev_queue->lock);
-
- if (why == RDMA_REMOVE_DRIVER_REMOVE)
- wake_up_interruptible(&ev_queue->poll_wait);
+ wake_up_interruptible(&ev_queue->poll_wait);
mlx5_cmd_cleanup_async_ctx(&comp_ev_file->async_ctx);
+
+ spin_lock_irq(&comp_ev_file->ev_queue.lock);
+ list_for_each_entry_safe(entry, tmp,
+ &comp_ev_file->ev_queue.event_list, list) {
+ list_del(&entry->list);
+ kvfree(entry);
+ }
+ spin_unlock_irq(&comp_ev_file->ev_queue.lock);
return 0;
};
-static int devx_hot_unplug_async_event_file(struct ib_uobject *uobj,
- enum rdma_remove_reason why)
+static int devx_async_event_destroy_uobj(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
{
struct devx_async_event_file *ev_file =
container_of(uobj, struct devx_async_event_file,
uobj);
+ struct devx_event_subscription *event_sub, *event_sub_tmp;
+ struct mlx5_ib_dev *dev = ev_file->dev;
spin_lock_irq(&ev_file->lock);
ev_file->is_destroyed = 1;
- spin_unlock_irq(&ev_file->lock);
+ /* free the pending events allocation */
+ if (ev_file->omit_data) {
+ struct devx_event_subscription *event_sub, *tmp;
+
+ list_for_each_entry_safe(event_sub, tmp, &ev_file->event_list,
+ event_list)
+ list_del_init(&event_sub->event_list);
+
+ } else {
+ struct devx_async_event_data *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &ev_file->event_list,
+ list) {
+ list_del(&entry->list);
+ kfree(entry);
+ }
+ }
+
+ spin_unlock_irq(&ev_file->lock);
wake_up_interruptible(&ev_file->poll_wait);
+
+ mutex_lock(&dev->devx_event_table.event_xa_lock);
+ /* delete the subscriptions which are related to this FD */
+ list_for_each_entry_safe(event_sub, event_sub_tmp,
+ &ev_file->subscribed_events_list, file_list) {
+ devx_cleanup_subscription(dev, event_sub);
+ list_del_rcu(&event_sub->file_list);
+ /* subscription may not be used by the read API any more */
+ call_rcu(&event_sub->rcu, devx_free_subscription);
+ }
+ mutex_unlock(&dev->devx_event_table.event_xa_lock);
+
+ put_device(&dev->ib_dev.dev);
return 0;
};
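
Both destroy_uobj paths above mark the queue destroyed under its lock, drain whatever is already queued, and rely on the delivery side checking is_destroyed before adding new entries, so nothing can be queued onto a dying file. A compact sketch of that handshake, using a generic event queue rather than the devx structures:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct event { struct event *next; int id; };

static struct {
	pthread_mutex_t lock;
	struct event *head;
	int is_destroyed;
} q = { .lock = PTHREAD_MUTEX_INITIALIZER };

static int deliver(int id)
{
	struct event *ev = malloc(sizeof(*ev));

	ev->id = id;
	pthread_mutex_lock(&q.lock);
	if (q.is_destroyed) {		/* dying queue: drop instead of queueing */
		pthread_mutex_unlock(&q.lock);
		free(ev);
		return -1;
	}
	ev->next = q.head;
	q.head = ev;
	pthread_mutex_unlock(&q.lock);
	return 0;
}

static void destroy(void)
{
	pthread_mutex_lock(&q.lock);
	q.is_destroyed = 1;		/* 1. no new entries after this point */
	struct event *ev = q.head;	/* 2. drain what is already queued    */
	q.head = NULL;
	pthread_mutex_unlock(&q.lock);

	while (ev) {
		struct event *next = ev->next;

		printf("dropping pending event %d\n", ev->id);
		free(ev);
		ev = next;
	}
}

int main(void)
{
	deliver(1);
	deliver(2);
	destroy();
	printf("deliver after destroy -> %d\n", deliver(3));
	return 0;
}
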
@@ -2899,7 +2899,7 @@ DECLARE_UVERBS_NAMED_METHOD(
DECLARE_UVERBS_NAMED_OBJECT(
MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_cmd_event_file),
- devx_hot_unplug_async_cmd_event_file,
+ devx_async_cmd_event_destroy_uobj,
&devx_async_cmd_event_fops, "[devx_async_cmd]",
O_RDONLY),
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC));
@@ -2917,7 +2917,7 @@ DECLARE_UVERBS_NAMED_METHOD(
DECLARE_UVERBS_NAMED_OBJECT(
MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_event_file),
- devx_hot_unplug_async_event_file,
+ devx_async_event_destroy_uobj,
&devx_async_event_fops, "[devx_async_event]",
O_RDONLY),
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC));
diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c
index 8f4e5f22b84c..61475b571531 100644
--- a/drivers/infiniband/hw/mlx5/doorbell.c
+++ b/drivers/infiniband/hw/mlx5/doorbell.c
@@ -64,7 +64,8 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context,
page->user_virt = (virt & PAGE_MASK);
page->refcnt = 0;
- page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0);
+ page->umem = ib_umem_get(context->ibucontext.device, virt & PAGE_MASK,
+ PAGE_SIZE, 0);
if (IS_ERR(page->umem)) {
err = PTR_ERR(page->umem);
kfree(page);
diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
index b8841355fcd5..dbee17d22d50 100644
--- a/drivers/infiniband/hw/mlx5/flow.c
+++ b/drivers/infiniband/hw/mlx5/flow.c
@@ -32,6 +32,9 @@ mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB:
*namespace = MLX5_FLOW_NAMESPACE_FDB;
break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX:
+ *namespace = MLX5_FLOW_NAMESPACE_RDMA_RX;
+ break;
default:
return -EINVAL;
}
@@ -82,6 +85,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
int len, ret, i;
u32 counter_id = 0;
+ u32 *offset_attr;
+ u32 offset = 0;
if (!capable(CAP_NET_RAW))
return -EPERM;
@@ -101,6 +106,11 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !dest_devx)
return -EINVAL;
+ /* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
+ if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
+ ((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
+ return -EINVAL;
+
if (dest_devx) {
devx_obj = uverbs_attr_get_obj(
attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
@@ -112,8 +122,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
*/
if (!mlx5_ib_devx_is_flow_dest(devx_obj, &dest_id, &dest_type))
return -EINVAL;
- /* Allow only flow table as dest when inserting to FDB */
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB &&
+ /* Allow only flow table as dest when inserting to FDB or RDMA_RX */
+ if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
return -EINVAL;
} else if (dest_qp) {
@@ -142,8 +153,27 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
if (len) {
devx_obj = arr_flow_actions[0]->object;
- if (!mlx5_ib_devx_is_flow_counter(devx_obj, &counter_id))
+ if (uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) {
+
+ int num_offsets = uverbs_attr_ptr_get_array_size(
+ attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
+ sizeof(u32));
+
+ if (num_offsets != 1)
+ return -EINVAL;
+
+ offset_attr = uverbs_attr_get_alloced_ptr(
+ attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET);
+ offset = *offset_attr;
+ }
+
+ if (!mlx5_ib_devx_is_flow_counter(devx_obj, offset,
+ &counter_id))
return -EINVAL;
+
flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
}
@@ -322,11 +352,11 @@ void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
switch (maction->flow_action_raw.sub_type) {
case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
- maction->flow_action_raw.action_id);
+ maction->flow_action_raw.modify_hdr);
break;
case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
- maction->flow_action_raw.action_id);
+ maction->flow_action_raw.pkt_reformat);
break;
case MLX5_IB_FLOW_ACTION_DECAP:
break;
@@ -352,10 +382,11 @@ mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
if (!maction)
return ERR_PTR(-ENOMEM);
- ret = mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in,
- &maction->flow_action_raw.action_id);
+ maction->flow_action_raw.modify_hdr =
+ mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in);
- if (ret) {
+ if (IS_ERR(maction->flow_action_raw.modify_hdr)) {
+ ret = PTR_ERR(maction->flow_action_raw.modify_hdr);
kfree(maction);
return ERR_PTR(ret);
}
@@ -479,11 +510,13 @@ static int mlx5_ib_flow_action_create_packet_reformat_ctx(
if (ret)
return ret;
- ret = mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len,
- in, namespace,
- &maction->flow_action_raw.action_id);
- if (ret)
+ maction->flow_action_raw.pkt_reformat =
+ mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len,
+ in, namespace);
+ if (IS_ERR(maction->flow_action_raw.pkt_reformat)) {
+ ret = PTR_ERR(maction->flow_action_raw.pkt_reformat);
return ret;
+ }
maction->flow_action_raw.sub_type =
MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
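
The modify-header and packet-reformat allocators now hand back the object itself using the kernel's ERR_PTR()/IS_ERR()/PTR_ERR() encoding, instead of an int return plus an out-parameter. A self-contained version of that encoding, roughly as the kernel defines it (the reformat struct is a placeholder):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Errno values are folded into the top MAX_ERRNO addresses of pointer space. */
#define MAX_ERRNO	4095
#define ERR_PTR(err)	((void *)(intptr_t)(err))
#define PTR_ERR(ptr)	((long)(intptr_t)(ptr))
#define IS_ERR(ptr)	((uintptr_t)(ptr) >= (uintptr_t)-MAX_ERRNO)

struct reformat { int id; };

static struct reformat *reformat_alloc(int want_fail)
{
	if (want_fail)
		return ERR_PTR(-ENOMEM);

	struct reformat *r = malloc(sizeof(*r));

	if (!r)
		return ERR_PTR(-ENOMEM);
	r->id = 7;
	return r;
}

int main(void)
{
	struct reformat *r = reformat_alloc(0);

	if (IS_ERR(r))
		return (int)-PTR_ERR(r);
	printf("allocated reformat id %d\n", r->id);
	free(r);

	r = reformat_alloc(1);
	if (IS_ERR(r))
		printf("allocation failed: %ld\n", PTR_ERR(r));
	return 0;
}
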
@@ -586,7 +619,11 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
MLX5_IB_OBJECT_DEVX_OBJ,
UVERBS_ACCESS_READ, 1, 1,
- UA_OPTIONAL));
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
+ UVERBS_ATTR_MIN_SIZE(sizeof(u32)),
+ UA_OPTIONAL,
+ UA_ALLOC_AND_COPY));
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
MLX5_IB_METHOD_DESTROY_FLOW,
diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c
index 4950df3f71b6..1ae6fd95acaa 100644
--- a/drivers/infiniband/hw/mlx5/gsi.c
+++ b/drivers/infiniband/hw/mlx5/gsi.c
@@ -263,7 +263,7 @@ static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi)
},
.sq_sig_type = gsi->sq_sig_type,
.qp_type = IB_QPT_UD,
- .create_flags = mlx5_ib_create_qp_sqpn_qp1(),
+ .create_flags = MLX5_IB_QP_CREATE_SQPN_QP1,
};
return ib_create_qp(pd, &init_attr);
@@ -507,8 +507,7 @@ int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr,
ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr);
if (ret) {
/* Undo the effect of adding the outstanding wr */
- gsi->outstanding_pi = (gsi->outstanding_pi - 1) %
- gsi->cap.max_send_wr;
+ gsi->outstanding_pi--;
goto err;
}
spin_unlock_irqrestore(&gsi->lock, flags);
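
The gsi change above appears to leave outstanding_pi free-running instead of wrapping it with a modulo: the producer index is reduced modulo the ring size only where it indexes the array, so "pi - ci" always gives the number of outstanding entries, even across unsigned wraparound. A small sketch of that ring-buffer convention (sizes and values are illustrative):

#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 8

static int ring[RING_SIZE];
static uint32_t pi, ci;		/* free-running producer/consumer indices */

static int ring_post(int v)
{
	if (pi - ci == RING_SIZE)	/* works even after pi wraps past 0 */
		return -1;		/* full */
	ring[pi % RING_SIZE] = v;	/* reduce only when indexing */
	pi++;
	return 0;
}

static int ring_complete(void)
{
	if (pi == ci)
		return -1;		/* empty */
	return ring[ci++ % RING_SIZE];
}

int main(void)
{
	for (int i = 0; i < 10; i++)
		if (ring_post(i))
			printf("post %d: ring full\n", i);
	for (int v; (v = ring_complete()) >= 0; )
		printf("completed %d\n", v);
	return 0;
}
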
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index 74ce9249e75a..5c3d052ac30b 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -35,7 +35,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
int vport_index;
if (rep->vport == MLX5_VPORT_UPLINK)
- profile = &uplink_rep_profile;
+ profile = &raw_eth_profile;
else
return mlx5_ib_set_vport_rep(dev, rep);
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h
index de43b423bafc..3b6750cba796 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.h
+++ b/drivers/infiniband/hw/mlx5/ib_rep.h
@@ -10,7 +10,7 @@
#include "mlx5_ib.h"
#ifdef CONFIG_MLX5_ESWITCH
-extern const struct mlx5_ib_profile uplink_rep_profile;
+extern const struct mlx5_ib_profile raw_eth_profile;
u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw);
struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c
index 649a3364f838..b61165359954 100644
--- a/drivers/infiniband/hw/mlx5/ib_virt.c
+++ b/drivers/infiniband/hw/mlx5/ib_virt.c
@@ -164,8 +164,10 @@ static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid)
in->field_select = MLX5_HCA_VPORT_SEL_NODE_GUID;
in->node_guid = guid;
err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
- if (!err)
+ if (!err) {
vfs_ctx[vf].node_guid = guid;
+ vfs_ctx[vf].node_guid_valid = 1;
+ }
kfree(in);
return err;
}
@@ -185,8 +187,10 @@ static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid)
in->field_select = MLX5_HCA_VPORT_SEL_PORT_GUID;
in->port_guid = guid;
err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
- if (!err)
+ if (!err) {
vfs_ctx[vf].port_guid = guid;
+ vfs_ctx[vf].port_guid_valid = 1;
+ }
kfree(in);
return err;
}
@@ -201,3 +205,19 @@ int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
return -EINVAL;
}
+
+int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u8 port,
+ struct ifla_vf_guid *node_guid,
+ struct ifla_vf_guid *port_guid)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx;
+
+ node_guid->guid =
+ vfs_ctx[vf].node_guid_valid ? vfs_ctx[vf].node_guid : 0;
+ port_guid->guid =
+ vfs_ctx[vf].port_guid_valid ? vfs_ctx[vf].port_guid : 0;
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 348c1df69cdc..14e0c17de6a9 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -74,58 +74,6 @@ static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey,
port);
}
-static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
- const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad *in_mad, struct ib_mad *out_mad)
-{
- u16 slid;
- int err;
-
- slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE);
-
- if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0)
- return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-
- if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
- in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
- if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
- in_mad->mad_hdr.method != IB_MGMT_METHOD_SET &&
- in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
- return IB_MAD_RESULT_SUCCESS;
-
- /* Don't process SMInfo queries -- the SMA can't handle them.
- */
- if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO)
- return IB_MAD_RESULT_SUCCESS;
- } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
- in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS1 ||
- in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS2 ||
- in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) {
- if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
- in_mad->mad_hdr.method != IB_MGMT_METHOD_SET)
- return IB_MAD_RESULT_SUCCESS;
- } else {
- return IB_MAD_RESULT_SUCCESS;
- }
-
- err = mlx5_MAD_IFC(to_mdev(ibdev),
- mad_flags & IB_MAD_IGNORE_MKEY,
- mad_flags & IB_MAD_IGNORE_BKEY,
- port_num, in_wc, in_grh, in_mad, out_mad);
- if (err)
- return IB_MAD_RESULT_FAILURE;
-
- /* set return bit in status of directed route responses */
- if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
- out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
-
- if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
- /* no response for trap repress */
- return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-
- return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
-}
-
static void pma_cnt_ext_assign(struct ib_pma_portcounters_ext *pma_cnt_ext,
void *out)
{
@@ -271,30 +219,66 @@ done:
int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in, size_t in_mad_size,
- struct ib_mad_hdr *out, size_t *out_mad_size,
- u16 *out_mad_pkey_index)
+ const struct ib_mad *in, struct ib_mad *out,
+ size_t *out_mad_size, u16 *out_mad_pkey_index)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- const struct ib_mad *in_mad = (const struct ib_mad *)in;
- struct ib_mad *out_mad = (struct ib_mad *)out;
- int ret;
+ u8 mgmt_class = in->mad_hdr.mgmt_class;
+ u8 method = in->mad_hdr.method;
+ u16 slid;
+ int err;
- if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
- *out_mad_size != sizeof(*out_mad)))
- return IB_MAD_RESULT_FAILURE;
+ slid = in_wc ? ib_lid_cpu16(in_wc->slid) :
+ be16_to_cpu(IB_LID_PERMISSIVE);
- memset(out_mad->data, 0, sizeof(out_mad->data));
+ if (method == IB_MGMT_METHOD_TRAP && !slid)
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
- if (MLX5_CAP_GEN(dev->mdev, vport_counters) &&
- in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
- in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) {
- ret = process_pma_cmd(dev, port_num, in_mad, out_mad);
- } else {
- ret = process_mad(ibdev, mad_flags, port_num, in_wc, in_grh,
- in_mad, out_mad);
+ switch (mgmt_class) {
+ case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+ case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: {
+ if (method != IB_MGMT_METHOD_GET &&
+ method != IB_MGMT_METHOD_SET &&
+ method != IB_MGMT_METHOD_TRAP_REPRESS)
+ return IB_MAD_RESULT_SUCCESS;
+
+ /* Don't process SMInfo queries -- the SMA can't handle them.
+ */
+ if (in->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO)
+ return IB_MAD_RESULT_SUCCESS;
+ } break;
+ case IB_MGMT_CLASS_PERF_MGMT:
+ if (MLX5_CAP_GEN(dev->mdev, vport_counters) &&
+ method == IB_MGMT_METHOD_GET)
+ return process_pma_cmd(dev, port_num, in, out);
+ /* fallthrough */
+ case MLX5_IB_VENDOR_CLASS1:
+ /* fallthrough */
+ case MLX5_IB_VENDOR_CLASS2:
+ case IB_MGMT_CLASS_CONG_MGMT: {
+ if (method != IB_MGMT_METHOD_GET &&
+ method != IB_MGMT_METHOD_SET)
+ return IB_MAD_RESULT_SUCCESS;
+ } break;
+ default:
+ return IB_MAD_RESULT_SUCCESS;
}
- return ret;
+
+ err = mlx5_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY,
+ mad_flags & IB_MAD_IGNORE_BKEY, port_num, in_wc,
+ in_grh, in, out);
+ if (err)
+ return IB_MAD_RESULT_FAILURE;
+
+ /* set return bit in status of directed route responses */
+ if (mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ out->mad_hdr.status |= cpu_to_be16(1 << 15);
+
+ if (method == IB_MGMT_METHOD_TRAP_REPRESS)
+ /* no response for trap repress */
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
+
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}
int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
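
mlx5_ib_process_mad() above folds the old helper into a single switch over the management class, with a deliberate, annotated fallthrough from the PERF_MGMT case into the vendor-class handling. A minimal dispatcher showing that shape (classes, methods and handlers are placeholders, not the MAD definitions):

#include <stdio.h>

enum { CLASS_PERF = 1, CLASS_VENDOR1, CLASS_VENDOR2, CLASS_OTHER };
enum { METHOD_GET = 1, METHOD_SET };

static int handle_counters(void) { return 100; }

static int process(int mgmt_class, int method)
{
	switch (mgmt_class) {
	case CLASS_PERF:
		if (method == METHOD_GET)
			return handle_counters();	/* fast path */
		/* fallthrough: otherwise treat it like a vendor class */
	case CLASS_VENDOR1:
	case CLASS_VENDOR2:
		if (method != METHOD_GET && method != METHOD_SET)
			return 0;			/* silently consumed */
		return 1;				/* forward to firmware */
	default:
		return 0;
	}
}

int main(void)
{
	printf("%d %d %d\n",
	       process(CLASS_PERF, METHOD_GET),
	       process(CLASS_PERF, METHOD_SET),
	       process(CLASS_OTHER, METHOD_GET));
	return 0;
}
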
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index e12a4404096b..e4bcfa81b70a 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -40,7 +40,7 @@
#include <linux/slab.h>
#include <linux/bitmap.h>
#if defined(CONFIG_X86)
-#include <asm/pat.h>
+#include <asm/memtype.h>
#endif
#include <linux/sched.h>
#include <linux/sched/mm.h>
@@ -67,6 +67,7 @@
#include <rdma/uverbs_std_types.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/ib_umem_odp.h>
#define UVERBS_MODULE_NAME mlx5_ib
#include <rdma/uverbs_named_ioctl.h>
@@ -535,7 +536,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
props->pkey_tbl_len = 1;
props->state = IB_PORT_DOWN;
- props->phys_state = 3;
+ props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr);
props->qkey_viol_cntr = qkey_viol_cntr;
@@ -561,7 +562,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
if (netif_running(ndev) && netif_carrier_ok(ndev)) {
props->state = IB_PORT_ACTIVE;
- props->phys_state = 5;
+ props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
}
ndev_ib_mtu = iboe_get_mtu(ndev->mtu);
@@ -693,21 +694,6 @@ static void get_atomic_caps_qp(struct mlx5_ib_dev *dev,
get_atomic_caps(dev, atomic_size_qp, props);
}
-static void get_atomic_caps_dc(struct mlx5_ib_dev *dev,
- struct ib_device_attr *props)
-{
- u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
-
- get_atomic_caps(dev, atomic_size_qp, props);
-}
-
-bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev)
-{
- struct ib_device_attr props = {};
-
- get_atomic_caps_dc(dev, &props);
- return (props.atomic_cap == IB_ATOMIC_HCA) ? true : false;
-}
static int mlx5_query_system_image_guid(struct ib_device *ibdev,
__be64 *sys_image_guid)
{
@@ -829,6 +815,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props,
struct ib_udata *uhw)
{
+ size_t uhw_outlen = (uhw) ? uhw->outlen : 0;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
int err = -ENOMEM;
@@ -842,12 +829,12 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
u64 max_tso;
resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length);
- if (uhw->outlen && uhw->outlen < resp_len)
+ if (uhw_outlen && uhw_outlen < resp_len)
return -EINVAL;
- else
- resp.response_length = resp_len;
- if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen))
+ resp.response_length = resp_len;
+
+ if (uhw && uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen))
return -EINVAL;
memset(props, 0, sizeof(*props));
@@ -911,7 +898,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->raw_packet_caps |=
IB_RAW_PACKET_CAP_CVLAN_STRIPPING;
- if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
+ if (field_avail(typeof(resp), tso_caps, uhw_outlen)) {
max_tso = MLX5_CAP_ETH(mdev, max_lso_cap);
if (max_tso) {
resp.tso_caps.max_tso = 1 << max_tso;
@@ -921,7 +908,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
}
- if (field_avail(typeof(resp), rss_caps, uhw->outlen)) {
+ if (field_avail(typeof(resp), rss_caps, uhw_outlen)) {
resp.rss_caps.rx_hash_function =
MLX5_RX_HASH_FUNC_TOEPLITZ;
resp.rss_caps.rx_hash_fields_mask =
@@ -941,9 +928,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
resp.response_length += sizeof(resp.rss_caps);
}
} else {
- if (field_avail(typeof(resp), tso_caps, uhw->outlen))
+ if (field_avail(typeof(resp), tso_caps, uhw_outlen))
resp.response_length += sizeof(resp.tso_caps);
- if (field_avail(typeof(resp), rss_caps, uhw->outlen))
+ if (field_avail(typeof(resp), rss_caps, uhw_outlen))
resp.response_length += sizeof(resp.rss_caps);
}
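
mlx5_ib_query_device() above only fills response fields that the caller's output buffer can actually hold (field_avail against uhw_outlen) and grows resp.response_length as it goes, so old userspace keeps working against a newer kernel. A standalone sketch of that extensible-response pattern (the struct fields and cap values are made up for illustration):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct query_resp {		/* grows over time; old users pass a shorter buffer */
	uint32_t response_length;
	uint32_t base_caps;
	uint64_t tso_caps;	/* newer field */
	uint64_t rss_caps;	/* even newer field */
};

#define offsetofend(type, field) \
	(offsetof(type, field) + sizeof(((type *)0)->field))
/* Does the user's buffer extend far enough to cover 'field'? */
#define field_avail(type, field, outlen) \
	((outlen) >= offsetofend(type, field))

static size_t fill_resp(struct query_resp *resp, size_t user_outlen)
{
	memset(resp, 0, sizeof(*resp));
	resp->response_length =
		offsetof(struct query_resp, base_caps) + sizeof(resp->base_caps);
	resp->base_caps = 0x1;

	if (field_avail(struct query_resp, tso_caps, user_outlen)) {
		resp->tso_caps = 0x2;
		resp->response_length += sizeof(resp->tso_caps);
	}
	if (field_avail(struct query_resp, rss_caps, user_outlen)) {
		resp->rss_caps = 0x3;
		resp->response_length += sizeof(resp->rss_caps);
	}
	return resp->response_length;	/* copy back only this many bytes */
}

int main(void)
{
	struct query_resp r;

	printf("old user gets %zu bytes\n", fill_resp(&r, 8));
	printf("new user gets %zu bytes\n", fill_resp(&r, sizeof(r)));
	return 0;
}
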
@@ -1011,6 +998,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size);
props->max_pi_fast_reg_page_list_len =
props->max_fast_reg_page_list_len / 2;
+ props->max_sgl_rd =
+ MLX5_CAP_GEN(mdev, max_sgl_for_optimized_performance);
get_atomic_caps_qp(dev, props);
props->masked_atomic_cap = IB_ATOMIC_NONE;
props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
@@ -1023,15 +1012,32 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
- if (MLX5_CAP_GEN(mdev, pg))
+ if (dev->odp_caps.general_caps & IB_ODP_SUPPORT)
props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
props->odp_caps = dev->odp_caps;
+ if (!uhw) {
+ /* ODP for kernel QPs is not implemented for receive
+ * WQEs and SRQ WQEs
+ */
+ props->odp_caps.per_transport_caps.rc_odp_caps &=
+ ~(IB_ODP_SUPPORT_READ |
+ IB_ODP_SUPPORT_SRQ_RECV);
+ props->odp_caps.per_transport_caps.uc_odp_caps &=
+ ~(IB_ODP_SUPPORT_READ |
+ IB_ODP_SUPPORT_SRQ_RECV);
+ props->odp_caps.per_transport_caps.ud_odp_caps &=
+ ~(IB_ODP_SUPPORT_READ |
+ IB_ODP_SUPPORT_SRQ_RECV);
+ props->odp_caps.per_transport_caps.xrc_odp_caps &=
+ ~(IB_ODP_SUPPORT_READ |
+ IB_ODP_SUPPORT_SRQ_RECV);
+ }
}
if (MLX5_CAP_GEN(mdev, cd))
props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
- if (!mlx5_core_is_pf(mdev))
+ if (mlx5_core_is_vf(mdev))
props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;
if (mlx5_ib_port_link_layer(ibdev, 1) ==
@@ -1066,7 +1072,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
MLX5_MAX_CQ_PERIOD;
}
- if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
+ if (field_avail(typeof(resp), cqe_comp_caps, uhw_outlen)) {
resp.response_length += sizeof(resp.cqe_comp_caps);
if (MLX5_CAP_GEN(dev->mdev, cqe_compression)) {
@@ -1084,7 +1090,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
}
- if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen) &&
+ if (field_avail(typeof(resp), packet_pacing_caps, uhw_outlen) &&
raw_support) {
if (MLX5_CAP_QOS(mdev, packet_pacing) &&
MLX5_CAP_GEN(mdev, qos)) {
@@ -1103,7 +1109,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
- uhw->outlen)) {
+ uhw_outlen)) {
if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe))
resp.mlx5_ib_support_multi_pkt_send_wqes =
MLX5_IB_ALLOW_MPW;
@@ -1116,7 +1122,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
}
- if (field_avail(typeof(resp), flags, uhw->outlen)) {
+ if (field_avail(typeof(resp), flags, uhw_outlen)) {
resp.response_length += sizeof(resp.flags);
if (MLX5_CAP_GEN(mdev, cqe_compression_128))
@@ -1132,8 +1138,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT;
}
- if (field_avail(typeof(resp), sw_parsing_caps,
- uhw->outlen)) {
+ if (field_avail(typeof(resp), sw_parsing_caps, uhw_outlen)) {
resp.response_length += sizeof(resp.sw_parsing_caps);
if (MLX5_CAP_ETH(mdev, swp)) {
resp.sw_parsing_caps.sw_parsing_offloads |=
@@ -1153,7 +1158,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
}
- if (field_avail(typeof(resp), striding_rq_caps, uhw->outlen) &&
+ if (field_avail(typeof(resp), striding_rq_caps, uhw_outlen) &&
raw_support) {
resp.response_length += sizeof(resp.striding_rq_caps);
if (MLX5_CAP_GEN(mdev, striding_rq)) {
@@ -1161,8 +1166,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES;
resp.striding_rq_caps.max_single_stride_log_num_of_bytes =
MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES;
- resp.striding_rq_caps.min_single_wqe_log_num_of_strides =
- MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
+ if (MLX5_CAP_GEN(dev->mdev, ext_stride_num_range))
+ resp.striding_rq_caps
+ .min_single_wqe_log_num_of_strides =
+ MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
+ else
+ resp.striding_rq_caps
+ .min_single_wqe_log_num_of_strides =
+ MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
resp.striding_rq_caps.max_single_wqe_log_num_of_strides =
MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES;
resp.striding_rq_caps.supported_qpts =
@@ -1170,8 +1181,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
}
- if (field_avail(typeof(resp), tunnel_offloads_caps,
- uhw->outlen)) {
+ if (field_avail(typeof(resp), tunnel_offloads_caps, uhw_outlen)) {
resp.response_length += sizeof(resp.tunnel_offloads_caps);
if (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan))
resp.tunnel_offloads_caps |=
@@ -1192,7 +1202,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP;
}
- if (uhw->outlen) {
+ if (uhw_outlen) {
err = ib_copy_to_udata(uhw, &resp, resp.response_length);
if (err)
@@ -1808,7 +1818,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
return -EINVAL;
resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
- if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
+ if (dev->wc_support)
resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
resp.cache_line_size = cache_line_size();
resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
@@ -1867,10 +1877,6 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
if (err)
goto out_sys_pages;
- if (ibdev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)
- context->ibucontext.invalidate_range =
- &mlx5_ib_invalidate_range;
-
if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
err = mlx5_ib_devx_create(dev, true);
if (err < 0)
@@ -1999,11 +2005,6 @@ static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
struct mlx5_bfreg_info *bfregi;
- /* All umem's must be destroyed before destroying the ucontext. */
- mutex_lock(&ibcontext->per_mm_list_lock);
- WARN_ON(!list_empty(&ibcontext->per_mm_list));
- mutex_unlock(&ibcontext->per_mm_list_lock);
-
bfregi = &context->bfregi;
mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
@@ -2089,6 +2090,31 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
virt_to_page(dev->mdev->clock_info));
}
+static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry)
+{
+ struct mlx5_user_mmap_entry *mentry = to_mmmap(entry);
+ struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device);
+ struct mlx5_var_table *var_table = &dev->var_table;
+ struct mlx5_ib_dm *mdm;
+
+ switch (mentry->mmap_flag) {
+ case MLX5_IB_MMAP_TYPE_MEMIC:
+ mdm = container_of(mentry, struct mlx5_ib_dm, mentry);
+ mlx5_cmd_dealloc_memic(&dev->dm, mdm->dev_addr,
+ mdm->size);
+ kfree(mdm);
+ break;
+ case MLX5_IB_MMAP_TYPE_VAR:
+ mutex_lock(&var_table->bitmap_lock);
+ clear_bit(mentry->page_idx, var_table->bitmap);
+ mutex_unlock(&var_table->bitmap_lock);
+ kfree(mentry);
+ break;
+ default:
+ WARN_ON(true);
+ }
+}
+
static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
struct vm_area_struct *vma,
struct mlx5_ib_ucontext *context)
@@ -2177,7 +2203,7 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
err = rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE,
- prot);
+ prot, NULL);
if (err) {
mlx5_ib_err(dev,
"rdma_user_mmap_io failed with error=%d, mmap_cmd=%s\n",
@@ -2201,25 +2227,67 @@ free_bfreg:
return err;
}
-static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+static int add_dm_mmap_entry(struct ib_ucontext *context,
+ struct mlx5_ib_dm *mdm,
+ u64 address)
+{
+ mdm->mentry.mmap_flag = MLX5_IB_MMAP_TYPE_MEMIC;
+ mdm->mentry.address = address;
+ return rdma_user_mmap_entry_insert_range(
+ context, &mdm->mentry.rdma_entry,
+ mdm->size,
+ MLX5_IB_MMAP_DEVICE_MEM << 16,
+ (MLX5_IB_MMAP_DEVICE_MEM << 16) + (1UL << 16) - 1);
+}
+
+static unsigned long mlx5_vma_to_pgoff(struct vm_area_struct *vma)
+{
+ unsigned long idx;
+ u8 command;
+
+ command = get_command(vma->vm_pgoff);
+ idx = get_extended_index(vma->vm_pgoff);
+
+ return (command << 16 | idx);
+}
+
+static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev,
+ struct vm_area_struct *vma,
+ struct ib_ucontext *ucontext)
{
- struct mlx5_ib_ucontext *mctx = to_mucontext(context);
- struct mlx5_ib_dev *dev = to_mdev(context->device);
- u16 page_idx = get_extended_index(vma->vm_pgoff);
- size_t map_size = vma->vm_end - vma->vm_start;
- u32 npages = map_size >> PAGE_SHIFT;
+ struct mlx5_user_mmap_entry *mentry;
+ struct rdma_user_mmap_entry *entry;
+ unsigned long pgoff;
+ pgprot_t prot;
phys_addr_t pfn;
+ int ret;
- if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) !=
- page_idx + npages)
+ pgoff = mlx5_vma_to_pgoff(vma);
+ entry = rdma_user_mmap_entry_get_pgoff(ucontext, pgoff);
+ if (!entry)
return -EINVAL;
- pfn = ((dev->mdev->bar_addr +
- MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >>
- PAGE_SHIFT) +
- page_idx;
- return rdma_user_mmap_io(context, vma, pfn, map_size,
- pgprot_writecombine(vma->vm_page_prot));
+ mentry = to_mmmap(entry);
+ pfn = (mentry->address >> PAGE_SHIFT);
+ if (mentry->mmap_flag == MLX5_IB_MMAP_TYPE_VAR)
+ prot = pgprot_noncached(vma->vm_page_prot);
+ else
+ prot = pgprot_writecombine(vma->vm_page_prot);
+ ret = rdma_user_mmap_io(ucontext, vma, pfn,
+ entry->npages * PAGE_SIZE,
+ prot,
+ entry);
+ rdma_user_mmap_entry_put(&mentry->rdma_entry);
+ return ret;
+}
+
+static u64 mlx5_entry_to_mmap_offset(struct mlx5_user_mmap_entry *entry)
+{
+ u64 cmd = (entry->rdma_entry.start_pgoff >> 16) & 0xFFFF;
+ u64 index = entry->rdma_entry.start_pgoff & 0xFFFF;
+
+ return (((index >> 8) << 16) | (cmd << MLX5_IB_MMAP_CMD_SHIFT) |
+ (index & 0xFF)) << PAGE_SHIFT;
}
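
As an illustration of the offset encoding used by mlx5_vma_to_pgoff() and mlx5_entry_to_mmap_offset() above, the following standalone sketch (hypothetical numbers only; it assumes PAGE_SHIFT of 12 and MLX5_IB_MMAP_CMD_SHIFT of 8, matching the definition later in mlx5_ib.h) shows the round trip between rdma_entry.start_pgoff and the user-visible mmap offset:

/* Illustrative only; not part of the patch logic above. */
#include <stdint.h>
#include <assert.h>

#define EX_PAGE_SHIFT 12
#define EX_CMD_SHIFT  8

static uint64_t ex_entry_to_mmap_offset(uint64_t start_pgoff)
{
	uint64_t cmd   = (start_pgoff >> 16) & 0xFFFF;
	uint64_t index = start_pgoff & 0xFFFF;

	/* high index bits above bit 16, command in bits 8..15, low index bits 0..7 */
	return (((index >> 8) << 16) | (cmd << EX_CMD_SHIFT) |
		(index & 0xFF)) << EX_PAGE_SHIFT;
}

int main(void)
{
	uint64_t start_pgoff = (0x5ULL << 16) | 0x0123; /* cmd 0x5, index 0x123 */
	uint64_t off = ex_entry_to_mmap_offset(start_pgoff);

	assert(off == 0x10523000ULL); /* the value userspace would pass to mmap() */
	/* On the kernel side, vm_pgoff = off >> PAGE_SHIFT = 0x10523; get_command()
	 * and get_extended_index() rebuild (cmd << 16 | index), which is exactly
	 * the rdma_entry.start_pgoff used for the lookup in mlx5_ib_mmap_offset().
	 */
	return 0;
}
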
static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
@@ -2257,15 +2325,13 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
PAGE_SHIFT;
return rdma_user_mmap_io(&context->ibucontext, vma, pfn,
PAGE_SIZE,
- pgprot_noncached(vma->vm_page_prot));
+ pgprot_noncached(vma->vm_page_prot),
+ NULL);
case MLX5_IB_MMAP_CLOCK_INFO:
return mlx5_ib_mmap_clock_info_page(dev, vma, context);
- case MLX5_IB_MMAP_DEVICE_MEM:
- return dm_mmap(ibcontext, vma);
-
default:
- return -EINVAL;
+ return mlx5_ib_mmap_offset(dev, vma, ibcontext);
}
return 0;
@@ -2280,6 +2346,7 @@ static inline int check_dm_type_support(struct mlx5_ib_dev *dev,
return -EOPNOTSUPP;
break;
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
if (!capable(CAP_SYS_RAWIO) ||
!capable(CAP_NET_RAW))
return -EPERM;
@@ -2300,8 +2367,9 @@ static int handle_alloc_dm_memic(struct ib_ucontext *ctx,
{
struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm;
u64 start_offset;
- u32 page_idx;
+ u16 page_idx;
int err;
+ u64 address;
dm->size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE);
@@ -2310,28 +2378,30 @@ static int handle_alloc_dm_memic(struct ib_ucontext *ctx,
if (err)
return err;
- page_idx = (dm->dev_addr - pci_resource_start(dm_db->dev->pdev, 0) -
- MLX5_CAP64_DEV_MEM(dm_db->dev, memic_bar_start_addr)) >>
- PAGE_SHIFT;
+ address = dm->dev_addr & PAGE_MASK;
+ err = add_dm_mmap_entry(ctx, dm, address);
+ if (err)
+ goto err_dealloc;
+ page_idx = dm->mentry.rdma_entry.start_pgoff & 0xFFFF;
err = uverbs_copy_to(attrs,
MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
- &page_idx, sizeof(page_idx));
+ &page_idx,
+ sizeof(page_idx));
if (err)
- goto err_dealloc;
+ goto err_copy;
start_offset = dm->dev_addr & ~PAGE_MASK;
err = uverbs_copy_to(attrs,
MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
&start_offset, sizeof(start_offset));
if (err)
- goto err_dealloc;
-
- bitmap_set(to_mucontext(ctx)->dm_pages, page_idx,
- DIV_ROUND_UP(dm->size, PAGE_SIZE));
+ goto err_copy;
return 0;
+err_copy:
+ rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry);
err_dealloc:
mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size);
@@ -2344,20 +2414,20 @@ static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
struct uverbs_attr_bundle *attrs,
int type)
{
- struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm;
+ struct mlx5_core_dev *dev = to_mdev(ctx->device)->mdev;
u64 act_size;
int err;
	/* Allocation size must be a multiple of the basic block size
* and a power of 2.
*/
- act_size = round_up(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dm_db->dev));
+ act_size = round_up(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dev));
act_size = roundup_pow_of_two(act_size);
dm->size = act_size;
- err = mlx5_cmd_alloc_sw_icm(dm_db, type, act_size,
- to_mucontext(ctx)->devx_uid, &dm->dev_addr,
- &dm->icm_dm.obj_id);
+ err = mlx5_dm_sw_icm_alloc(dev, type, act_size,
+ to_mucontext(ctx)->devx_uid, &dm->dev_addr,
+ &dm->icm_dm.obj_id);
if (err)
return err;
@@ -2365,9 +2435,9 @@ static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
&dm->dev_addr, sizeof(dm->dev_addr));
if (err)
- mlx5_cmd_dealloc_sw_icm(dm_db, type, dm->size,
- to_mucontext(ctx)->devx_uid,
- dm->dev_addr, dm->icm_dm.obj_id);
+ mlx5_dm_sw_icm_dealloc(dev, type, dm->size,
+ to_mucontext(ctx)->devx_uid, dm->dev_addr,
+ dm->icm_dm.obj_id);
return err;
}
@@ -2407,8 +2477,14 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
attrs);
break;
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ err = handle_alloc_dm_sw_icm(context, dm,
+ attr, attrs,
+ MLX5_SW_ICM_TYPE_STEERING);
+ break;
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
- err = handle_alloc_dm_sw_icm(context, dm, attr, attrs, type);
+ err = handle_alloc_dm_sw_icm(context, dm,
+ attr, attrs,
+ MLX5_SW_ICM_TYPE_HEADER_MODIFY);
break;
default:
err = -EOPNOTSUPP;
@@ -2428,30 +2504,25 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
{
struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context(
&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
- struct mlx5_dm *dm_db = &to_mdev(ibdm->device)->dm;
+ struct mlx5_core_dev *dev = to_mdev(ibdm->device)->mdev;
struct mlx5_ib_dm *dm = to_mdm(ibdm);
- u32 page_idx;
int ret;
switch (dm->type) {
case MLX5_IB_UAPI_DM_TYPE_MEMIC:
- ret = mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size);
+ rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry);
+ return 0;
+ case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_STEERING,
+ dm->size, ctx->devx_uid, dm->dev_addr,
+ dm->icm_dm.obj_id);
if (ret)
return ret;
-
- page_idx = (dm->dev_addr -
- pci_resource_start(dm_db->dev->pdev, 0) -
- MLX5_CAP64_DEV_MEM(dm_db->dev,
- memic_bar_start_addr)) >>
- PAGE_SHIFT;
- bitmap_clear(ctx->dm_pages, page_idx,
- DIV_ROUND_UP(dm->size, PAGE_SIZE));
break;
- case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
- ret = mlx5_cmd_dealloc_sw_icm(dm_db, dm->type, dm->size,
- ctx->devx_uid, dm->dev_addr,
- dm->icm_dm.obj_id);
+ ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_HEADER_MODIFY,
+ dm->size, ctx->devx_uid, dm->dev_addr,
+ dm->icm_dm.obj_id);
if (ret)
return ret;
break;
@@ -2646,7 +2717,8 @@ int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
return -EINVAL;
action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
- action->modify_id = maction->flow_action_raw.action_id;
+ action->modify_hdr =
+ maction->flow_action_raw.modify_hdr;
return 0;
}
if (maction->flow_action_raw.sub_type ==
@@ -2663,8 +2735,8 @@ int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
return -EINVAL;
action->action |=
MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
- action->reformat_id =
- maction->flow_action_raw.action_id;
+ action->pkt_reformat =
+ maction->flow_action_raw.pkt_reformat;
return 0;
}
/* fall through */
@@ -3239,12 +3311,14 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
int num_entries, int num_groups,
u32 flags)
{
+ struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_table *ft;
- ft = mlx5_create_auto_grouped_flow_table(ns, priority,
- num_entries,
- num_groups,
- 0, flags);
+ ft_attr.prio = priority;
+ ft_attr.max_fte = num_entries;
+ ft_attr.flags = flags;
+ ft_attr.autogroup.max_num_groups = num_groups;
+ ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
if (IS_ERR(ft))
return ERR_CAST(ft);
@@ -3544,10 +3618,6 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
}
INIT_LIST_HEAD(&handler->list);
- if (dst) {
- memcpy(&dest_arr[0], dst, sizeof(*dst));
- dest_num++;
- }
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
err = parse_flow_attr(dev->mdev, spec,
@@ -3560,6 +3630,11 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
ib_flow += ((union ib_flow_spec *)ib_flow)->size;
}
+ if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) {
+ memcpy(&dest_arr[0], dst, sizeof(*dst));
+ dest_num++;
+ }
+
if (!flow_is_multicast_only(flow_attr))
set_underlay_qp(dev, spec, underlay_qpn);
@@ -3600,10 +3675,8 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
}
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
- if (!(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT)) {
+ if (!dest_num)
rule_dst = NULL;
- dest_num = 0;
- }
} else {
if (is_egress)
flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
@@ -3967,6 +4040,11 @@ _get_flow_table(struct mlx5_ib_dev *dev,
esw_encap)
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
priority = FDB_BYPASS_PATH;
+ } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) {
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
+ log_max_ft_size));
+ priority = fs_matcher->priority;
}
max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
@@ -3981,6 +4059,8 @@ _get_flow_table(struct mlx5_ib_dev *dev,
prio = &dev->flow_db->egress_prios[priority];
else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB)
prio = &dev->flow_db->fdb;
+ else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX)
+ prio = &dev->flow_db->rdma_rx[priority];
if (!prio)
return ERR_PTR(-EINVAL);
@@ -4728,7 +4808,6 @@ static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port)
struct ib_device_attr *dprops = NULL;
struct ib_port_attr *pprops = NULL;
int err = -ENOMEM;
- struct ib_udata uhw = {.inlen = 0, .outlen = 0};
pprops = kzalloc(sizeof(*pprops), GFP_KERNEL);
if (!pprops)
@@ -4738,7 +4817,7 @@ static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port)
if (!dprops)
goto out;
- err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw);
+ err = mlx5_ib_query_device(&dev->ib_dev, dprops, NULL);
if (err) {
mlx5_ib_warn(dev, "query_device failed %d\n", err);
goto out;
@@ -5134,8 +5213,7 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
immutable->core_cap_flags = get_core_cap_flags(ibdev, &rep);
- if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce))
- immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
}
@@ -5238,11 +5316,9 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
{
int err;
- if (MLX5_CAP_GEN(dev->mdev, roce)) {
- err = mlx5_nic_vport_enable_roce(dev->mdev);
- if (err)
- return err;
- }
+ err = mlx5_nic_vport_enable_roce(dev->mdev);
+ if (err)
+ return err;
err = mlx5_eth_lag_init(dev);
if (err)
@@ -5251,8 +5327,7 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
return 0;
err_disable_roce:
- if (MLX5_CAP_GEN(dev->mdev, roce))
- mlx5_nic_vport_disable_roce(dev->mdev);
+ mlx5_nic_vport_disable_roce(dev->mdev);
return err;
}
@@ -5260,8 +5335,7 @@ err_disable_roce:
static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
{
mlx5_eth_lag_cleanup(dev);
- if (MLX5_CAP_GEN(dev->mdev, roce))
- mlx5_nic_vport_disable_roce(dev->mdev);
+ mlx5_nic_vport_disable_roce(dev->mdev);
}
struct mlx5_ib_counter {
@@ -5313,6 +5387,14 @@ static const struct mlx5_ib_counter extended_err_cnts[] = {
INIT_Q_COUNTER(req_cqe_flush_error),
};
+static const struct mlx5_ib_counter roce_accl_cnts[] = {
+ INIT_Q_COUNTER(roce_adp_retrans),
+ INIT_Q_COUNTER(roce_adp_retrans_to),
+ INIT_Q_COUNTER(roce_slow_restart),
+ INIT_Q_COUNTER(roce_slow_restart_cnps),
+ INIT_Q_COUNTER(roce_slow_restart_trans),
+};
+
#define INIT_EXT_PPCNT_COUNTER(_name) \
{ .name = #_name, .offset = \
MLX5_BYTE_OFF(ppcnt_reg, \
@@ -5322,11 +5404,21 @@ static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
};
+static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev)
+{
+ return MLX5_ESWITCH_MANAGER(mdev) &&
+ mlx5_ib_eswitch_mode(mdev->priv.eswitch) ==
+ MLX5_ESWITCH_OFFLOADS;
+}
+
static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
+ int num_cnt_ports;
int i;
- for (i = 0; i < dev->num_ports; i++) {
+ num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
+
+ for (i = 0; i < num_cnt_ports; i++) {
if (dev->port[i].cnts.set_id_valid)
mlx5_core_dealloc_q_counter(dev->mdev,
dev->port[i].cnts.set_id);
@@ -5351,6 +5443,9 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
num_counters += ARRAY_SIZE(extended_err_cnts);
+ if (MLX5_CAP_GEN(dev->mdev, roce_accl))
+ num_counters += ARRAY_SIZE(roce_accl_cnts);
+
cnts->num_q_counters = num_counters;
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
@@ -5411,6 +5506,13 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
}
}
+ if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
+ for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) {
+ names[j] = roce_accl_cnts[i].name;
+ offsets[j] = roce_accl_cnts[i].offset;
+ }
+ }
+
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
names[j] = cong_cnts[i].name;
@@ -5428,13 +5530,15 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
{
+ int num_cnt_ports;
int err = 0;
int i;
bool is_shared;
is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
+ num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
- for (i = 0; i < dev->num_ports; i++) {
+ for (i = 0; i < num_cnt_ports; i++) {
err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
if (err)
goto err_alloc;
@@ -5454,7 +5558,6 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
}
dev->port[i].cnts.set_id_valid = true;
}
-
return 0;
err_alloc:
@@ -5462,25 +5565,50 @@ err_alloc:
return err;
}
+static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
+ u8 port_num)
+{
+ return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts :
+ &dev->port[port_num].cnts;
+}
+
+/**
+ * mlx5_ib_get_counters_id - Returns counters id to use for device+port
+ * @dev: Pointer to mlx5 IB device
+ * @port_num: Zero based port number
+ *
+ * mlx5_ib_get_counters_id() returns the counters set id to use for the
+ * given device and port combination, in both switchdev and non-switchdev
+ * modes of the parent device.
+ */
+u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num)
+{
+ const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
+
+ return cnts->set_id;
+}
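+
A hypothetical caller of the new helper might look like the sketch below; it assumes the qpc counter_set_id field from mlx5_ifc.h, omits error handling, and is only meant to show that the switchdev/non-switchdev distinction stays hidden behind get_counters():

/* Hypothetical caller, illustrative only. */
static void example_bind_q_counter(struct mlx5_ib_dev *dev, void *qpc,
				   u8 port_num)
{
	/* helper expects a zero-based port number */
	u16 set_id = mlx5_ib_get_counters_id(dev, port_num - 1);

	MLX5_SET(qpc, qpc, counter_set_id, set_id);
}
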
+
static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
u8 port_num)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_port *port = &dev->port[port_num - 1];
+ const struct mlx5_ib_counters *cnts;
+ bool is_switchdev = is_mdev_switchdev_mode(dev->mdev);
- /* We support only per port stats */
- if (port_num == 0)
+ if ((is_switchdev && port_num) || (!is_switchdev && !port_num))
return NULL;
- return rdma_alloc_hw_stats_struct(port->cnts.names,
- port->cnts.num_q_counters +
- port->cnts.num_cong_counters +
- port->cnts.num_ext_ppcnt_counters,
+ cnts = get_counters(dev, port_num - 1);
+
+ return rdma_alloc_hw_stats_struct(cnts->names,
+ cnts->num_q_counters +
+ cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
- struct mlx5_ib_port *port,
+ const struct mlx5_ib_counters *cnts,
struct rdma_hw_stats *stats,
u16 set_id)
{
@@ -5497,8 +5625,8 @@ static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
if (ret)
goto free;
- for (i = 0; i < port->cnts.num_q_counters; i++) {
- val = *(__be32 *)(out + port->cnts.offsets[i]);
+ for (i = 0; i < cnts->num_q_counters; i++) {
+ val = *(__be32 *)(out + cnts->offsets[i]);
stats->value[i] = (u64)be32_to_cpu(val);
}
@@ -5508,10 +5636,10 @@ free:
}
static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
- struct mlx5_ib_port *port,
- struct rdma_hw_stats *stats)
+ const struct mlx5_ib_counters *cnts,
+ struct rdma_hw_stats *stats)
{
- int offset = port->cnts.num_q_counters + port->cnts.num_cong_counters;
+ int offset = cnts->num_q_counters + cnts->num_cong_counters;
int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
int ret, i;
void *out;
@@ -5524,12 +5652,10 @@ static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
if (ret)
goto free;
- for (i = 0; i < port->cnts.num_ext_ppcnt_counters; i++) {
+ for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
stats->value[i + offset] =
be64_to_cpup((__be64 *)(out +
- port->cnts.offsets[i + offset]));
- }
-
+ cnts->offsets[i + offset]));
free:
kvfree(out);
return ret;
@@ -5540,7 +5666,7 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
u8 port_num, int index)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_port *port = &dev->port[port_num - 1];
+ const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
struct mlx5_core_dev *mdev;
int ret, num_counters;
u8 mdev_port_num;
@@ -5548,18 +5674,17 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
if (!stats)
return -EINVAL;
- num_counters = port->cnts.num_q_counters +
- port->cnts.num_cong_counters +
- port->cnts.num_ext_ppcnt_counters;
+ num_counters = cnts->num_q_counters +
+ cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters;
/* q_counters are per IB device, query the master mdev */
- ret = mlx5_ib_query_q_counters(dev->mdev, port, stats,
- port->cnts.set_id);
+ ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id);
if (ret)
return ret;
if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
- ret = mlx5_ib_query_ext_ppcnt_counters(dev, port, stats);
+ ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
if (ret)
return ret;
}
@@ -5576,10 +5701,10 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
}
ret = mlx5_lag_query_cong_counters(dev->mdev,
stats->value +
- port->cnts.num_q_counters,
- port->cnts.num_cong_counters,
- port->cnts.offsets +
- port->cnts.num_q_counters);
+ cnts->num_q_counters,
+ cnts->num_cong_counters,
+ cnts->offsets +
+ cnts->num_q_counters);
mlx5_ib_put_native_port_mdev(dev, port_num);
if (ret)
@@ -5594,20 +5719,22 @@ static struct rdma_hw_stats *
mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
{
struct mlx5_ib_dev *dev = to_mdev(counter->device);
- struct mlx5_ib_port *port = &dev->port[counter->port - 1];
+ const struct mlx5_ib_counters *cnts =
+ get_counters(dev, counter->port - 1);
	/* Q counters are at the beginning of all counters */
- return rdma_alloc_hw_stats_struct(port->cnts.names,
- port->cnts.num_q_counters,
+ return rdma_alloc_hw_stats_struct(cnts->names,
+ cnts->num_q_counters,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
{
struct mlx5_ib_dev *dev = to_mdev(counter->device);
- struct mlx5_ib_port *port = &dev->port[counter->port - 1];
+ const struct mlx5_ib_counters *cnts =
+ get_counters(dev, counter->port - 1);
- return mlx5_ib_query_q_counters(dev->mdev, port,
+ return mlx5_ib_query_q_counters(dev->mdev, cnts,
counter->stats, counter->id);
}
@@ -5664,11 +5791,10 @@ static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num,
static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
- if (!dev->delay_drop.dbg)
+ if (!dev->delay_drop.dir_debugfs)
return;
- debugfs_remove_recursive(dev->delay_drop.dbg->dir_debugfs);
- kfree(dev->delay_drop.dbg);
- dev->delay_drop.dbg = NULL;
+ debugfs_remove_recursive(dev->delay_drop.dir_debugfs);
+ dev->delay_drop.dir_debugfs = NULL;
}
static void cancel_delay_drop(struct mlx5_ib_dev *dev)
@@ -5719,52 +5845,22 @@ static const struct file_operations fops_delay_drop_timeout = {
.read = delay_drop_timeout_read,
};
-static int delay_drop_debugfs_init(struct mlx5_ib_dev *dev)
+static void delay_drop_debugfs_init(struct mlx5_ib_dev *dev)
{
- struct mlx5_ib_dbg_delay_drop *dbg;
+ struct dentry *root;
if (!mlx5_debugfs_root)
- return 0;
-
- dbg = kzalloc(sizeof(*dbg), GFP_KERNEL);
- if (!dbg)
- return -ENOMEM;
-
- dev->delay_drop.dbg = dbg;
-
- dbg->dir_debugfs =
- debugfs_create_dir("delay_drop",
- dev->mdev->priv.dbg_root);
- if (!dbg->dir_debugfs)
- goto out_debugfs;
-
- dbg->events_cnt_debugfs =
- debugfs_create_atomic_t("num_timeout_events", 0400,
- dbg->dir_debugfs,
- &dev->delay_drop.events_cnt);
- if (!dbg->events_cnt_debugfs)
- goto out_debugfs;
-
- dbg->rqs_cnt_debugfs =
- debugfs_create_atomic_t("num_rqs", 0400,
- dbg->dir_debugfs,
- &dev->delay_drop.rqs_cnt);
- if (!dbg->rqs_cnt_debugfs)
- goto out_debugfs;
-
- dbg->timeout_debugfs =
- debugfs_create_file("timeout", 0600,
- dbg->dir_debugfs,
- &dev->delay_drop,
- &fops_delay_drop_timeout);
- if (!dbg->timeout_debugfs)
- goto out_debugfs;
+ return;
- return 0;
+ root = debugfs_create_dir("delay_drop", dev->mdev->priv.dbg_root);
+ dev->delay_drop.dir_debugfs = root;
-out_debugfs:
- delay_drop_debugfs_cleanup(dev);
- return -ENOMEM;
+ debugfs_create_atomic_t("num_timeout_events", 0400, root,
+ &dev->delay_drop.events_cnt);
+ debugfs_create_atomic_t("num_rqs", 0400, root,
+ &dev->delay_drop.rqs_cnt);
+ debugfs_create_file("timeout", 0600, root, &dev->delay_drop,
+ &fops_delay_drop_timeout);
}
static void init_delay_drop(struct mlx5_ib_dev *dev)
@@ -5780,11 +5876,9 @@ static void init_delay_drop(struct mlx5_ib_dev *dev)
atomic_set(&dev->delay_drop.rqs_cnt, 0);
atomic_set(&dev->delay_drop.events_cnt, 0);
- if (delay_drop_debugfs_init(dev))
- mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n");
+ delay_drop_debugfs_init(dev);
}
-/* The mlx5_ib_multiport_mutex should be held when calling this function */
static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
struct mlx5_ib_multiport_info *mpi)
{
@@ -5794,6 +5888,8 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
int err;
int i;
+ lockdep_assert_held(&mlx5_ib_multiport_mutex);
+
mlx5_ib_cleanup_cong_debugfs(ibdev, port_num);
spin_lock(&port->mp.mpi_lock);
@@ -5843,13 +5939,14 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
ibdev->port[port_num].roce.last_port_state = IB_PORT_DOWN;
}
-/* The mlx5_ib_multiport_mutex should be held when calling this function */
static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
struct mlx5_ib_multiport_info *mpi)
{
u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1;
int err;
+ lockdep_assert_held(&mlx5_ib_multiport_mutex);
+
spin_lock(&ibdev->port[port_num].mp.mpi_lock);
if (ibdev->port[port_num].mp.mpi) {
mlx5_ib_dbg(ibdev, "port %d already affiliated.\n",
@@ -5991,6 +6088,145 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
mlx5_nic_vport_disable_roce(dev->mdev);
}
+static int var_obj_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_user_mmap_entry *obj = uobject->object;
+
+ rdma_user_mmap_entry_remove(&obj->rdma_entry);
+ return 0;
+}
+
+static struct mlx5_user_mmap_entry *
+alloc_var_entry(struct mlx5_ib_ucontext *c)
+{
+ struct mlx5_user_mmap_entry *entry;
+ struct mlx5_var_table *var_table;
+ u32 page_idx;
+ int err;
+
+ var_table = &to_mdev(c->ibucontext.device)->var_table;
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_lock(&var_table->bitmap_lock);
+ page_idx = find_first_zero_bit(var_table->bitmap,
+ var_table->num_var_hw_entries);
+ if (page_idx >= var_table->num_var_hw_entries) {
+ err = -ENOSPC;
+ mutex_unlock(&var_table->bitmap_lock);
+ goto end;
+ }
+
+ set_bit(page_idx, var_table->bitmap);
+ mutex_unlock(&var_table->bitmap_lock);
+
+ entry->address = var_table->hw_start_addr +
+ (page_idx * var_table->stride_size);
+ entry->page_idx = page_idx;
+ entry->mmap_flag = MLX5_IB_MMAP_TYPE_VAR;
+
+ err = rdma_user_mmap_entry_insert_range(
+ &c->ibucontext, &entry->rdma_entry, var_table->stride_size,
+ MLX5_IB_MMAP_OFFSET_START << 16,
+ (MLX5_IB_MMAP_OFFSET_END << 16) + (1UL << 16) - 1);
+ if (err)
+ goto err_insert;
+
+ return entry;
+
+err_insert:
+ mutex_lock(&var_table->bitmap_lock);
+ clear_bit(page_idx, var_table->bitmap);
+ mutex_unlock(&var_table->bitmap_lock);
+end:
+ kfree(entry);
+ return ERR_PTR(err);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_VAR_OBJ_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE);
+ struct mlx5_ib_ucontext *c;
+ struct mlx5_user_mmap_entry *entry;
+ u64 mmap_offset;
+ u32 length;
+ int err;
+
+ c = to_mucontext(ib_uverbs_get_ucontext(attrs));
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+
+ entry = alloc_var_entry(c);
+ if (IS_ERR(entry))
+ return PTR_ERR(entry);
+
+ mmap_offset = mlx5_entry_to_mmap_offset(entry);
+ length = entry->rdma_entry.npages * PAGE_SIZE;
+ uobj->object = entry;
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_OFFSET,
+ &mmap_offset, sizeof(mmap_offset));
+ if (err)
+ goto err;
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_PAGE_ID,
+ &entry->page_idx, sizeof(entry->page_idx));
+ if (err)
+ goto err;
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_LENGTH,
+ &length, sizeof(length));
+ if (err)
+ goto err;
+
+ return 0;
+
+err:
+ rdma_user_mmap_entry_remove(&entry->rdma_entry);
+ return err;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_VAR_OBJ_ALLOC,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE,
+ MLX5_IB_OBJECT_VAR,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_PAGE_ID,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_LENGTH,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_VAR_OBJ_DESTROY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_VAR_OBJ_DESTROY_HANDLE,
+ MLX5_IB_OBJECT_VAR,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_VAR,
+ UVERBS_TYPE_ALLOC_IDR(var_obj_cleanup),
+ &UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_ALLOC),
+ &UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_DESTROY));
+
+static bool var_is_supported(struct ib_device *device)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+
+ return (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
+ MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q);
+}
+
ADD_UVERBS_ATTRIBUTES_SIMPLE(
mlx5_ib_dm,
UVERBS_OBJECT_DM,
@@ -6013,14 +6249,14 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE(
enum mlx5_ib_uapi_flow_action_flags));
static const struct uapi_definition mlx5_ib_defs[] = {
-#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
UAPI_DEF_CHAIN(mlx5_ib_devx_defs),
UAPI_DEF_CHAIN(mlx5_ib_flow_defs),
-#endif
UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
&mlx5_ib_flow_action),
UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR,
+ UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)),
{}
};
@@ -6096,38 +6332,17 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
- struct mlx5_core_dev *mdev = dev->mdev;
-
mlx5_ib_cleanup_multiport_master(dev);
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
- srcu_barrier(&dev->mr_srcu);
- cleanup_srcu_struct(&dev->mr_srcu);
- }
+ WARN_ON(!xa_empty(&dev->odp_mkeys));
+ cleanup_srcu_struct(&dev->odp_srcu);
+ WARN_ON(!xa_empty(&dev->sig_mrs));
WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES));
-
- WARN_ON(dev->dm.steering_sw_icm_alloc_blocks &&
- !bitmap_empty(
- dev->dm.steering_sw_icm_alloc_blocks,
- BIT(MLX5_CAP_DEV_MEM(mdev, log_steering_sw_icm_size) -
- MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev))));
-
- kfree(dev->dm.steering_sw_icm_alloc_blocks);
-
- WARN_ON(dev->dm.header_modify_sw_icm_alloc_blocks &&
- !bitmap_empty(dev->dm.header_modify_sw_icm_alloc_blocks,
- BIT(MLX5_CAP_DEV_MEM(
- mdev, log_header_modify_sw_icm_size) -
- MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev))));
-
- kfree(dev->dm.header_modify_sw_icm_alloc_blocks);
}
static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
- u64 header_modify_icm_blocks = 0;
- u64 steering_icm_blocks = 0;
int err;
int i;
@@ -6139,6 +6354,8 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
dev->port[i].roce.last_port_state = IB_PORT_DOWN;
}
+ mlx5_ib_internal_fill_odp_caps(dev);
+
err = mlx5_ib_init_multiport_master(dev);
if (err)
return err;
@@ -6171,52 +6388,18 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
mutex_init(&dev->cap_mask_mutex);
INIT_LIST_HEAD(&dev->qp_list);
spin_lock_init(&dev->reset_flow_resource_lock);
-
- if (MLX5_CAP_GEN_64(mdev, general_obj_types) &
- MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM) {
- if (MLX5_CAP64_DEV_MEM(mdev, steering_sw_icm_start_address)) {
- steering_icm_blocks =
- BIT(MLX5_CAP_DEV_MEM(mdev,
- log_steering_sw_icm_size) -
- MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev));
-
- dev->dm.steering_sw_icm_alloc_blocks =
- kcalloc(BITS_TO_LONGS(steering_icm_blocks),
- sizeof(unsigned long), GFP_KERNEL);
- if (!dev->dm.steering_sw_icm_alloc_blocks)
- goto err_mp;
- }
-
- if (MLX5_CAP64_DEV_MEM(mdev,
- header_modify_sw_icm_start_address)) {
- header_modify_icm_blocks = BIT(
- MLX5_CAP_DEV_MEM(
- mdev, log_header_modify_sw_icm_size) -
- MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev));
-
- dev->dm.header_modify_sw_icm_alloc_blocks =
- kcalloc(BITS_TO_LONGS(header_modify_icm_blocks),
- sizeof(unsigned long), GFP_KERNEL);
- if (!dev->dm.header_modify_sw_icm_alloc_blocks)
- goto err_dm;
- }
- }
+ xa_init(&dev->odp_mkeys);
+ xa_init(&dev->sig_mrs);
spin_lock_init(&dev->dm.lock);
dev->dm.dev = mdev;
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
- err = init_srcu_struct(&dev->mr_srcu);
- if (err)
- goto err_dm;
- }
+ err = init_srcu_struct(&dev->odp_srcu);
+ if (err)
+ goto err_mp;
return 0;
-err_dm:
- kfree(dev->dm.steering_sw_icm_alloc_blocks);
- kfree(dev->dm.header_modify_sw_icm_alloc_blocks);
-
err_mp:
mlx5_ib_cleanup_multiport_master(dev);
@@ -6273,12 +6456,16 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.disassociate_ucontext = mlx5_ib_disassociate_ucontext,
.drain_rq = mlx5_ib_drain_rq,
.drain_sq = mlx5_ib_drain_sq,
+ .enable_driver = mlx5_ib_enable_driver,
+ .fill_res_entry = mlx5_ib_fill_res_entry,
+ .fill_stat_entry = mlx5_ib_fill_stat_entry,
.get_dev_fw_str = get_dev_fw_str,
.get_dma_mr = mlx5_ib_get_dma_mr,
.get_link_layer = mlx5_ib_port_link_layer,
.map_mr_sg = mlx5_ib_map_mr_sg,
.map_mr_sg_pi = mlx5_ib_map_mr_sg_pi,
.mmap = mlx5_ib_mmap,
+ .mmap_free = mlx5_ib_mmap_free,
.modify_cq = mlx5_ib_modify_cq,
.modify_device = mlx5_ib_modify_device,
.modify_port = mlx5_ib_modify_port,
@@ -6319,6 +6506,7 @@ static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = {
static const struct ib_device_ops mlx5_ib_dev_sriov_ops = {
.get_vf_config = mlx5_ib_get_vf_config,
+ .get_vf_guid = mlx5_ib_get_vf_guid,
.get_vf_stats = mlx5_ib_get_vf_stats,
.set_vf_guid = mlx5_ib_set_vf_guid,
.set_vf_link_state = mlx5_ib_set_vf_link_state,
@@ -6340,6 +6528,35 @@ static const struct ib_device_ops mlx5_ib_dev_dm_ops = {
.reg_dm_mr = mlx5_ib_reg_dm_mr,
};
+static int mlx5_ib_init_var_table(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_var_table *var_table = &dev->var_table;
+ u8 log_doorbell_bar_size;
+ u8 log_doorbell_stride;
+ u64 bar_size;
+
+ log_doorbell_bar_size = MLX5_CAP_DEV_VDPA_EMULATION(mdev,
+ log_doorbell_bar_size);
+ log_doorbell_stride = MLX5_CAP_DEV_VDPA_EMULATION(mdev,
+ log_doorbell_stride);
+ var_table->hw_start_addr = dev->mdev->bar_addr +
+ MLX5_CAP64_DEV_VDPA_EMULATION(mdev,
+ doorbell_bar_offset);
+ bar_size = (1ULL << log_doorbell_bar_size) * 4096;
+ var_table->stride_size = 1ULL << log_doorbell_stride;
+ var_table->num_var_hw_entries = div64_u64(bar_size, var_table->stride_size);
+ mutex_init(&var_table->bitmap_lock);
+ var_table->bitmap = bitmap_zalloc(var_table->num_var_hw_entries,
+ GFP_KERNEL);
+ return (var_table->bitmap) ? 0 : -ENOMEM;
+}
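+
To make the sizing above concrete, here is a worked example with hypothetical capability values (log_doorbell_bar_size = 4, log_doorbell_stride = 12); nothing in it is taken from real hardware:

/* Illustrative arithmetic only. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t  log_doorbell_bar_size = 4;  /* hypothetical CAP value */
	uint8_t  log_doorbell_stride   = 12; /* hypothetical CAP value */
	uint64_t bar_size    = (1ULL << log_doorbell_bar_size) * 4096; /* 64 KiB */
	uint32_t stride_size = 1U << log_doorbell_stride;               /* 4 KiB  */
	uint64_t num_entries = bar_size / stride_size;

	/* 16 VAR doorbell pages of 4096 bytes each */
	printf("%llu VAR doorbell pages of %u bytes\n",
	       (unsigned long long)num_entries, stride_size);
	return 0;
}
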
+
+static void mlx5_ib_stage_caps_cleanup(struct mlx5_ib_dev *dev)
+{
+ bitmap_free(dev->var_table.bitmap);
+}
+
static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
@@ -6427,6 +6644,13 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
mutex_init(&dev->lb.mutex);
+ if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
+ MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) {
+ err = mlx5_ib_init_var_table(dev);
+ if (err)
+ return err;
+ }
+
dev->ib_dev.use_cq_dim = true;
return 0;
@@ -6448,7 +6672,7 @@ static const struct ib_device_ops mlx5_ib_dev_port_rep_ops = {
.query_port = mlx5_ib_rep_query_port,
};
-static int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev)
+static int mlx5_ib_stage_raw_eth_non_default_cb(struct mlx5_ib_dev *dev)
{
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_rep_ops);
return 0;
@@ -6488,7 +6712,7 @@ static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev)
mlx5_remove_netdev_notifier(dev, port_num);
}
-static int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_stage_raw_eth_roce_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
enum rdma_link_layer ll;
@@ -6504,7 +6728,7 @@ static int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev)
return err;
}
-static void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev)
+static void mlx5_ib_stage_raw_eth_roce_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_stage_common_roce_cleanup(dev);
}
@@ -6563,8 +6787,6 @@ static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
{
- mlx5_ib_internal_fill_odp_caps(dev);
-
return mlx5_ib_odp_init_one(dev);
}
@@ -6716,10 +6938,24 @@ static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev)
}
}
+int mlx5_ib_enable_driver(struct ib_device *dev)
+{
+ struct mlx5_ib_dev *mdev = to_mdev(dev);
+ int ret;
+
+ ret = mlx5_ib_test_wc(mdev);
+ mlx5_ib_dbg(mdev, "Write-Combining %s",
+ mdev->wc_support ? "supported" : "not supported");
+
+ return ret;
+}
+
void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
const struct mlx5_ib_profile *profile,
int stage)
{
+ dev->ib_active = false;
+
/* Number of stages to cleanup */
while (stage) {
stage--;
@@ -6765,7 +7001,7 @@ static const struct mlx5_ib_profile pf_profile = {
mlx5_ib_stage_flow_db_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CAPS,
mlx5_ib_stage_caps_init,
- NULL),
+ mlx5_ib_stage_caps_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
mlx5_ib_stage_non_default_cb,
NULL),
@@ -6813,7 +7049,7 @@ static const struct mlx5_ib_profile pf_profile = {
mlx5_ib_stage_delay_drop_cleanup),
};
-const struct mlx5_ib_profile uplink_rep_profile = {
+const struct mlx5_ib_profile raw_eth_profile = {
STAGE_CREATE(MLX5_IB_STAGE_INIT,
mlx5_ib_stage_init_init,
mlx5_ib_stage_init_cleanup),
@@ -6822,13 +7058,13 @@ const struct mlx5_ib_profile uplink_rep_profile = {
mlx5_ib_stage_flow_db_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CAPS,
mlx5_ib_stage_caps_init,
- NULL),
+ mlx5_ib_stage_caps_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
- mlx5_ib_stage_rep_non_default_cb,
+ mlx5_ib_stage_raw_eth_non_default_cb,
NULL),
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
- mlx5_ib_stage_rep_roce_init,
- mlx5_ib_stage_rep_roce_cleanup),
+ mlx5_ib_stage_raw_eth_roce_init,
+ mlx5_ib_stage_raw_eth_roce_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_SRQ,
mlx5_init_srq_table,
mlx5_cleanup_srq_table),
@@ -6904,6 +7140,7 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev)
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
+ const struct mlx5_ib_profile *profile;
enum rdma_link_layer ll;
struct mlx5_ib_dev *dev;
int port_type_cap;
@@ -6932,14 +7169,19 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->port = kcalloc(num_ports, sizeof(*dev->port),
GFP_KERNEL);
if (!dev->port) {
- ib_dealloc_device((struct ib_device *)dev);
+ ib_dealloc_device(&dev->ib_dev);
return NULL;
}
dev->mdev = mdev;
dev->num_ports = num_ports;
- return __mlx5_ib_add(dev, &pf_profile);
+ if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_is_roce_enabled(mdev))
+ profile = &raw_eth_profile;
+ else
+ profile = &pf_profile;
+
+ return __mlx5_ib_add(dev, profile);
}
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
@@ -6959,6 +7201,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
mlx5_ib_unbind_slave_port(mpi->ibdev, mpi);
list_del(&mpi->list);
mutex_unlock(&mlx5_ib_multiport_mutex);
+ kfree(mpi);
return;
}
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index fe1a76d8531c..b90a3649e7d1 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -34,6 +34,7 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include "mlx5_ib.h"
+#include <linux/jiffies.h>
/* @umem: umem object to scan
* @addr: ib virtual address requested by the user
@@ -56,18 +57,6 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
struct scatterlist *sg;
int entry;
- if (umem->is_odp) {
- unsigned int page_shift = to_ib_umem_odp(umem)->page_shift;
-
- *ncont = ib_umem_page_count(umem);
- *count = *ncont << (page_shift - PAGE_SHIFT);
- *shift = page_shift;
- if (order)
- *order = ilog2(roundup_pow_of_two(*ncont));
-
- return;
- }
-
addr = addr >> PAGE_SHIFT;
tmp = (unsigned long)addr;
m = find_first_bit(&tmp, BITS_PER_LONG);
@@ -112,18 +101,6 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
*count = i;
}
-static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
-{
- u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK;
-
- if (umem_dma & ODP_READ_ALLOWED_BIT)
- mtt_entry |= MLX5_IB_MTT_READ;
- if (umem_dma & ODP_WRITE_ALLOWED_BIT)
- mtt_entry |= MLX5_IB_MTT_WRITE;
-
- return mtt_entry;
-}
-
/*
* Populate the given array with bus addresses from the umem.
*
@@ -150,19 +127,6 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
struct scatterlist *sg;
int entry;
- if (umem->is_odp) {
- WARN_ON(shift != 0);
- WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE));
-
- for (i = 0; i < num_pages; ++i) {
- dma_addr_t pa =
- to_ib_umem_odp(umem)->dma_list[offset + i];
-
- pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
- }
- return;
- }
-
i = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
len = sg_dma_len(sg) >> PAGE_SHIFT;
@@ -228,3 +192,201 @@ int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
*offset = buf_off >> ilog2(off_size);
return 0;
}
+
+#define WR_ID_BF 0xBF
+#define WR_ID_END 0xBAD
+#define TEST_WC_NUM_WQES 255
+#define TEST_WC_POLLING_MAX_TIME_JIFFIES msecs_to_jiffies(100)
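+
+/*
+ * Sketch of the write-combining self test, as read from the code below
+ * (descriptive note, not a statement of the hardware contract): each NOP
+ * WQE is left unsignaled in the send queue, while the 64-byte copy pushed
+ * through the BlueFlame/WC register has CQ_UPDATE forced on.  If the WC
+ * write reaches the HCA intact, completions carry WR_ID_BF and wc_support
+ * is set; otherwise the first completion observed is the final, explicitly
+ * signaled WR_ID_END, and wc_support stays false.
+ */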
+static int post_send_nop(struct mlx5_ib_dev *dev, struct ib_qp *ibqp, u64 wr_id,
+ bool signaled)
+{
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ struct mlx5_wqe_ctrl_seg *ctrl;
+ struct mlx5_bf *bf = &qp->bf;
+ __be32 mmio_wqe[16] = {};
+ unsigned long flags;
+ unsigned int idx;
+ int i;
+
+ if (unlikely(dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
+ return -EIO;
+
+ spin_lock_irqsave(&qp->sq.lock, flags);
+
+ idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
+ ctrl = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx);
+
+ memset(ctrl, 0, sizeof(struct mlx5_wqe_ctrl_seg));
+ ctrl->fm_ce_se = signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0;
+ ctrl->opmod_idx_opcode =
+ cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | MLX5_OPCODE_NOP);
+ ctrl->qpn_ds = cpu_to_be32((sizeof(struct mlx5_wqe_ctrl_seg) / 16) |
+ (qp->trans_qp.base.mqp.qpn << 8));
+
+ qp->sq.wrid[idx] = wr_id;
+ qp->sq.w_list[idx].opcode = MLX5_OPCODE_NOP;
+ qp->sq.wqe_head[idx] = qp->sq.head + 1;
+ qp->sq.cur_post += DIV_ROUND_UP(sizeof(struct mlx5_wqe_ctrl_seg),
+ MLX5_SEND_WQE_BB);
+ qp->sq.w_list[idx].next = qp->sq.cur_post;
+ qp->sq.head++;
+
+ memcpy(mmio_wqe, ctrl, sizeof(*ctrl));
+ ((struct mlx5_wqe_ctrl_seg *)&mmio_wqe)->fm_ce_se |=
+ MLX5_WQE_CTRL_CQ_UPDATE;
+
+ /* Make sure that descriptors are written before
+ * updating doorbell record and ringing the doorbell
+ */
+ wmb();
+
+ qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
+
+	/* Make sure the doorbell record is visible to the HCA before
+	 * we ring the doorbell
+	 */
+ wmb();
+ for (i = 0; i < 8; i++)
+ mlx5_write64(&mmio_wqe[i * 2],
+ bf->bfreg->map + bf->offset + i * 8);
+
+ bf->offset ^= bf->buf_size;
+
+ spin_unlock_irqrestore(&qp->sq.lock, flags);
+
+ return 0;
+}
+
+static int test_wc_poll_cq_result(struct mlx5_ib_dev *dev, struct ib_cq *cq)
+{
+ int ret;
+ struct ib_wc wc = {};
+ unsigned long end = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES;
+
+ do {
+ ret = ib_poll_cq(cq, 1, &wc);
+ if (ret < 0 || wc.status)
+ return ret < 0 ? ret : -EINVAL;
+ if (ret)
+ break;
+ } while (!time_after(jiffies, end));
+
+ if (!ret)
+ return -ETIMEDOUT;
+
+ if (wc.wr_id != WR_ID_BF)
+ ret = 0;
+
+ return ret;
+}
+
+static int test_wc_do_send(struct mlx5_ib_dev *dev, struct ib_qp *qp)
+{
+ int err, i;
+
+ for (i = 0; i < TEST_WC_NUM_WQES; i++) {
+ err = post_send_nop(dev, qp, WR_ID_BF, false);
+ if (err)
+ return err;
+ }
+
+ return post_send_nop(dev, qp, WR_ID_END, true);
+}
+
+int mlx5_ib_test_wc(struct mlx5_ib_dev *dev)
+{
+ struct ib_cq_init_attr cq_attr = { .cqe = TEST_WC_NUM_WQES + 1 };
+ int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);
+ struct ib_qp_init_attr qp_init_attr = {
+ .cap = { .max_send_wr = TEST_WC_NUM_WQES },
+ .qp_type = IB_QPT_UD,
+ .sq_sig_type = IB_SIGNAL_REQ_WR,
+ .create_flags = MLX5_IB_QP_CREATE_WC_TEST,
+ };
+ struct ib_qp_attr qp_attr = { .port_num = 1 };
+ struct ib_device *ibdev = &dev->ib_dev;
+ struct ib_qp *qp;
+ struct ib_cq *cq;
+ struct ib_pd *pd;
+ int ret;
+
+ if (!MLX5_CAP_GEN(dev->mdev, bf))
+ return 0;
+
+ if (!dev->mdev->roce.roce_en &&
+ port_type_cap == MLX5_CAP_PORT_TYPE_ETH) {
+ if (mlx5_core_is_pf(dev->mdev))
+ dev->wc_support = true;
+ return 0;
+ }
+
+ ret = mlx5_alloc_bfreg(dev->mdev, &dev->wc_bfreg, true, false);
+ if (ret)
+ goto print_err;
+
+ if (!dev->wc_bfreg.wc)
+ goto out1;
+
+ pd = ib_alloc_pd(ibdev, 0);
+ if (IS_ERR(pd)) {
+ ret = PTR_ERR(pd);
+ goto out1;
+ }
+
+ cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr);
+ if (IS_ERR(cq)) {
+ ret = PTR_ERR(cq);
+ goto out2;
+ }
+
+ qp_init_attr.recv_cq = cq;
+ qp_init_attr.send_cq = cq;
+ qp = ib_create_qp(pd, &qp_init_attr);
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
+ goto out3;
+ }
+
+ qp_attr.qp_state = IB_QPS_INIT;
+ ret = ib_modify_qp(qp, &qp_attr,
+ IB_QP_STATE | IB_QP_PORT | IB_QP_PKEY_INDEX |
+ IB_QP_QKEY);
+ if (ret)
+ goto out4;
+
+ qp_attr.qp_state = IB_QPS_RTR;
+ ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
+ if (ret)
+ goto out4;
+
+ qp_attr.qp_state = IB_QPS_RTS;
+ ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
+ if (ret)
+ goto out4;
+
+ ret = test_wc_do_send(dev, qp);
+ if (ret < 0)
+ goto out4;
+
+ ret = test_wc_poll_cq_result(dev, cq);
+ if (ret > 0) {
+ dev->wc_support = true;
+ ret = 0;
+ }
+
+out4:
+ ib_destroy_qp(qp);
+out3:
+ ib_destroy_cq(cq);
+out2:
+ ib_dealloc_pd(pd);
+out1:
+ mlx5_free_bfreg(dev->mdev, &dev->wc_bfreg);
+print_err:
+ if (ret)
+ mlx5_ib_err(
+ dev,
+ "Error %d while trying to test write-combining support\n",
+ ret);
+ return ret;
+}
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index f6a53455bf8b..d9bffcc93587 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -72,6 +72,11 @@
#define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size)
enum {
+ MLX5_IB_MMAP_OFFSET_START = 9,
+ MLX5_IB_MMAP_OFFSET_END = 255,
+};
+
+enum {
MLX5_IB_MMAP_CMD_SHIFT = 8,
MLX5_IB_MMAP_CMD_MASK = 0xff,
};
@@ -118,6 +123,11 @@ enum {
MLX5_MEMIC_BASE_SIZE = 1 << MLX5_MEMIC_BASE_ALIGN,
};
+enum mlx5_ib_mmap_type {
+ MLX5_IB_MMAP_TYPE_MEMIC = 1,
+ MLX5_IB_MMAP_TYPE_VAR = 2,
+};
+
#define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) \
(MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity))
#define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))
@@ -135,7 +145,6 @@ struct mlx5_ib_ucontext {
u32 tdn;
u64 lib_caps;
- DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES);
u16 devx_uid;
/* For RoCE LAG TX affinity */
atomic_t tx_port_affinity;
@@ -200,6 +209,7 @@ struct mlx5_ib_flow_db {
struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS];
struct mlx5_ib_flow_prio egress[MLX5_IB_NUM_EGRESS_FTS];
struct mlx5_ib_flow_prio fdb;
+ struct mlx5_ib_flow_prio rdma_rx[MLX5_IB_NUM_FLOW_FT];
struct mlx5_flow_table *lag_demux_ft;
/* Protect flow steering bypass flow tables
* when add/del flow rules.
@@ -246,12 +256,8 @@ struct mlx5_ib_flow_db {
* These flags are intended for internal use by the mlx5_ib driver, and they
* rely on the range reserved for that use in the ib_qp_create_flags enum.
*/
-
-/* Create a UD QP whose source QP number is 1 */
-static inline enum ib_qp_create_flags mlx5_ib_create_qp_sqpn_qp1(void)
-{
- return IB_QP_CREATE_RESERVED_START;
-}
+#define MLX5_IB_QP_CREATE_SQPN_QP1 IB_QP_CREATE_RESERVED_START
+#define MLX5_IB_QP_CREATE_WC_TEST (IB_QP_CREATE_RESERVED_START << 1)
struct wr_list {
u16 opcode;
@@ -294,6 +300,7 @@ enum mlx5_ib_wq_flags {
#define MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES 16
#define MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES 6
#define MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES 13
+#define MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES 3
struct mlx5_ib_rwq {
struct ib_wq ibwq;
@@ -558,6 +565,13 @@ enum mlx5_ib_mtt_access_flags {
MLX5_IB_MTT_WRITE = (1 << 1),
};
+struct mlx5_user_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
+ u8 mmap_flag;
+ u64 address;
+ u32 page_idx;
+};
+
struct mlx5_ib_dm {
struct ib_dm ibdm;
phys_addr_t dev_addr;
@@ -569,6 +583,7 @@ struct mlx5_ib_dm {
} icm_dm;
/* other dm types specific params should be added here */
};
+ struct mlx5_user_mmap_entry mentry;
};
#define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
@@ -584,6 +599,9 @@ struct mlx5_ib_dm {
IB_ACCESS_REMOTE_READ |\
IB_ZERO_BASED)
+#define mlx5_update_odp_stats(mr, counter_name, value) \
+ atomic64_add(value, &((mr)->odp_stats.counter_name))
+
struct mlx5_ib_mr {
struct ib_mr ibmr;
void *descs;
@@ -605,7 +623,6 @@ struct mlx5_ib_mr {
struct mlx5_ib_dev *dev;
u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
struct mlx5_core_sig_ctx *sig;
- int live;
void *descs_alloc;
int access_flags; /* Needed for rereg MR */
@@ -617,10 +634,18 @@ struct mlx5_ib_mr {
u64 data_iova;
u64 pi_iova;
- atomic_t num_leaf_free;
- wait_queue_head_t q_leaf_free;
+ /* For ODP and implicit */
+ atomic_t num_deferred_work;
+ struct xarray implicit_children;
+ union {
+ struct rcu_head rcu;
+ struct list_head elm;
+ struct work_struct work;
+ } odp_destroy;
+ struct ib_odp_counters odp_stats;
+ bool is_odp_implicit;
+
struct mlx5_async_work cb_work;
- atomic_t num_pending_prefetch;
};
static inline bool is_odp_mr(struct mlx5_ib_mr *mr)
@@ -638,7 +663,6 @@ struct mlx5_ib_mw {
struct mlx5_ib_devx_mr {
struct mlx5_core_mkey mmkey;
int ndescs;
- struct rcu_head rcu;
};
struct mlx5_ib_umr_context {
@@ -792,13 +816,6 @@ enum {
MLX5_MAX_DELAY_DROP_TIMEOUT_MS = 100,
};
-struct mlx5_ib_dbg_delay_drop {
- struct dentry *dir_debugfs;
- struct dentry *rqs_cnt_debugfs;
- struct dentry *events_cnt_debugfs;
- struct dentry *timeout_debugfs;
-};
-
struct mlx5_ib_delay_drop {
struct mlx5_ib_dev *dev;
struct work_struct delay_drop_work;
@@ -808,7 +825,7 @@ struct mlx5_ib_delay_drop {
bool activate;
atomic_t events_cnt;
atomic_t rqs_cnt;
- struct mlx5_ib_dbg_delay_drop *dbg;
+ struct dentry *dir_debugfs;
};
enum mlx5_ib_stages {
@@ -868,7 +885,10 @@ struct mlx5_ib_flow_action {
struct {
struct mlx5_ib_dev *dev;
u32 sub_type;
- u32 action_id;
+ union {
+ struct mlx5_modify_hdr *modify_hdr;
+ struct mlx5_pkt_reformat *pkt_reformat;
+ };
} flow_action_raw;
};
};
@@ -881,8 +901,6 @@ struct mlx5_dm {
*/
spinlock_t lock;
DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES);
- unsigned long *steering_sw_icm_alloc_blocks;
- unsigned long *header_modify_sw_icm_alloc_blocks;
};
struct mlx5_read_counters_attr {
@@ -948,6 +966,15 @@ struct mlx5_devx_event_table {
struct xarray event_xa;
};
+struct mlx5_var_table {
+ /* serialize updating the bitmap */
+ struct mutex bitmap_lock;
+ unsigned long *bitmap;
+ u64 hw_start_addr;
+ u32 stride_size;
+ u64 num_var_hw_entries;
+};
+
struct mlx5_ib_dev {
struct ib_device ib_dev;
struct mlx5_core_dev *mdev;
@@ -956,7 +983,11 @@ struct mlx5_ib_dev {
/* serialize update of capability mask
*/
struct mutex cap_mask_mutex;
- bool ib_active;
+ u8 ib_active:1;
+ u8 fill_delay:1;
+ u8 is_rep:1;
+ u8 lag_active:1;
+ u8 wc_support:1;
struct umr_common umrc;
/* sync used page count stats
*/
@@ -965,7 +996,6 @@ struct mlx5_ib_dev {
struct timer_list delay_timer;
/* Prevents soft lock on massive reg MRs */
struct mutex slow_path_mutex;
- int fill_delay;
struct ib_odp_caps odp_caps;
u64 odp_max_size;
struct mlx5_ib_pf_eq odp_pf_eq;
@@ -974,7 +1004,9 @@ struct mlx5_ib_dev {
* Sleepable RCU that prevents destruction of MRs while they are still
* being used by a page fault handler.
*/
- struct srcu_struct mr_srcu;
+ struct srcu_struct odp_srcu;
+ struct xarray odp_mkeys;
+
u32 null_mkey;
struct mlx5_ib_flow_db *flow_db;
/* protect resources needed as part of reset flow */
@@ -983,11 +1015,10 @@ struct mlx5_ib_dev {
/* Array with num_ports elements */
struct mlx5_ib_port *port;
struct mlx5_sq_bfreg bfreg;
+ struct mlx5_sq_bfreg wc_bfreg;
struct mlx5_sq_bfreg fp_bfreg;
struct mlx5_ib_delay_drop delay_drop;
const struct mlx5_ib_profile *profile;
- bool is_rep;
- int lag_active;
struct mlx5_ib_lb_state lb;
u8 umr_fence;
@@ -998,6 +1029,9 @@ struct mlx5_ib_dev {
struct mlx5_srq_table srq_table;
struct mlx5_async_ctx async_ctx;
struct mlx5_devx_event_table devx_event_table;
+ struct mlx5_var_table var_table;
+
+ struct xarray sig_mrs;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -1094,6 +1128,13 @@ to_mflow_act(struct ib_flow_action *ibact)
return container_of(ibact, struct mlx5_ib_flow_action, ib_action);
}
+static inline struct mlx5_user_mmap_entry *
+to_mmmap(struct rdma_user_mmap_entry *rdma_entry)
+{
+ return container_of(rdma_entry,
+ struct mlx5_user_mmap_entry, rdma_entry);
+}
+
int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context,
struct ib_udata *udata, unsigned long virt,
struct mlx5_db *db);
@@ -1129,12 +1170,12 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr);
int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr);
-int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
- int buflen, size_t *bc);
-int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
- int buflen, size_t *bc);
-int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index,
- void *buffer, int buflen, size_t *bc);
+int mlx5_ib_read_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
+ size_t buflen, size_t *bc);
+int mlx5_ib_read_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
+ size_t buflen, size_t *bc);
+int mlx5_ib_read_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer,
+ size_t buflen, size_t *bc);
int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
@@ -1161,6 +1202,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
struct ib_udata *udata,
int access_flags);
void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
+void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr);
int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
u64 length, u64 virt_addr, int access_flags,
struct ib_pd *pd, struct ib_udata *udata);
@@ -1178,9 +1220,8 @@ int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
unsigned int *meta_sg_offset);
int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in, size_t in_mad_size,
- struct ib_mad_hdr *out, size_t *out_mad_size,
- u16 *out_mad_pkey_index);
+ const struct ib_mad *in, struct ib_mad *out,
+ size_t *out_mad_size, u16 *out_mad_pkey_index);
struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
struct ib_udata *udata);
int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
@@ -1222,6 +1263,8 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry);
void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
+int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr);
+
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
struct ib_mr_status *mr_status);
struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
@@ -1234,7 +1277,6 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
struct ib_rwq_ind_table_init_attr *init_attr,
struct ib_udata *udata);
int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
-bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev);
struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_dm_alloc_attr *attr,
@@ -1250,11 +1292,9 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev);
int __init mlx5_ib_odp_init(void);
void mlx5_ib_odp_cleanup(void);
-void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
- unsigned long end);
void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent);
-void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
- size_t nentries, struct mlx5_ib_mr *mr, int flags);
+void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
+ struct mlx5_ib_mr *mr, int flags);
int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
enum ib_uverbs_advise_mr_advice advice,
@@ -1270,9 +1310,8 @@ static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {}
static inline int mlx5_ib_odp_init(void) { return 0; }
static inline void mlx5_ib_odp_cleanup(void) {}
static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {}
-static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
- size_t nentries, struct mlx5_ib_mr *mr,
- int flags) {}
+static inline void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
+ struct mlx5_ib_mr *mr, int flags) {}
static inline int
mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
@@ -1281,11 +1320,10 @@ mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
{
return -EOPNOTSUPP;
}
-static inline void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp,
- unsigned long start,
- unsigned long end){};
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+extern const struct mmu_interval_notifier_ops mlx5_mn_ops;
+
/* Needed for rep profile */
void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
const struct mlx5_ib_profile *profile,
@@ -1299,6 +1337,9 @@ int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
u8 port, int state);
int mlx5_ib_get_vf_stats(struct ib_device *device, int vf,
u8 port, struct ifla_vf_stats *stats);
+int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u8 port,
+ struct ifla_vf_guid *node_guid,
+ struct ifla_vf_guid *port_guid);
int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
u64 guid, int type);
@@ -1333,23 +1374,26 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev,
u8 *native_port_num);
void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev,
u8 port_num);
+int mlx5_ib_fill_res_entry(struct sk_buff *msg,
+ struct rdma_restrack_entry *res);
+int mlx5_ib_fill_stat_entry(struct sk_buff *msg,
+ struct rdma_restrack_entry *res);
+
+extern const struct uapi_definition mlx5_ib_devx_defs[];
+extern const struct uapi_definition mlx5_ib_flow_defs[];
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev);
void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev);
-const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void);
-extern const struct uapi_definition mlx5_ib_devx_defs[];
-extern const struct uapi_definition mlx5_ib_flow_defs[];
struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
struct mlx5_flow_context *flow_context,
struct mlx5_flow_act *flow_act, u32 counter_id,
void *cmd_in, int inlen, int dest_id, int dest_type);
bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
-bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id);
-int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root);
+bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id);
void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction);
#else
static inline int
@@ -1475,4 +1519,25 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
bool dyn_bfreg);
int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter);
+u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num);
+
+static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev,
+ bool do_modify_atomic, int access_flags)
+{
+ if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
+ return false;
+
+ if (do_modify_atomic &&
+ MLX5_CAP_GEN(dev->mdev, atomic) &&
+ MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
+ return false;
+
+ if (access_flags & IB_ACCESS_RELAXED_ORDERING)
+ return false;
+
+ return true;
+}
+
+int mlx5_ib_enable_driver(struct ib_device *dev);
+int mlx5_ib_test_wc(struct mlx5_ib_dev *dev);
#endif /* MLX5_IB_H */
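/*
 * A minimal sketch (editor's illustration, assuming only the fields declared
 * above) of how the new odp_mkeys xarray pairs with odp_srcu: the page-fault
 * path looks entries up under the SRCU read lock, while teardown erases the
 * entry and then waits out readers. The helper names here are hypothetical;
 * the real call sites are in mr.c and odp.c below.
 */
static struct mlx5_core_mkey *odp_mkey_lookup(struct mlx5_ib_dev *dev, u32 key,
					      int *srcu_idx)
{
	/* Caller must srcu_read_unlock(&dev->odp_srcu, *srcu_idx) when done. */
	*srcu_idx = srcu_read_lock(&dev->odp_srcu);
	return xa_load(&dev->odp_mkeys, mlx5_base_mkey(key));
}

static void odp_mkey_unpublish(struct mlx5_ib_dev *dev, u32 key)
{
	xa_erase(&dev->odp_mkeys, mlx5_base_mkey(key));
	/* Wait for any fault handler still dereferencing the stale entry. */
	synchronize_srcu(&dev->odp_srcu);
}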
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index b74fad08412f..6fa0a83c19de 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -50,7 +50,6 @@ enum {
static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int mr_cache_max_order(struct mlx5_ib_dev *dev);
-static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
{
@@ -59,13 +58,9 @@ static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
- int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
+ WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)));
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
- /* Wait until all page fault handlers using the mr complete. */
- synchronize_srcu(&dev->mr_srcu);
-
- return err;
+ return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
}
static int order2idx(struct mlx5_ib_dev *dev, int order)
@@ -84,32 +79,6 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
}
-static void update_odp_mr(struct mlx5_ib_mr *mr)
-{
- if (is_odp_mr(mr)) {
- /*
- * This barrier prevents the compiler from moving the
- * setting of umem->odp_data->private to point to our
- * MR, before reg_umr finished, to ensure that the MR
- * initialization have finished before starting to
- * handle invalidations.
- */
- smp_wmb();
- to_ib_umem_odp(mr->umem)->private = mr;
- /*
- * Make sure we will see the new
- * umem->odp_data->private value in the invalidation
- * routines, before we can get page faults on the
- * MR. Page faults can happen once we put the MR in
- * the tree, below this line. Without the barrier,
- * there can be a fault handling and an invalidation
- * before umem->odp_data->private == mr is visible to
- * the invalidation handler.
- */
- smp_wmb();
- }
-}
-
static void reg_mr_callback(int status, struct mlx5_async_work *context)
{
struct mlx5_ib_mr *mr =
@@ -120,8 +89,6 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context)
struct mlx5_cache_ent *ent = &cache->ent[c];
u8 key;
unsigned long flags;
- struct xarray *mkeys = &dev->mdev->priv.mkey_table;
- int err;
spin_lock_irqsave(&ent->lock, flags);
ent->pending--;
@@ -148,13 +115,6 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context)
ent->size++;
spin_unlock_irqrestore(&ent->lock, flags);
- xa_lock_irqsave(mkeys, flags);
- err = xa_err(__xa_store(mkeys, mlx5_base_mkey(mr->mmkey.key),
- &mr->mmkey, GFP_ATOMIC));
- xa_unlock_irqrestore(mkeys, flags);
- if (err)
- pr_err("Error inserting to mkey tree. 0x%x\n", -err);
-
if (!completion_done(&ent->compl))
complete(&ent->compl);
}
@@ -187,7 +147,7 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
break;
}
mr->order = ent->order;
- mr->allocated_from_cache = 1;
+ mr->allocated_from_cache = true;
mr->dev = dev;
MLX5_SET(mkc, mkc, free, 1);
@@ -244,9 +204,6 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
}
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
- synchronize_srcu(&dev->mr_srcu);
-
list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
list_del(&mr->list);
kfree(mr);
@@ -454,7 +411,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
- return NULL;
+ return ERR_PTR(-EINVAL);
}
ent = &cache->ent[entry];
@@ -537,7 +494,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
c = order2idx(dev, mr->order);
WARN_ON(c < 0 || c >= MAX_MR_CACHE_ENTRIES);
- if (unreg_umr(dev, mr)) {
+ if (mlx5_mr_cache_invalidate(mr)) {
mr->allocated_from_cache = false;
destroy_mkey(dev, mr);
ent = &cache->ent[c];
@@ -581,10 +538,6 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
}
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- synchronize_srcu(&dev->mr_srcu);
-#endif
-
list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
list_del(&mr->list);
kfree(mr);
@@ -705,6 +658,29 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
return 0;
}
+static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
+ struct ib_pd *pd)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+
+ MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
+ MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
+ MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
+ MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
+ MLX5_SET(mkc, mkc, lr, 1);
+
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
+ MLX5_SET(mkc, mkc, relaxed_ordering_write,
+ !!(acc & IB_ACCESS_RELAXED_ORDERING));
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
+ MLX5_SET(mkc, mkc, relaxed_ordering_read,
+ !!(acc & IB_ACCESS_RELAXED_ORDERING));
+
+ MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET64(mkc, mkc, start_addr, start_addr);
+}
+
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
@@ -728,16 +704,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
- MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
- MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
- MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
- MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
- MLX5_SET(mkc, mkc, lr, 1);
-
MLX5_SET(mkc, mkc, length64, 1);
- MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
- MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET64(mkc, mkc, start_addr, 0);
+ set_mkc_access_pd_addr_fields(mkc, acc, 0, pd);
err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
if (err)
@@ -778,25 +746,43 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev)
return MLX5_MAX_UMR_SHIFT;
}
-static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata,
- u64 start, u64 length, int access_flags,
- struct ib_umem **umem, int *npages, int *page_shift,
- int *ncont, int *order)
+static int mr_umem_get(struct mlx5_ib_dev *dev, u64 start, u64 length,
+ int access_flags, struct ib_umem **umem, int *npages,
+ int *page_shift, int *ncont, int *order)
{
struct ib_umem *u;
- int err;
*umem = NULL;
- u = ib_umem_get(udata, start, length, access_flags, 0);
- err = PTR_ERR_OR_ZERO(u);
- if (err) {
- mlx5_ib_dbg(dev, "umem get failed (%d)\n", err);
- return err;
+ if (access_flags & IB_ACCESS_ON_DEMAND) {
+ struct ib_umem_odp *odp;
+
+ odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
+ &mlx5_mn_ops);
+ if (IS_ERR(odp)) {
+ mlx5_ib_dbg(dev, "umem get failed (%ld)\n",
+ PTR_ERR(odp));
+ return PTR_ERR(odp);
+ }
+
+ u = &odp->umem;
+
+ *page_shift = odp->page_shift;
+ *ncont = ib_umem_odp_num_pages(odp);
+ *npages = *ncont << (*page_shift - PAGE_SHIFT);
+ if (order)
+ *order = ilog2(roundup_pow_of_two(*ncont));
+ } else {
+ u = ib_umem_get(&dev->ib_dev, start, length, access_flags);
+ if (IS_ERR(u)) {
+ mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u));
+ return PTR_ERR(u);
+ }
+
+ mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
+ page_shift, ncont, order);
}
- mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
- page_shift, ncont, order);
if (!*npages) {
mlx5_ib_warn(dev, "avoid zero region\n");
ib_umem_release(u);
@@ -890,36 +876,6 @@ static struct mlx5_ib_mr *alloc_mr_from_cache(
return mr;
}
-static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,
- void *xlt, int page_shift, size_t size,
- int flags)
-{
- struct mlx5_ib_dev *dev = mr->dev;
- struct ib_umem *umem = mr->umem;
-
- if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
- if (!umr_can_use_indirect_mkey(dev))
- return -EPERM;
- mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);
- return npages;
- }
-
- npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx);
-
- if (!(flags & MLX5_IB_UPD_XLT_ZAP)) {
- __mlx5_ib_populate_pas(dev, umem, page_shift,
- idx, npages, xlt,
- MLX5_IB_MTT_PRESENT);
- /* Clear padding after the pages
- * brought from the umem.
- */
- memset(xlt + (npages * sizeof(struct mlx5_mtt)), 0,
- size - npages * sizeof(struct mlx5_mtt));
- }
-
- return npages;
-}
-
#define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
MLX5_UMR_MTT_ALIGNMENT)
#define MLX5_SPARE_UMR_CHUNK 0x10000
@@ -943,6 +899,7 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
size_t pages_mapped = 0;
size_t pages_to_map = 0;
size_t pages_iter = 0;
+ size_t size_to_map = 0;
gfp_t gfp;
bool use_emergency_page = false;
@@ -989,6 +946,15 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
goto free_xlt;
}
+ if (mr->umem->is_odp) {
+ if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
+ struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+ size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
+
+ pages_to_map = min_t(size_t, pages_to_map, max_pages);
+ }
+ }
+
sg.addr = dma;
sg.lkey = dev->umrc.pd->local_dma_lkey;
@@ -1011,14 +977,22 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
pages_mapped < pages_to_map && !err;
pages_mapped += pages_iter, idx += pages_iter) {
npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
+ size_to_map = npages * desc_size;
dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
- npages = populate_xlt(mr, idx, npages, xlt,
- page_shift, size, flags);
-
+ if (mr->umem->is_odp) {
+ mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
+ } else {
+ __mlx5_ib_populate_pas(dev, mr->umem, page_shift, idx,
+ npages, xlt,
+ MLX5_IB_MTT_PRESENT);
+ /* Clear padding after the pages
+ * brought from the umem.
+ */
+ memset(xlt + size_to_map, 0, size - size_to_map);
+ }
dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
- sg.length = ALIGN(npages * desc_size,
- MLX5_UMR_MTT_ALIGNMENT);
+ sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);
if (pages_mapped + pages_iter >= pages_to_map) {
if (flags & MLX5_IB_UPD_XLT_ENABLE)
@@ -1097,6 +1071,12 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
MLX5_SET(mkc, mkc, free, !populate);
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
+ MLX5_SET(mkc, mkc, relaxed_ordering_write,
+ !!(access_flags & IB_ACCESS_RELAXED_ORDERING));
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
+ MLX5_SET(mkc, mkc, relaxed_ordering_read,
+ !!(access_flags & IB_ACCESS_RELAXED_ORDERING));
MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
@@ -1177,16 +1157,8 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7);
- MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
- MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
- MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
- MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
- MLX5_SET(mkc, mkc, lr, 1);
-
MLX5_SET64(mkc, mkc, len, length);
- MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
- MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET64(mkc, mkc, start_addr, start_addr);
+ set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd);
err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
if (err)
@@ -1277,6 +1249,8 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start &&
length == U64_MAX) {
+ if (virt_addr != start)
+ return ERR_PTR(-EINVAL);
if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
return ERR_PTR(-EINVAL);
@@ -1287,15 +1261,13 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return &mr->ibmr;
}
- err = mr_umem_get(dev, udata, start, length, access_flags, &umem,
+ err = mr_umem_get(dev, start, length, access_flags, &umem,
&npages, &page_shift, &ncont, &order);
if (err < 0)
return ERR_PTR(err);
- use_umr = !MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled) &&
- (!MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled) ||
- !MLX5_CAP_GEN(dev->mdev, atomic));
+ use_umr = mlx5_ib_can_use_umr(dev, true, access_flags);
if (order <= mr_cache_max_order(dev) && use_umr) {
mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont,
@@ -1330,8 +1302,6 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mr->umem = umem;
set_mr_fields(dev, mr, npages, length, access_flags);
- update_odp_mr(mr);
-
if (use_umr) {
int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
@@ -1347,9 +1317,16 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
}
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
- mr->live = 1;
- atomic_set(&mr->num_pending_prefetch, 0);
+ if (is_odp_mr(mr)) {
+ to_ib_umem_odp(mr->umem)->private = mr;
+ atomic_set(&mr->num_deferred_work, 0);
+ err = xa_err(xa_store(&dev->odp_mkeys,
+ mlx5_base_mkey(mr->mmkey.key), &mr->mmkey,
+ GFP_KERNEL));
+ if (err) {
+ dereg_mr(dev, mr);
+ return ERR_PTR(err);
+ }
}
return &mr->ibmr;
@@ -1358,22 +1335,29 @@ error:
return ERR_PTR(err);
}
-static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+/**
+ * mlx5_mr_cache_invalidate - Fence all DMA on the MR
+ * @mr: The MR to fence
+ *
+ * Upon return the NIC will not be doing any DMA to the pages under the MR,
+ * and any DMA in progress will be completed. Failure of this function
+ * indicates the HW has failed catastrophically.
+ */
+int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr)
{
- struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_umr_wr umrwr = {};
- if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ if (mr->dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
return 0;
umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
umrwr.wr.opcode = MLX5_IB_WR_UMR;
- umrwr.pd = dev->umrc.pd;
+ umrwr.pd = mr->dev->umrc.pd;
umrwr.mkey = mr->mmkey.key;
umrwr.ignore_free_state = 1;
- return mlx5_ib_post_send_wait(dev, &umrwr);
+ return mlx5_ib_post_send_wait(mr->dev, &umrwr);
}
static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr,
@@ -1425,6 +1409,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
if (!mr->umem)
return -EINVAL;
+ if (is_odp_mr(mr))
+ return -EOPNOTSUPP;
+
if (flags & IB_MR_REREG_TRANS) {
addr = virt_addr;
len = length;
@@ -1441,19 +1428,19 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
flags |= IB_MR_REREG_TRANS;
ib_umem_release(mr->umem);
mr->umem = NULL;
- err = mr_umem_get(dev, udata, addr, len, access_flags,
- &mr->umem, &npages, &page_shift, &ncont,
- &order);
+ err = mr_umem_get(dev, addr, len, access_flags, &mr->umem,
+ &npages, &page_shift, &ncont, &order);
if (err)
goto err;
}
- if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) {
+ if (!mlx5_ib_can_use_umr(dev, true, access_flags) ||
+ (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len))) {
/*
* UMR can't be used - MKey needs to be replaced.
*/
if (mr->allocated_from_cache)
- err = unreg_umr(dev, mr);
+ err = mlx5_mr_cache_invalidate(mr);
else
err = destroy_mkey(dev, mr);
if (err)
@@ -1468,9 +1455,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
goto err;
}
- mr->allocated_from_cache = 0;
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
- mr->live = 1;
+ mr->allocated_from_cache = false;
} else {
/*
* Send a UMR WQE
@@ -1499,7 +1484,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
set_mr_fields(dev, mr, npages, len, access_flags);
- update_odp_mr(mr);
return 0;
err:
@@ -1569,6 +1553,7 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
mr->sig->psv_wire.psv_idx))
mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
mr->sig->psv_wire.psv_idx);
+ xa_erase(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key));
kfree(mr->sig);
mr->sig = NULL;
}
@@ -1584,53 +1569,20 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
int npages = mr->npages;
struct ib_umem *umem = mr->umem;
- if (is_odp_mr(mr)) {
- struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem);
-
- /* Prevent new page faults and
- * prefetch requests from succeeding
- */
- mr->live = 0;
-
- /* dequeue pending prefetch requests for the mr */
- if (atomic_read(&mr->num_pending_prefetch))
- flush_workqueue(system_unbound_wq);
- WARN_ON(atomic_read(&mr->num_pending_prefetch));
-
- /* Wait for all running page-fault handlers to finish. */
- synchronize_srcu(&dev->mr_srcu);
- /* Destroy all page mappings */
- if (umem_odp->page_list)
- mlx5_ib_invalidate_range(umem_odp,
- ib_umem_start(umem_odp),
- ib_umem_end(umem_odp));
- else
- mlx5_ib_free_implicit_mr(mr);
- /*
- * We kill the umem before the MR for ODP,
- * so that there will not be any invalidations in
- * flight, looking at the *mr struct.
- */
- ib_umem_release(umem);
- atomic_sub(npages, &dev->mdev->priv.reg_pages);
-
- /* Avoid double-freeing the umem. */
- umem = NULL;
- }
+ /* Stop all DMA */
+ if (is_odp_mr(mr))
+ mlx5_ib_fence_odp_mr(mr);
+ else
+ clean_mr(dev, mr);
- clean_mr(dev, mr);
+ if (mr->allocated_from_cache)
+ mlx5_mr_cache_free(dev, mr);
+ else
+ kfree(mr);
- /*
- * We should unregister the DMA address from the HCA before
- * remove the DMA mapping.
- */
- mlx5_mr_cache_free(dev, mr);
ib_umem_release(umem);
- if (umem)
- atomic_sub(npages, &dev->mdev->priv.reg_pages);
+ atomic_sub(npages, &dev->mdev->priv.reg_pages);
- if (!mr->allocated_from_cache)
- kfree(mr);
}
int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
@@ -1642,6 +1594,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
dereg_mr(to_mdev(mmr->klm_mr->ibmr.device), mmr->klm_mr);
}
+ if (is_odp_mr(mmr) && to_ib_umem_odp(mmr->umem)->is_implicit_odp) {
+ mlx5_ib_free_implicit_mr(mmr);
+ return 0;
+ }
+
dereg_mr(to_mdev(ibmr->device), mmr);
return 0;
@@ -1805,8 +1762,15 @@ static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
if (err)
goto err_free_mtt_mr;
+ err = xa_err(xa_store(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key),
+ mr->sig, GFP_KERNEL));
+ if (err)
+ goto err_free_descs;
return 0;
+err_free_descs:
+ destroy_mkey(dev, mr);
+ mlx5_free_priv_descs(mr);
err_free_mtt_mr:
dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr);
mr->mtt_mr = NULL;
@@ -1959,9 +1923,19 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
}
}
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+ err = xa_err(xa_store(&dev->odp_mkeys,
+ mlx5_base_mkey(mw->mmkey.key), &mw->mmkey,
+ GFP_KERNEL));
+ if (err)
+ goto free_mkey;
+ }
+
kfree(in);
return &mw->ibmw;
+free_mkey:
+ mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
free:
kfree(mw);
kfree(in);
@@ -1970,14 +1944,24 @@ free:
int mlx5_ib_dealloc_mw(struct ib_mw *mw)
{
+ struct mlx5_ib_dev *dev = to_mdev(mw->device);
struct mlx5_ib_mw *mmw = to_mmw(mw);
int err;
- err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev,
- &mmw->mmkey);
- if (!err)
- kfree(mmw);
- return err;
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+ xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key));
+ /*
+ * pagefault_single_data_segment() may be accessing mmw under
+ * SRCU if the user bound an ODP MR to this MW.
+ */
+ synchronize_srcu(&dev->odp_srcu);
+ }
+
+ err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey);
+ if (err)
+ return err;
+ kfree(mmw);
+ return 0;
}
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 1d257d1b3b0d..4216814ba871 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -93,152 +93,223 @@ struct mlx5_pagefault {
static u64 mlx5_imr_ksm_entries;
-static int check_parent(struct ib_umem_odp *odp,
- struct mlx5_ib_mr *parent)
+static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
+ struct mlx5_ib_mr *imr, int flags)
{
- struct mlx5_ib_mr *mr = odp->private;
+ struct mlx5_klm *end = pklm + nentries;
- return mr && mr->parent == parent && !odp->dying;
+ if (flags & MLX5_IB_UPD_XLT_ZAP) {
+ for (; pklm != end; pklm++, idx++) {
+ pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
+ pklm->key = cpu_to_be32(imr->dev->null_mkey);
+ pklm->va = 0;
+ }
+ return;
+ }
+
+ /*
+ * The locking here is pretty subtle. Ideally the implicit_children
+ * xarray would be protected by the umem_mutex, however that is not
+ * possible. Instead this uses a weaker update-then-lock pattern:
+ *
+ * srcu_read_lock()
+ * xa_store()
+ * mutex_lock(umem_mutex)
+ * mlx5_ib_update_xlt()
+ * mutex_unlock(umem_mutex)
+ * destroy lkey
+ *
+ * i.e. any change to the xarray must be followed by the locked update_xlt
+ * before destroying.
+ *
+ * The umem_mutex provides the acquire/release semantic needed to make
+ * the xa_store() visible to a racing thread. While SRCU is not
+ * technically required, using it gives consistent use of the SRCU
+ * locking around the xarray.
+ */
+ lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex);
+ lockdep_assert_held(&imr->dev->odp_srcu);
+
+ for (; pklm != end; pklm++, idx++) {
+ struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx);
+
+ pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
+ if (mtt) {
+ pklm->key = cpu_to_be32(mtt->ibmr.lkey);
+ pklm->va = cpu_to_be64(idx * MLX5_IMR_MTT_SIZE);
+ } else {
+ pklm->key = cpu_to_be32(imr->dev->null_mkey);
+ pklm->va = 0;
+ }
+ }
}
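/*
 * Condensed, hypothetical illustration (not a helper in this patch) of the
 * update-then-lock ordering that the comment above describes: a child MR is
 * first published in the implicit_children xarray and only then pushed to
 * the device KSM under umem_mutex, so populate_klm() never misses it.
 */
static int publish_child_mr(struct mlx5_ib_mr *imr, struct mlx5_ib_mr *mtt,
			    unsigned long idx)
{
	struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
	int err;

	/* Step 1: make the child visible to populate_klm() readers. */
	err = xa_err(xa_store(&imr->implicit_children, idx, mtt, GFP_KERNEL));
	if (err)
		return err;

	/* Step 2: push the change to the HW KSM under umem_mutex. */
	mutex_lock(&odp_imr->umem_mutex);
	err = mlx5_ib_update_xlt(imr, idx, 1, 0,
				 MLX5_IB_UPD_XLT_INDIRECT |
				 MLX5_IB_UPD_XLT_ATOMIC);
	mutex_unlock(&odp_imr->umem_mutex);
	return err;
}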
-static struct ib_ucontext_per_mm *mr_to_per_mm(struct mlx5_ib_mr *mr)
+static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
{
- if (WARN_ON(!mr || !is_odp_mr(mr)))
- return NULL;
+ u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK;
+
+ if (umem_dma & ODP_READ_ALLOWED_BIT)
+ mtt_entry |= MLX5_IB_MTT_READ;
+ if (umem_dma & ODP_WRITE_ALLOWED_BIT)
+ mtt_entry |= MLX5_IB_MTT_WRITE;
- return to_ib_umem_odp(mr->umem)->per_mm;
+ return mtt_entry;
}
-static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp)
+static void populate_mtt(__be64 *pas, size_t idx, size_t nentries,
+ struct mlx5_ib_mr *mr, int flags)
{
- struct mlx5_ib_mr *mr = odp->private, *parent = mr->parent;
- struct ib_ucontext_per_mm *per_mm = odp->per_mm;
- struct rb_node *rb;
-
- down_read(&per_mm->umem_rwsem);
- while (1) {
- rb = rb_next(&odp->interval_tree.rb);
- if (!rb)
- goto not_found;
- odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
- if (check_parent(odp, parent))
- goto end;
+ struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+ dma_addr_t pa;
+ size_t i;
+
+ if (flags & MLX5_IB_UPD_XLT_ZAP)
+ return;
+
+ for (i = 0; i < nentries; i++) {
+ pa = odp->dma_list[idx + i];
+ pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
}
-not_found:
- odp = NULL;
-end:
- up_read(&per_mm->umem_rwsem);
- return odp;
}
-static struct ib_umem_odp *odp_lookup(u64 start, u64 length,
- struct mlx5_ib_mr *parent)
+void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
+ struct mlx5_ib_mr *mr, int flags)
{
- struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(parent);
- struct ib_umem_odp *odp;
- struct rb_node *rb;
-
- down_read(&per_mm->umem_rwsem);
- odp = rbt_ib_umem_lookup(&per_mm->umem_tree, start, length);
- if (!odp)
- goto end;
-
- while (1) {
- if (check_parent(odp, parent))
- goto end;
- rb = rb_next(&odp->interval_tree.rb);
- if (!rb)
- goto not_found;
- odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
- if (ib_umem_start(odp) > start + length)
- goto not_found;
+ if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
+ populate_klm(xlt, idx, nentries, mr, flags);
+ } else {
+ populate_mtt(xlt, idx, nentries, mr, flags);
}
-not_found:
- odp = NULL;
-end:
- up_read(&per_mm->umem_rwsem);
- return odp;
}
-void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
- size_t nentries, struct mlx5_ib_mr *mr, int flags)
+static void dma_fence_odp_mr(struct mlx5_ib_mr *mr)
{
- struct ib_pd *pd = mr->ibmr.pd;
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct ib_umem_odp *odp;
- unsigned long va;
- int i;
+ struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
- if (flags & MLX5_IB_UPD_XLT_ZAP) {
- for (i = 0; i < nentries; i++, pklm++) {
- pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
- pklm->key = cpu_to_be32(dev->null_mkey);
- pklm->va = 0;
- }
- return;
+ /* Ensure mlx5_ib_invalidate_range() will not touch the MR any more */
+ mutex_lock(&odp->umem_mutex);
+ if (odp->npages) {
+ mlx5_mr_cache_invalidate(mr);
+ ib_umem_odp_unmap_dma_pages(odp, ib_umem_start(odp),
+ ib_umem_end(odp));
+ WARN_ON(odp->npages);
}
+ odp->private = NULL;
+ mutex_unlock(&odp->umem_mutex);
- odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE,
- nentries * MLX5_IMR_MTT_SIZE, mr);
-
- for (i = 0; i < nentries; i++, pklm++) {
- pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
- va = (offset + i) * MLX5_IMR_MTT_SIZE;
- if (odp && odp->umem.address == va) {
- struct mlx5_ib_mr *mtt = odp->private;
-
- pklm->key = cpu_to_be32(mtt->ibmr.lkey);
- odp = odp_next(odp);
- } else {
- pklm->key = cpu_to_be32(dev->null_mkey);
- }
- mlx5_ib_dbg(dev, "[%d] va %lx key %x\n",
- i, va, be32_to_cpu(pklm->key));
+ if (!mr->allocated_from_cache) {
+ mlx5_core_destroy_mkey(mr->dev->mdev, &mr->mmkey);
+ WARN_ON(mr->descs);
}
}
-static void mr_leaf_free_action(struct work_struct *work)
+/*
+ * This must be called after the mr has been removed from implicit_children
+ * and the SRCU synchronized. NOTE: The MR does not necessarily have to be
+ * empty here, parallel page faults could have raced with the free process and
+ * empty here; parallel page faults could have raced with the free process and
+ */
+static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt)
{
- struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work);
- int idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT;
- struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent;
+ struct mlx5_ib_mr *imr = mr->parent;
+ struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
+ struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+ unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT;
+ int srcu_key;
- mr->parent = NULL;
- synchronize_srcu(&mr->dev->mr_srcu);
+ /* implicit_child_mr's are not allowed to have deferred work */
+ WARN_ON(atomic_read(&mr->num_deferred_work));
- ib_umem_release(&odp->umem);
- if (imr->live)
- mlx5_ib_update_xlt(imr, idx, 1, 0,
+ if (need_imr_xlt) {
+ srcu_key = srcu_read_lock(&mr->dev->odp_srcu);
+ mutex_lock(&odp_imr->umem_mutex);
+ mlx5_ib_update_xlt(mr->parent, idx, 1, 0,
MLX5_IB_UPD_XLT_INDIRECT |
MLX5_IB_UPD_XLT_ATOMIC);
+ mutex_unlock(&odp_imr->umem_mutex);
+ srcu_read_unlock(&mr->dev->odp_srcu, srcu_key);
+ }
+
+ dma_fence_odp_mr(mr);
+
+ mr->parent = NULL;
mlx5_mr_cache_free(mr->dev, mr);
+ ib_umem_odp_release(odp);
+ atomic_dec(&imr->num_deferred_work);
+}
+
+static void free_implicit_child_mr_work(struct work_struct *work)
+{
+ struct mlx5_ib_mr *mr =
+ container_of(work, struct mlx5_ib_mr, odp_destroy.work);
+
+ free_implicit_child_mr(mr, true);
+}
+
+static void free_implicit_child_mr_rcu(struct rcu_head *head)
+{
+ struct mlx5_ib_mr *mr =
+ container_of(head, struct mlx5_ib_mr, odp_destroy.rcu);
+
+ /* Freeing an MR is a sleeping operation, so bounce to a work queue */
+ INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work);
+ queue_work(system_unbound_wq, &mr->odp_destroy.work);
+}
+
+static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
+{
+ struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+ unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT;
+ struct mlx5_ib_mr *imr = mr->parent;
+
+ xa_lock(&imr->implicit_children);
+ /*
+ * This can race with mlx5_ib_free_implicit_mr(), the first one to
+ * reach the xa lock wins the race and destroys the MR.
+ */
+ if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_ATOMIC) !=
+ mr)
+ goto out_unlock;
- if (atomic_dec_and_test(&imr->num_leaf_free))
- wake_up(&imr->q_leaf_free);
+ atomic_inc(&imr->num_deferred_work);
+ call_srcu(&mr->dev->odp_srcu, &mr->odp_destroy.rcu,
+ free_implicit_child_mr_rcu);
+
+out_unlock:
+ xa_unlock(&imr->implicit_children);
}
-void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
- unsigned long end)
+static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
+ struct ib_umem_odp *umem_odp =
+ container_of(mni, struct ib_umem_odp, notifier);
struct mlx5_ib_mr *mr;
const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT /
sizeof(struct mlx5_mtt)) - 1;
u64 idx = 0, blk_start_idx = 0;
+ u64 invalidations = 0;
+ unsigned long start;
+ unsigned long end;
int in_block = 0;
u64 addr;
- if (!umem_odp) {
- pr_err("invalidation called on NULL umem or non-ODP umem\n");
- return;
- }
+ if (!mmu_notifier_range_blockable(range))
+ return false;
+ mutex_lock(&umem_odp->umem_mutex);
+ mmu_interval_set_seq(mni, cur_seq);
+ /*
+ * If npages is zero then umem_odp->private may not be set up yet. This
+ * does not complete until after the first page is mapped for DMA.
+ */
+ if (!umem_odp->npages)
+ goto out;
mr = umem_odp->private;
- if (!mr || !mr->ibmr.pd)
- return;
-
- start = max_t(u64, ib_umem_start(umem_odp), start);
- end = min_t(u64, ib_umem_end(umem_odp), end);
+ start = max_t(u64, ib_umem_start(umem_odp), range->start);
+ end = min_t(u64, ib_umem_end(umem_odp), range->end);
/*
* Iteration one - zap the HW's MTTs. The notifiers_count ensures that
@@ -246,7 +317,6 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
* overwrite the same MTTs. Concurrent invalidations might race us,
* but they will write 0s as well, so no difference in the end result.
*/
- mutex_lock(&umem_odp->umem_mutex);
for (addr = start; addr < end; addr += BIT(umem_odp->page_shift)) {
idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
/*
@@ -261,6 +331,9 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
blk_start_idx = idx;
in_block = 1;
}
+
+ /* Count page invalidations */
+ invalidations += idx - blk_start_idx + 1;
} else {
u64 umr_offset = idx & umr_block_mask;
@@ -278,7 +351,9 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
idx - blk_start_idx + 1, 0,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
- mutex_unlock(&umem_odp->umem_mutex);
+
+ mlx5_update_odp_stats(mr, invalidations, invalidations);
+
/*
* We are now sure that the device will not access the
* memory. We can safely unmap it, and mark it as dirty if
@@ -287,21 +362,25 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
ib_umem_odp_unmap_dma_pages(umem_odp, start, end);
- if (unlikely(!umem_odp->npages && mr->parent &&
- !umem_odp->dying)) {
- WRITE_ONCE(umem_odp->dying, 1);
- atomic_inc(&mr->parent->num_leaf_free);
- schedule_work(&umem_odp->work);
- }
+ if (unlikely(!umem_odp->npages && mr->parent))
+ destroy_unused_implicit_child_mr(mr);
+out:
+ mutex_unlock(&umem_odp->umem_mutex);
+ return true;
}
+const struct mmu_interval_notifier_ops mlx5_mn_ops = {
+ .invalidate = mlx5_ib_invalidate_range,
+};
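/*
 * Rough sketch (editor's illustration, not part of the patch) of how the
 * invalidate callback above pairs with the fault path further down:
 * pagefault_real_mr() samples the interval-notifier sequence, maps pages,
 * and only programs the HW if no invalidation raced in between. The helper
 * name is hypothetical.
 */
static int odp_fault_once(struct mlx5_ib_mr *mr, u64 user_va, size_t bcnt,
			  u64 access_mask)
{
	struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
	u64 start_idx = (user_va - ib_umem_start(odp)) >> odp->page_shift;
	unsigned long seq;
	int np, ret = -EAGAIN;

	/* Sample the invalidation sequence before faulting pages in. */
	seq = mmu_interval_read_begin(&odp->notifier);
	np = ib_umem_odp_map_dma_pages(odp, user_va, bcnt, access_mask, seq);
	if (np < 0)
		return np;

	mutex_lock(&odp->umem_mutex);
	/* Only update the HW translation if no invalidation raced with us. */
	if (!mmu_interval_read_retry(&odp->notifier, seq))
		ret = mlx5_ib_update_xlt(mr, start_idx, np, odp->page_shift,
					 MLX5_IB_UPD_XLT_ATOMIC);
	mutex_unlock(&odp->umem_mutex);
	return ret;
}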
+
void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
{
struct ib_odp_caps *caps = &dev->odp_caps;
memset(caps, 0, sizeof(*caps));
- if (!MLX5_CAP_GEN(dev->mdev, pg))
+ if (!MLX5_CAP_GEN(dev->mdev, pg) ||
+ !mlx5_ib_can_use_umr(dev, true, 0))
return;
caps->general_caps = IB_ODP_SUPPORT;
@@ -355,10 +434,9 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) &&
MLX5_CAP_GEN(dev->mdev, null_mkey) &&
- MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
+ MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) &&
+ !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled))
caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT;
-
- return;
}
static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
@@ -383,257 +461,225 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
wq_num, err);
}
-static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
- struct ib_umem *umem,
- bool ksm, int access_flags)
+static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
+ unsigned long idx)
{
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct ib_umem_odp *odp;
struct mlx5_ib_mr *mr;
+ struct mlx5_ib_mr *ret;
int err;
- mr = mlx5_mr_cache_alloc(dev, ksm ? MLX5_IMR_KSM_CACHE_ENTRY :
- MLX5_IMR_MTT_CACHE_ENTRY);
+ odp = ib_umem_odp_alloc_child(to_ib_umem_odp(imr->umem),
+ idx * MLX5_IMR_MTT_SIZE,
+ MLX5_IMR_MTT_SIZE, &mlx5_mn_ops);
+ if (IS_ERR(odp))
+ return ERR_CAST(odp);
+ ret = mr = mlx5_mr_cache_alloc(imr->dev, MLX5_IMR_MTT_CACHE_ENTRY);
if (IS_ERR(mr))
- return mr;
-
- mr->ibmr.pd = pd;
-
- mr->dev = dev;
- mr->access_flags = access_flags;
- mr->mmkey.iova = 0;
- mr->umem = umem;
-
- if (ksm) {
- err = mlx5_ib_update_xlt(mr, 0,
- mlx5_imr_ksm_entries,
- MLX5_KSM_PAGE_SHIFT,
- MLX5_IB_UPD_XLT_INDIRECT |
- MLX5_IB_UPD_XLT_ZAP |
- MLX5_IB_UPD_XLT_ENABLE);
-
- } else {
- err = mlx5_ib_update_xlt(mr, 0,
- MLX5_IMR_MTT_ENTRIES,
- PAGE_SHIFT,
- MLX5_IB_UPD_XLT_ZAP |
- MLX5_IB_UPD_XLT_ENABLE |
- MLX5_IB_UPD_XLT_ATOMIC);
- }
-
- if (err)
- goto fail;
+ goto out_umem;
+ mr->ibmr.pd = imr->ibmr.pd;
+ mr->access_flags = imr->access_flags;
+ mr->umem = &odp->umem;
mr->ibmr.lkey = mr->mmkey.key;
mr->ibmr.rkey = mr->mmkey.key;
-
- mr->live = 1;
-
- mlx5_ib_dbg(dev, "key %x dev %p mr %p\n",
- mr->mmkey.key, dev->mdev, mr);
-
- return mr;
-
-fail:
- mlx5_ib_err(dev, "Failed to register MKEY %d\n", err);
- mlx5_mr_cache_free(dev, mr);
-
- return ERR_PTR(err);
-}
-
-static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr,
- u64 io_virt, size_t bcnt)
-{
- struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device);
- struct ib_umem_odp *odp, *result = NULL;
- struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
- u64 addr = io_virt & MLX5_IMR_MTT_MASK;
- int nentries = 0, start_idx = 0, ret;
- struct mlx5_ib_mr *mtt;
-
- mutex_lock(&odp_mr->umem_mutex);
- odp = odp_lookup(addr, 1, mr);
-
- mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n",
- io_virt, bcnt, addr, odp);
-
-next_mr:
- if (likely(odp)) {
- if (nentries)
- nentries++;
- } else {
- odp = ib_alloc_odp_umem(odp_mr, addr,
- MLX5_IMR_MTT_SIZE);
- if (IS_ERR(odp)) {
- mutex_unlock(&odp_mr->umem_mutex);
- return ERR_CAST(odp);
- }
-
- mtt = implicit_mr_alloc(mr->ibmr.pd, &odp->umem, 0,
- mr->access_flags);
- if (IS_ERR(mtt)) {
- mutex_unlock(&odp_mr->umem_mutex);
- ib_umem_release(&odp->umem);
- return ERR_CAST(mtt);
- }
-
- odp->private = mtt;
- mtt->umem = &odp->umem;
- mtt->mmkey.iova = addr;
- mtt->parent = mr;
- INIT_WORK(&odp->work, mr_leaf_free_action);
-
- if (!nentries)
- start_idx = addr >> MLX5_IMR_MTT_SHIFT;
- nentries++;
- }
-
- /* Return first odp if region not covered by single one */
- if (likely(!result))
- result = odp;
-
- addr += MLX5_IMR_MTT_SIZE;
- if (unlikely(addr < io_virt + bcnt)) {
- odp = odp_next(odp);
- if (odp && odp->umem.address != addr)
- odp = NULL;
- goto next_mr;
+ mr->mmkey.iova = idx * MLX5_IMR_MTT_SIZE;
+ mr->parent = imr;
+ odp->private = mr;
+
+ err = mlx5_ib_update_xlt(mr, 0,
+ MLX5_IMR_MTT_ENTRIES,
+ PAGE_SHIFT,
+ MLX5_IB_UPD_XLT_ZAP |
+ MLX5_IB_UPD_XLT_ENABLE);
+ if (err) {
+ ret = ERR_PTR(err);
+ goto out_mr;
}
- if (unlikely(nentries)) {
- ret = mlx5_ib_update_xlt(mr, start_idx, nentries, 0,
- MLX5_IB_UPD_XLT_INDIRECT |
- MLX5_IB_UPD_XLT_ATOMIC);
- if (ret) {
- mlx5_ib_err(dev, "Failed to update PAS\n");
- result = ERR_PTR(ret);
+ /*
+ * Once the store to either xarray completes any error unwind has to
+ * use synchronize_srcu(). Avoid this with xa_reserve()
+ */
+ ret = xa_cmpxchg(&imr->implicit_children, idx, NULL, mr,
+ GFP_KERNEL);
+ if (unlikely(ret)) {
+ if (xa_is_err(ret)) {
+ ret = ERR_PTR(xa_err(ret));
+ goto out_mr;
}
+ /*
+ * Another thread beat us to creating the child mr, use
+ * theirs.
+ */
+ goto out_mr;
}
- mutex_unlock(&odp_mr->umem_mutex);
- return result;
+ mlx5_ib_dbg(imr->dev, "key %x mr %p\n", mr->mmkey.key, mr);
+ return mr;
+
+out_mr:
+ mlx5_mr_cache_free(imr->dev, mr);
+out_umem:
+ ib_umem_odp_release(odp);
+ return ret;
}
struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
struct ib_udata *udata,
int access_flags)
{
+ struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device);
+ struct ib_umem_odp *umem_odp;
struct mlx5_ib_mr *imr;
- struct ib_umem *umem;
+ int err;
- umem = ib_umem_get(udata, 0, 0, access_flags, 0);
- if (IS_ERR(umem))
- return ERR_CAST(umem);
+ umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags);
+ if (IS_ERR(umem_odp))
+ return ERR_CAST(umem_odp);
- imr = implicit_mr_alloc(&pd->ibpd, umem, 1, access_flags);
+ imr = mlx5_mr_cache_alloc(dev, MLX5_IMR_KSM_CACHE_ENTRY);
if (IS_ERR(imr)) {
- ib_umem_release(umem);
- return ERR_CAST(imr);
+ err = PTR_ERR(imr);
+ goto out_umem;
}
- imr->umem = umem;
- init_waitqueue_head(&imr->q_leaf_free);
- atomic_set(&imr->num_leaf_free, 0);
- atomic_set(&imr->num_pending_prefetch, 0);
+ imr->ibmr.pd = &pd->ibpd;
+ imr->access_flags = access_flags;
+ imr->mmkey.iova = 0;
+ imr->umem = &umem_odp->umem;
+ imr->ibmr.lkey = imr->mmkey.key;
+ imr->ibmr.rkey = imr->mmkey.key;
+ imr->umem = &umem_odp->umem;
+ imr->is_odp_implicit = true;
+ atomic_set(&imr->num_deferred_work, 0);
+ xa_init(&imr->implicit_children);
+
+ err = mlx5_ib_update_xlt(imr, 0,
+ mlx5_imr_ksm_entries,
+ MLX5_KSM_PAGE_SHIFT,
+ MLX5_IB_UPD_XLT_INDIRECT |
+ MLX5_IB_UPD_XLT_ZAP |
+ MLX5_IB_UPD_XLT_ENABLE);
+ if (err)
+ goto out_mr;
+
+ err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key),
+ &imr->mmkey, GFP_KERNEL));
+ if (err)
+ goto out_mr;
+ mlx5_ib_dbg(dev, "key %x mr %p\n", imr->mmkey.key, imr);
return imr;
+out_mr:
+ mlx5_ib_err(dev, "Failed to register MKEY %d\n", err);
+ mlx5_mr_cache_free(dev, imr);
+out_umem:
+ ib_umem_odp_release(umem_odp);
+ return ERR_PTR(err);
}
-static int mr_leaf_free(struct ib_umem_odp *umem_odp, u64 start, u64 end,
- void *cookie)
+void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
{
- struct mlx5_ib_mr *mr = umem_odp->private, *imr = cookie;
+ struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
+ struct mlx5_ib_dev *dev = imr->dev;
+ struct list_head destroy_list;
+ struct mlx5_ib_mr *mtt;
+ struct mlx5_ib_mr *tmp;
+ unsigned long idx;
- if (mr->parent != imr)
- return 0;
+ INIT_LIST_HEAD(&destroy_list);
+
+ xa_erase(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key));
+ /*
+ * This stops the SRCU protected page fault path from touching either
+ * the imr or any children. The page fault path can only reach the
+ * children xarray via the imr.
+ */
+ synchronize_srcu(&dev->odp_srcu);
+
+ xa_lock(&imr->implicit_children);
+ xa_for_each (&imr->implicit_children, idx, mtt) {
+ __xa_erase(&imr->implicit_children, idx);
+ list_add(&mtt->odp_destroy.elm, &destroy_list);
+ }
+ xa_unlock(&imr->implicit_children);
- ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
- ib_umem_end(umem_odp));
+ /*
+ * num_deferred_work can only be incremented inside the odp_srcu, or
+ * under xa_lock while the child is in the xarray. Thus at this point
+ * it is only decreasing, and all work holding it is now on the wq.
+ */
+ if (atomic_read(&imr->num_deferred_work)) {
+ flush_workqueue(system_unbound_wq);
+ WARN_ON(atomic_read(&imr->num_deferred_work));
+ }
- if (umem_odp->dying)
- return 0;
+ /*
+ * Fence the imr before we destroy the children. This allows us to
+ * skip updating the XLT of the imr during destroy of the child mkey
+ * the imr points to.
+ */
+ mlx5_mr_cache_invalidate(imr);
- WRITE_ONCE(umem_odp->dying, 1);
- atomic_inc(&imr->num_leaf_free);
- schedule_work(&umem_odp->work);
+ list_for_each_entry_safe (mtt, tmp, &destroy_list, odp_destroy.elm)
+ free_implicit_child_mr(mtt, false);
- return 0;
+ mlx5_mr_cache_free(dev, imr);
+ ib_umem_odp_release(odp_imr);
}
-void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
+/**
+ * mlx5_ib_fence_odp_mr - Stop all access to the ODP MR
+ * @mr: to fence
+ *
+ * On return no parallel threads will be touching this MR and no DMA will be
+ * active.
+ */
+void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr)
{
- struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr);
+ /* Prevent new page faults and prefetch requests from succeeding */
+ xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key));
- down_read(&per_mm->umem_rwsem);
- rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, 0, ULLONG_MAX,
- mr_leaf_free, true, imr);
- up_read(&per_mm->umem_rwsem);
+ /* Wait for all running page-fault handlers to finish. */
+ synchronize_srcu(&mr->dev->odp_srcu);
- wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
+ if (atomic_read(&mr->num_deferred_work)) {
+ flush_workqueue(system_unbound_wq);
+ WARN_ON(atomic_read(&mr->num_deferred_work));
+ }
+
+ dma_fence_odp_mr(mr);
}
-#define MLX5_PF_FLAGS_PREFETCH BIT(0)
#define MLX5_PF_FLAGS_DOWNGRADE BIT(1)
-static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
- u64 io_virt, size_t bcnt, u32 *bytes_mapped,
- u32 flags)
+static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
+ u64 user_va, size_t bcnt, u32 *bytes_mapped,
+ u32 flags)
{
- int npages = 0, current_seq, page_shift, ret, np;
- struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
+ int page_shift, ret, np;
bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
- bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH;
+ unsigned long current_seq;
u64 access_mask;
- u64 start_idx, page_mask;
- struct ib_umem_odp *odp;
- size_t size;
-
- if (!odp_mr->page_list) {
- odp = implicit_mr_get_data(mr, io_virt, bcnt);
-
- if (IS_ERR(odp))
- return PTR_ERR(odp);
- mr = odp->private;
- } else {
- odp = odp_mr;
- }
-
-next_mr:
- size = min_t(size_t, bcnt, ib_umem_end(odp) - io_virt);
+ u64 start_idx;
page_shift = odp->page_shift;
- page_mask = ~(BIT(page_shift) - 1);
- start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
+ start_idx = (user_va - ib_umem_start(odp)) >> page_shift;
access_mask = ODP_READ_ALLOWED_BIT;
- if (prefetch && !downgrade && !mr->umem->writable) {
- /* prefetch with write-access must
- * be supported by the MR
- */
- ret = -EINVAL;
- goto out;
- }
-
- if (mr->umem->writable && !downgrade)
+ if (odp->umem.writable && !downgrade)
access_mask |= ODP_WRITE_ALLOWED_BIT;
- current_seq = READ_ONCE(odp->notifiers_seq);
- /*
- * Ensure the sequence number is valid for some time before we call
- * gup.
- */
- smp_rmb();
+ current_seq = mmu_interval_read_begin(&odp->notifier);
- ret = ib_umem_odp_map_dma_pages(to_ib_umem_odp(mr->umem), io_virt, size,
- access_mask, current_seq);
-
- if (ret < 0)
- goto out;
-
- np = ret;
+ np = ib_umem_odp_map_dma_pages(odp, user_va, bcnt, access_mask,
+ current_seq);
+ if (np < 0)
+ return np;
mutex_lock(&odp->umem_mutex);
- if (!ib_umem_mmu_notifier_retry(to_ib_umem_odp(mr->umem),
- current_seq)) {
+ if (!mmu_interval_read_retry(&odp->notifier, current_seq)) {
/*
* No need to check whether the MTTs really belong to
* this MR, since ib_umem_odp_map_dma_pages already
@@ -648,53 +694,135 @@ next_mr:
if (ret < 0) {
if (ret != -EAGAIN)
- mlx5_ib_err(dev, "Failed to update mkey page tables\n");
+ mlx5_ib_err(mr->dev,
+ "Failed to update mkey page tables\n");
goto out;
}
if (bytes_mapped) {
u32 new_mappings = (np << page_shift) -
- (io_virt - round_down(io_virt, 1 << page_shift));
- *bytes_mapped += min_t(u32, new_mappings, size);
+ (user_va - round_down(user_va, 1 << page_shift));
+
+ *bytes_mapped += min_t(u32, new_mappings, bcnt);
}
- npages += np << (page_shift - PAGE_SHIFT);
- bcnt -= size;
+ return np << (page_shift - PAGE_SHIFT);
- if (unlikely(bcnt)) {
- struct ib_umem_odp *next;
+out:
+ return ret;
+}
+
+static int pagefault_implicit_mr(struct mlx5_ib_mr *imr,
+ struct ib_umem_odp *odp_imr, u64 user_va,
+ size_t bcnt, u32 *bytes_mapped, u32 flags)
+{
+ unsigned long end_idx = (user_va + bcnt - 1) >> MLX5_IMR_MTT_SHIFT;
+ unsigned long upd_start_idx = end_idx + 1;
+ unsigned long upd_len = 0;
+ unsigned long npages = 0;
+ int err;
+ int ret;
- io_virt += size;
- next = odp_next(odp);
- if (unlikely(!next || next->umem.address != io_virt)) {
- mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
- io_virt, next);
- return -EAGAIN;
+ if (unlikely(user_va >= mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE ||
+ mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE - user_va < bcnt))
+ return -EFAULT;
+
+ /* Fault each child mr that intersects with our interval. */
+ while (bcnt) {
+ unsigned long idx = user_va >> MLX5_IMR_MTT_SHIFT;
+ struct ib_umem_odp *umem_odp;
+ struct mlx5_ib_mr *mtt;
+ u64 len;
+
+ mtt = xa_load(&imr->implicit_children, idx);
+ if (unlikely(!mtt)) {
+ mtt = implicit_get_child_mr(imr, idx);
+ if (IS_ERR(mtt)) {
+ ret = PTR_ERR(mtt);
+ goto out;
+ }
+ upd_start_idx = min(upd_start_idx, idx);
+ upd_len = idx - upd_start_idx + 1;
}
- odp = next;
- mr = odp->private;
- goto next_mr;
+
+ umem_odp = to_ib_umem_odp(mtt->umem);
+ len = min_t(u64, user_va + bcnt, ib_umem_end(umem_odp)) -
+ user_va;
+
+ ret = pagefault_real_mr(mtt, umem_odp, user_va, len,
+ bytes_mapped, flags);
+ if (ret < 0)
+ goto out;
+ user_va += len;
+ bcnt -= len;
+ npages += ret;
}
- return npages;
+ ret = npages;
+ /*
+ * Any time the implicit_children are changed we must perform an
+ * update of the xlt before exiting to ensure the HW and the
+ * implicit_children remain synchronized.
+ */
out:
- if (ret == -EAGAIN) {
- unsigned long timeout = msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
-
- if (!wait_for_completion_timeout(&odp->notifier_completion,
- timeout)) {
- mlx5_ib_warn(
- dev,
- "timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n",
- current_seq, odp->notifiers_seq,
- odp->notifiers_count);
- }
- }
+ if (likely(!upd_len))
+ return ret;
+ /*
+ * Notice this is not strictly ordered correctly: the KSM is updated after
+ * the implicit_children is updated, so a parallel page fault could
+ * see an MR that is not yet visible in the KSM. This is similar to a
+ * parallel page fault seeing an MR that is being concurrently removed
+ * from the KSM. Both of these improbable situations are resolved
+ * safely by resuming the HW and then taking another page fault. The
+ * next pagefault handler will see the new information.
+ */
+ mutex_lock(&odp_imr->umem_mutex);
+ err = mlx5_ib_update_xlt(imr, upd_start_idx, upd_len, 0,
+ MLX5_IB_UPD_XLT_INDIRECT |
+ MLX5_IB_UPD_XLT_ATOMIC);
+ mutex_unlock(&odp_imr->umem_mutex);
+ if (err) {
+ mlx5_ib_err(imr->dev, "Failed to update PAS\n");
+ return err;
+ }
return ret;
}
+/*
+ * Returns:
+ * -EFAULT: The io_virt->bcnt is not within the MR, it covers pages that are
+ * not accessible, or the MR is no longer valid.
+ * -EAGAIN/-ENOMEM: The operation should be retried
+ *
+ * -EINVAL/others: General internal malfunction
+ * >0: Number of pages mapped
+ */
+static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
+ u32 *bytes_mapped, u32 flags)
+{
+ struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+
+ if (unlikely(io_virt < mr->mmkey.iova))
+ return -EFAULT;
+
+ if (!odp->is_implicit_odp) {
+ u64 user_va;
+
+ if (check_add_overflow(io_virt - mr->mmkey.iova,
+ (u64)odp->umem.address, &user_va))
+ return -EFAULT;
+ if (unlikely(user_va >= ib_umem_end(odp) ||
+ ib_umem_end(odp) - user_va < bcnt))
+ return -EFAULT;
+ return pagefault_real_mr(mr, odp, user_va, bcnt, bytes_mapped,
+ flags);
+ }
+ return pagefault_implicit_mr(mr, odp, io_virt, bcnt, bytes_mapped,
+ flags);
+}
+
struct pf_frame {
struct pf_frame *next;
u32 key;
@@ -742,10 +870,9 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
struct ib_pd *pd, u32 key,
u64 io_virt, size_t bcnt,
u32 *bytes_committed,
- u32 *bytes_mapped, u32 flags)
+ u32 *bytes_mapped)
{
int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0;
- bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH;
struct pf_frame *head = NULL, *frame;
struct mlx5_core_mkey *mmkey;
struct mlx5_ib_mr *mr;
@@ -754,58 +881,49 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
size_t offset;
int ndescs;
- srcu_key = srcu_read_lock(&dev->mr_srcu);
+ srcu_key = srcu_read_lock(&dev->odp_srcu);
io_virt += *bytes_committed;
bcnt -= *bytes_committed;
next_mr:
- mmkey = xa_load(&dev->mdev->priv.mkey_table, mlx5_base_mkey(key));
+ mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(key));
+ if (!mmkey) {
+ mlx5_ib_dbg(
+ dev,
+ "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
+ key);
+ if (bytes_mapped)
+ *bytes_mapped += bcnt;
+ * The user could specify an SGL with multiple lkeys where only
+ * The user could specify a SGL with multiple lkeys and only
+ * some of them are ODP. Treat the non-ODP ones as fully
+ * faulted.
+ */
+ ret = 0;
+ goto srcu_unlock;
+ }
if (!mkey_is_eq(mmkey, key)) {
mlx5_ib_dbg(dev, "failed to find mkey %x\n", key);
ret = -EFAULT;
goto srcu_unlock;
}
- if (prefetch && mmkey->type != MLX5_MKEY_MR) {
- mlx5_ib_dbg(dev, "prefetch is allowed only for MR\n");
- ret = -EINVAL;
- goto srcu_unlock;
- }
-
switch (mmkey->type) {
case MLX5_MKEY_MR:
mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
- if (!mr->live || !mr->ibmr.pd) {
- mlx5_ib_dbg(dev, "got dead MR\n");
- ret = -EFAULT;
- goto srcu_unlock;
- }
-
- if (prefetch) {
- if (!is_odp_mr(mr) ||
- mr->ibmr.pd != pd) {
- mlx5_ib_dbg(dev, "Invalid prefetch request: %s\n",
- is_odp_mr(mr) ? "MR is not ODP" :
- "PD is not of the MR");
- ret = -EINVAL;
- goto srcu_unlock;
- }
- }
- if (!is_odp_mr(mr)) {
- mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
- key);
- if (bytes_mapped)
- *bytes_mapped += bcnt;
- ret = 0;
- goto srcu_unlock;
- }
-
- ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped, flags);
+ ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0);
if (ret < 0)
goto srcu_unlock;
+ /*
+ * When prefetching a page, a page fault is generated
+ * in order to bring the page into main memory.
+ * In the current flow, page faults are being counted.
+ */
+ mlx5_update_odp_stats(mr, faults, ret);
+
npages += ret;
ret = 0;
break;
@@ -895,7 +1013,7 @@ srcu_unlock:
}
kfree(out);
- srcu_read_unlock(&dev->mr_srcu, srcu_key);
+ srcu_read_unlock(&dev->odp_srcu, srcu_key);
*bytes_committed = 0;
return ret ? ret : npages;
}
@@ -976,7 +1094,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev,
ret = pagefault_single_data_segment(dev, NULL, key,
io_virt, bcnt,
&pfault->bytes_committed,
- bytes_mapped, 0);
+ bytes_mapped);
if (ret < 0)
break;
npages += ret;
@@ -985,17 +1103,6 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev,
return ret < 0 ? ret : npages;
}
-static const u32 mlx5_ib_odp_opcode_cap[] = {
- [MLX5_OPCODE_SEND] = IB_ODP_SUPPORT_SEND,
- [MLX5_OPCODE_SEND_IMM] = IB_ODP_SUPPORT_SEND,
- [MLX5_OPCODE_SEND_INVAL] = IB_ODP_SUPPORT_SEND,
- [MLX5_OPCODE_RDMA_WRITE] = IB_ODP_SUPPORT_WRITE,
- [MLX5_OPCODE_RDMA_WRITE_IMM] = IB_ODP_SUPPORT_WRITE,
- [MLX5_OPCODE_RDMA_READ] = IB_ODP_SUPPORT_READ,
- [MLX5_OPCODE_ATOMIC_CS] = IB_ODP_SUPPORT_ATOMIC,
- [MLX5_OPCODE_ATOMIC_FA] = IB_ODP_SUPPORT_ATOMIC,
-};
-
/*
* Parse initiator WQE. Advances the wqe pointer to point at the
* scatter-gather list, and set wqe_end to the end of the WQE.
@@ -1006,12 +1113,8 @@ static int mlx5_ib_mr_initiator_pfault_handler(
{
struct mlx5_wqe_ctrl_seg *ctrl = *wqe;
u16 wqe_index = pfault->wqe.wqe_index;
- u32 transport_caps;
struct mlx5_base_av *av;
unsigned ds, opcode;
-#if defined(DEBUG)
- u32 ctrl_wqe_index, ctrl_qpn;
-#endif
u32 qpn = qp->trans_qp.base.mqp.qpn;
ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
@@ -1027,58 +1130,17 @@ static int mlx5_ib_mr_initiator_pfault_handler(
return -EFAULT;
}
-#if defined(DEBUG)
- ctrl_wqe_index = (be32_to_cpu(ctrl->opmod_idx_opcode) &
- MLX5_WQE_CTRL_WQE_INDEX_MASK) >>
- MLX5_WQE_CTRL_WQE_INDEX_SHIFT;
- if (wqe_index != ctrl_wqe_index) {
- mlx5_ib_err(dev, "Got WQE with invalid wqe_index. wqe_index=0x%x, qpn=0x%x ctrl->wqe_index=0x%x\n",
- wqe_index, qpn,
- ctrl_wqe_index);
- return -EFAULT;
- }
-
- ctrl_qpn = (be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_QPN_MASK) >>
- MLX5_WQE_CTRL_QPN_SHIFT;
- if (qpn != ctrl_qpn) {
- mlx5_ib_err(dev, "Got WQE with incorrect QP number. wqe_index=0x%x, qpn=0x%x ctrl->qpn=0x%x\n",
- wqe_index, qpn,
- ctrl_qpn);
- return -EFAULT;
- }
-#endif /* DEBUG */
-
*wqe_end = *wqe + ds * MLX5_WQE_DS_UNITS;
*wqe += sizeof(*ctrl);
opcode = be32_to_cpu(ctrl->opmod_idx_opcode) &
MLX5_WQE_CTRL_OPCODE_MASK;
- switch (qp->ibqp.qp_type) {
- case IB_QPT_XRC_INI:
+ if (qp->ibqp.qp_type == IB_QPT_XRC_INI)
*wqe += sizeof(struct mlx5_wqe_xrc_seg);
- transport_caps = dev->odp_caps.per_transport_caps.xrc_odp_caps;
- break;
- case IB_QPT_RC:
- transport_caps = dev->odp_caps.per_transport_caps.rc_odp_caps;
- break;
- case IB_QPT_UD:
- transport_caps = dev->odp_caps.per_transport_caps.ud_odp_caps;
- break;
- default:
- mlx5_ib_err(dev, "ODP fault on QP of an unsupported transport 0x%x\n",
- qp->ibqp.qp_type);
- return -EFAULT;
- }
- if (unlikely(opcode >= ARRAY_SIZE(mlx5_ib_odp_opcode_cap) ||
- !(transport_caps & mlx5_ib_odp_opcode_cap[opcode]))) {
- mlx5_ib_err(dev, "ODP fault on QP of an unsupported opcode 0x%x\n",
- opcode);
- return -EFAULT;
- }
-
- if (qp->ibqp.qp_type == IB_QPT_UD) {
+ if (qp->ibqp.qp_type == IB_QPT_UD ||
+ qp->qp_sub_type == MLX5_IB_QPT_DCI) {
av = *wqe;
if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV))
*wqe += sizeof(struct mlx5_av);
@@ -1141,19 +1203,6 @@ static int mlx5_ib_mr_responder_pfault_handler_rq(struct mlx5_ib_dev *dev,
return -EFAULT;
}
- switch (qp->ibqp.qp_type) {
- case IB_QPT_RC:
- if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
- IB_ODP_SUPPORT_RECV))
- goto invalid_transport_or_opcode;
- break;
- default:
-invalid_transport_or_opcode:
- mlx5_ib_err(dev, "ODP fault on QP of an unsupported transport. transport: 0x%x\n",
- qp->ibqp.qp_type);
- return -EFAULT;
- }
-
*wqe_end = wqe + wqe_size;
return 0;
@@ -1203,7 +1252,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
{
bool sq = pfault->type & MLX5_PFAULT_REQUESTOR;
u16 wqe_index = pfault->wqe.wqe_index;
- void *wqe = NULL, *wqe_end = NULL;
+ void *wqe, *wqe_start = NULL, *wqe_end = NULL;
u32 bytes_mapped, total_wqe_bytes;
struct mlx5_core_rsc_common *res;
int resume_with_error = 1;
@@ -1224,23 +1273,24 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
goto resolve_page_fault;
}
- wqe = (void *)__get_free_page(GFP_KERNEL);
- if (!wqe) {
+ wqe_start = (void *)__get_free_page(GFP_KERNEL);
+ if (!wqe_start) {
mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n");
goto resolve_page_fault;
}
+ wqe = wqe_start;
qp = (res->res == MLX5_RES_QP) ? res_to_qp(res) : NULL;
if (qp && sq) {
- ret = mlx5_ib_read_user_wqe_sq(qp, wqe_index, wqe, PAGE_SIZE,
- &bytes_copied);
+ ret = mlx5_ib_read_wqe_sq(qp, wqe_index, wqe, PAGE_SIZE,
+ &bytes_copied);
if (ret)
goto read_user;
ret = mlx5_ib_mr_initiator_pfault_handler(
dev, pfault, qp, &wqe, &wqe_end, bytes_copied);
} else if (qp && !sq) {
- ret = mlx5_ib_read_user_wqe_rq(qp, wqe_index, wqe, PAGE_SIZE,
- &bytes_copied);
+ ret = mlx5_ib_read_wqe_rq(qp, wqe_index, wqe, PAGE_SIZE,
+ &bytes_copied);
if (ret)
goto read_user;
ret = mlx5_ib_mr_responder_pfault_handler_rq(
@@ -1248,8 +1298,8 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
} else if (!qp) {
struct mlx5_ib_srq *srq = res_to_srq(res);
- ret = mlx5_ib_read_user_wqe_srq(srq, wqe_index, wqe, PAGE_SIZE,
- &bytes_copied);
+ ret = mlx5_ib_read_wqe_srq(srq, wqe_index, wqe, PAGE_SIZE,
+ &bytes_copied);
if (ret)
goto read_user;
ret = mlx5_ib_mr_responder_pfault_handler_srq(
@@ -1284,7 +1334,7 @@ resolve_page_fault:
pfault->wqe.wq_num, resume_with_error,
pfault->type);
mlx5_core_res_put(res);
- free_page((unsigned long)wqe);
+ free_page((unsigned long)wqe_start);
}
static int pages_in_range(u64 address, u32 length)
@@ -1327,8 +1377,7 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
}
ret = pagefault_single_data_segment(dev, NULL, rkey, address, length,
- &pfault->bytes_committed, NULL,
- 0);
+ &pfault->bytes_committed, NULL);
if (ret == -EAGAIN) {
/* We're racing with an invalidation, don't prefetch */
prefetch_activated = 0;
@@ -1355,8 +1404,7 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
ret = pagefault_single_data_segment(dev, NULL, rkey, address,
prefetch_len,
- &bytes_committed, NULL,
- 0);
+ &bytes_committed, NULL);
if (ret < 0 && ret != -EAGAIN) {
mlx5_ib_dbg(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n",
ret, pfault->token, address, prefetch_len);
@@ -1622,8 +1670,10 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
{
int ret = 0;
- if (dev->odp_caps.general_caps & IB_ODP_SUPPORT)
- ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_odp_ops);
+ if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT))
+ return ret;
+
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_odp_ops);
if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) {
ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey);
@@ -1633,9 +1683,6 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
}
}
- if (!MLX5_CAP_GEN(dev->mdev, pg))
- return ret;
-
ret = mlx5_ib_create_pf_eq(dev, &dev->odp_pf_eq);
return ret;
@@ -1643,7 +1690,7 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *dev)
{
- if (!MLX5_CAP_GEN(dev->mdev, pg))
+ if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT))
return;
mlx5_ib_destroy_pf_eq(dev, &dev->odp_pf_eq);
@@ -1659,114 +1706,128 @@ int mlx5_ib_odp_init(void)
struct prefetch_mr_work {
struct work_struct work;
- struct ib_pd *pd;
u32 pf_flags;
u32 num_sge;
- struct ib_sge sg_list[0];
+ struct {
+ u64 io_virt;
+ struct mlx5_ib_mr *mr;
+ size_t length;
+ } frags[];
};
-static void num_pending_prefetch_dec(struct mlx5_ib_dev *dev,
- struct ib_sge *sg_list, u32 num_sge,
- u32 from)
+static void destroy_prefetch_work(struct prefetch_mr_work *work)
{
u32 i;
- int srcu_key;
-
- srcu_key = srcu_read_lock(&dev->mr_srcu);
-
- for (i = from; i < num_sge; ++i) {
- struct mlx5_core_mkey *mmkey;
- struct mlx5_ib_mr *mr;
- mmkey = xa_load(&dev->mdev->priv.mkey_table,
- mlx5_base_mkey(sg_list[i].lkey));
- mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
- atomic_dec(&mr->num_pending_prefetch);
- }
-
- srcu_read_unlock(&dev->mr_srcu, srcu_key);
+ for (i = 0; i < work->num_sge; ++i)
+ atomic_dec(&work->frags[i].mr->num_deferred_work);
+ kvfree(work);
}
-static bool num_pending_prefetch_inc(struct ib_pd *pd,
- struct ib_sge *sg_list, u32 num_sge)
+static struct mlx5_ib_mr *
+get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
+ u32 lkey)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- bool ret = true;
- u32 i;
+ struct mlx5_core_mkey *mmkey;
+ struct ib_umem_odp *odp;
+ struct mlx5_ib_mr *mr;
- for (i = 0; i < num_sge; ++i) {
- struct mlx5_core_mkey *mmkey;
- struct mlx5_ib_mr *mr;
+ lockdep_assert_held(&dev->odp_srcu);
- mmkey = xa_load(&dev->mdev->priv.mkey_table,
- mlx5_base_mkey(sg_list[i].lkey));
- if (!mmkey || mmkey->key != sg_list[i].lkey) {
- ret = false;
- break;
- }
+ mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey));
+ if (!mmkey || mmkey->key != lkey || mmkey->type != MLX5_MKEY_MR)
+ return NULL;
- if (mmkey->type != MLX5_MKEY_MR) {
- ret = false;
- break;
- }
+ mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
- mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
+ if (mr->ibmr.pd != pd)
+ return NULL;
- if (mr->ibmr.pd != pd) {
- ret = false;
- break;
- }
+ odp = to_ib_umem_odp(mr->umem);
- if (!mr->live) {
- ret = false;
- break;
- }
+ /* prefetch with write-access must be supported by the MR */
+ if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE &&
+ !odp->umem.writable)
+ return NULL;
- atomic_inc(&mr->num_pending_prefetch);
- }
+ return mr;
+}
- if (!ret)
- num_pending_prefetch_dec(dev, sg_list, i, 0);
+static void mlx5_ib_prefetch_mr_work(struct work_struct *w)
+{
+ struct prefetch_mr_work *work =
+ container_of(w, struct prefetch_mr_work, work);
+ u32 bytes_mapped = 0;
+ u32 i;
- return ret;
+ for (i = 0; i < work->num_sge; ++i)
+ pagefault_mr(work->frags[i].mr, work->frags[i].io_virt,
+ work->frags[i].length, &bytes_mapped,
+ work->pf_flags);
+
+ destroy_prefetch_work(work);
}
-static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, u32 pf_flags,
- struct ib_sge *sg_list, u32 num_sge)
+static bool init_prefetch_work(struct ib_pd *pd,
+ enum ib_uverbs_advise_mr_advice advice,
+ u32 pf_flags, struct prefetch_mr_work *work,
+ struct ib_sge *sg_list, u32 num_sge)
{
u32 i;
- int ret = 0;
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
+
+ INIT_WORK(&work->work, mlx5_ib_prefetch_mr_work);
+ work->pf_flags = pf_flags;
for (i = 0; i < num_sge; ++i) {
- struct ib_sge *sg = &sg_list[i];
- int bytes_committed = 0;
+ work->frags[i].io_virt = sg_list[i].addr;
+ work->frags[i].length = sg_list[i].length;
+ work->frags[i].mr =
+ get_prefetchable_mr(pd, advice, sg_list[i].lkey);
+ if (!work->frags[i].mr) {
+			work->num_sge = i;
+ if (i)
+ destroy_prefetch_work(work);
+ return false;
+ }
- ret = pagefault_single_data_segment(dev, pd, sg->lkey, sg->addr,
- sg->length,
- &bytes_committed, NULL,
- pf_flags);
- if (ret < 0)
- break;
+		/* Keep the MR pointer valid outside the SRCU */
+ atomic_inc(&work->frags[i].mr->num_deferred_work);
}
-
- return ret < 0 ? ret : 0;
+ work->num_sge = num_sge;
+ return true;
}
-static void mlx5_ib_prefetch_mr_work(struct work_struct *work)
+static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd,
+ enum ib_uverbs_advise_mr_advice advice,
+ u32 pf_flags, struct ib_sge *sg_list,
+ u32 num_sge)
{
- struct prefetch_mr_work *w =
- container_of(work, struct prefetch_mr_work, work);
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ u32 bytes_mapped = 0;
+ int srcu_key;
+ int ret = 0;
+ u32 i;
+
+ srcu_key = srcu_read_lock(&dev->odp_srcu);
+ for (i = 0; i < num_sge; ++i) {
+ struct mlx5_ib_mr *mr;
- if (ib_device_try_get(w->pd->device)) {
- mlx5_ib_prefetch_sg_list(w->pd, w->pf_flags, w->sg_list,
- w->num_sge);
- ib_device_put(w->pd->device);
+ mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey);
+ if (!mr) {
+ ret = -ENOENT;
+ goto out;
+ }
+ ret = pagefault_mr(mr, sg_list[i].addr, sg_list[i].length,
+ &bytes_mapped, pf_flags);
+ if (ret < 0)
+ goto out;
}
+ ret = 0;
- num_pending_prefetch_dec(to_mdev(w->pd->device), w->sg_list,
- w->num_sge, 0);
- kvfree(w);
+out:
+ srcu_read_unlock(&dev->odp_srcu, srcu_key);
+ return ret;
}
int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
@@ -1774,43 +1835,27 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
u32 flags, struct ib_sge *sg_list, u32 num_sge)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- u32 pf_flags = MLX5_PF_FLAGS_PREFETCH;
+ u32 pf_flags = 0;
struct prefetch_mr_work *work;
- bool valid_req;
int srcu_key;
if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH)
pf_flags |= MLX5_PF_FLAGS_DOWNGRADE;
if (flags & IB_UVERBS_ADVISE_MR_FLAG_FLUSH)
- return mlx5_ib_prefetch_sg_list(pd, pf_flags, sg_list,
+ return mlx5_ib_prefetch_sg_list(pd, advice, pf_flags, sg_list,
num_sge);
- work = kvzalloc(struct_size(work, sg_list, num_sge), GFP_KERNEL);
+ work = kvzalloc(struct_size(work, frags, num_sge), GFP_KERNEL);
if (!work)
return -ENOMEM;
- memcpy(work->sg_list, sg_list, num_sge * sizeof(struct ib_sge));
-
- /* It is guaranteed that the pd when work is executed is the pd when
- * work was queued since pd can't be destroyed while it holds MRs and
- * destroying a MR leads to flushing the workquque
- */
- work->pd = pd;
- work->pf_flags = pf_flags;
- work->num_sge = num_sge;
-
- INIT_WORK(&work->work, mlx5_ib_prefetch_mr_work);
-
- srcu_key = srcu_read_lock(&dev->mr_srcu);
-
- valid_req = num_pending_prefetch_inc(pd, sg_list, num_sge);
- if (valid_req)
- queue_work(system_unbound_wq, &work->work);
- else
- kvfree(work);
-
- srcu_read_unlock(&dev->mr_srcu, srcu_key);
-
- return valid_req ? 0 : -EINVAL;
+ srcu_key = srcu_read_lock(&dev->odp_srcu);
+ if (!init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge)) {
+ srcu_read_unlock(&dev->odp_srcu, srcu_key);
+ return -EINVAL;
+ }
+ queue_work(system_unbound_wq, &work->work);
+ srcu_read_unlock(&dev->odp_srcu, srcu_key);
+ return 0;
}
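
The prefetch rework above follows a deferred-work pattern: each SGE is resolved to an ODP MR under the odp_srcu read lock, the MR is pinned by bumping num_deferred_work before the work is queued, and the worker drops those counts once the faults have been handled. Below is a minimal userspace sketch of that pattern; struct fake_mr, struct frag and the pthread standing in for the workqueue are illustrative stand-ins, not the driver's structures.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct fake_mr {
	atomic_int num_deferred_work;	/* stands in for mr->num_deferred_work */
};

struct frag {
	struct fake_mr *mr;
	unsigned long io_virt;
	size_t length;
};

struct prefetch_work {
	unsigned int num_sge;
	struct frag frags[];		/* flex array, like struct prefetch_mr_work */
};

static void destroy_prefetch_work(struct prefetch_work *w)
{
	for (unsigned int i = 0; i < w->num_sge; i++)
		atomic_fetch_sub(&w->frags[i].mr->num_deferred_work, 1);
	free(w);
}

static void *prefetch_worker(void *arg)
{
	struct prefetch_work *w = arg;

	for (unsigned int i = 0; i < w->num_sge; i++)	/* "fault" each range */
		printf("faulting 0x%lx + %zu bytes\n",
		       w->frags[i].io_virt, w->frags[i].length);
	destroy_prefetch_work(w);	/* worker drops the per-MR references */
	return NULL;
}

int main(void)
{
	struct fake_mr mr;
	struct prefetch_work *w;
	pthread_t t;

	atomic_init(&mr.num_deferred_work, 0);
	w = calloc(1, sizeof(*w) + 2 * sizeof(w->frags[0]));
	if (!w)
		return 1;
	w->num_sge = 2;
	for (unsigned int i = 0; i < w->num_sge; i++) {
		w->frags[i] = (struct frag){ &mr, 0x1000ul * (i + 1), 4096 };
		atomic_fetch_add(&mr.num_deferred_work, 1); /* pin before queueing */
	}
	pthread_create(&t, NULL, prefetch_worker, w);	/* "queue_work" */
	pthread_join(&t, NULL);
	printf("deferred work left: %d\n", atomic_load(&mr.num_deferred_work));
	return 0;
}

The key property, as in the patch, is that the references are taken before the work becomes visible to the worker and are only released by the worker (or by the failure rollback), so an MR cannot be destroyed while a deferred fault is still pending against it.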
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 379328b2598f..957f3a52589b 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -129,14 +129,10 @@ static int is_sqp(enum ib_qp_type qp_type)
*
* Return: zero on success, or an error code.
*/
-static int mlx5_ib_read_user_wqe_common(struct ib_umem *umem,
- void *buffer,
- u32 buflen,
- int wqe_index,
- int wq_offset,
- int wq_wqe_cnt,
- int wq_wqe_shift,
- int bcnt,
+static int mlx5_ib_read_user_wqe_common(struct ib_umem *umem, void *buffer,
+ size_t buflen, int wqe_index,
+ int wq_offset, int wq_wqe_cnt,
+ int wq_wqe_shift, int bcnt,
size_t *bytes_copied)
{
size_t offset = wq_offset + ((wqe_index % wq_wqe_cnt) << wq_wqe_shift);
@@ -160,11 +156,43 @@ static int mlx5_ib_read_user_wqe_common(struct ib_umem *umem,
return 0;
}
-int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp,
- int wqe_index,
- void *buffer,
- int buflen,
- size_t *bc)
+static int mlx5_ib_read_kernel_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index,
+ void *buffer, size_t buflen, size_t *bc)
+{
+ struct mlx5_wqe_ctrl_seg *ctrl;
+ size_t bytes_copied = 0;
+ size_t wqe_length;
+ void *p;
+ int ds;
+
+ wqe_index = wqe_index & qp->sq.fbc.sz_m1;
+
+ /* read the control segment first */
+ p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, wqe_index);
+ ctrl = p;
+ ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
+ wqe_length = ds * MLX5_WQE_DS_UNITS;
+
+	/* read the rest of the WQE if it spans more than one stride */
+ while (bytes_copied < wqe_length) {
+ size_t copy_length =
+ min_t(size_t, buflen - bytes_copied, MLX5_SEND_WQE_BB);
+
+ if (!copy_length)
+ break;
+
+ memcpy(buffer + bytes_copied, p, copy_length);
+ bytes_copied += copy_length;
+
+ wqe_index = (wqe_index + 1) & qp->sq.fbc.sz_m1;
+ p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, wqe_index);
+ }
+ *bc = bytes_copied;
+ return 0;
+}
+
+static int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index,
+ void *buffer, size_t buflen, size_t *bc)
{
struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
struct ib_umem *umem = base->ubuffer.umem;
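
mlx5_ib_read_kernel_wqe_sq() above walks a circular send queue whose WQEs may span several fixed-size basic blocks. A standalone sketch of that copy loop follows; it models the queue as a flat power-of-two array and assumes a 64-byte stride for MLX5_SEND_WQE_BB, so the constants are illustrative rather than taken from the driver headers.

#include <stdio.h>
#include <string.h>

#define STRIDE		64	/* assumed value of MLX5_SEND_WQE_BB */
#define SQ_ENTRIES	8	/* queue depth, must be a power of two */

static size_t read_wqe(const unsigned char *sq, unsigned int wqe_index,
		       size_t wqe_length, unsigned char *buf, size_t buflen)
{
	size_t copied = 0;

	wqe_index &= SQ_ENTRIES - 1;		/* like qp->sq.fbc.sz_m1 */
	while (copied < wqe_length) {
		size_t chunk = buflen - copied;

		if (chunk > STRIDE)
			chunk = STRIDE;
		if (!chunk)
			break;			/* destination buffer exhausted */
		memcpy(buf + copied, sq + (size_t)wqe_index * STRIDE, chunk);
		copied += chunk;
		wqe_index = (wqe_index + 1) & (SQ_ENTRIES - 1);	/* wrap around */
	}
	return copied;
}

int main(void)
{
	unsigned char sq[SQ_ENTRIES * STRIDE], buf[3 * STRIDE];

	for (size_t i = 0; i < sizeof(sq); i++)
		sq[i] = (unsigned char)i;
	/* a three-stride WQE starting in the last slot wraps back to slot 0 */
	printf("copied %zu bytes, first byte 0x%02x\n",
	       read_wqe(sq, 7, 3 * STRIDE, buf, sizeof(buf)), buf[0]);
	return 0;
}

As in the driver, the index is masked rather than bounds-checked, so a WQE that begins near the end of the queue simply continues from slot 0.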
@@ -176,18 +204,10 @@ int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp,
int ret;
int ds;
- if (buflen < sizeof(*ctrl))
- return -EINVAL;
-
/* at first read as much as possible */
- ret = mlx5_ib_read_user_wqe_common(umem,
- buffer,
- buflen,
- wqe_index,
- wq->offset,
- wq->wqe_cnt,
- wq->wqe_shift,
- buflen,
+ ret = mlx5_ib_read_user_wqe_common(umem, buffer, buflen, wqe_index,
+ wq->offset, wq->wqe_cnt,
+ wq->wqe_shift, buflen,
&bytes_copied);
if (ret)
return ret;
@@ -210,13 +230,9 @@ int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp,
* so read the remaining bytes starting
* from wqe_index 0
*/
- ret = mlx5_ib_read_user_wqe_common(umem,
- buffer + bytes_copied,
- buflen - bytes_copied,
- 0,
- wq->offset,
- wq->wqe_cnt,
- wq->wqe_shift,
+ ret = mlx5_ib_read_user_wqe_common(umem, buffer + bytes_copied,
+ buflen - bytes_copied, 0, wq->offset,
+ wq->wqe_cnt, wq->wqe_shift,
wqe_length - bytes_copied,
&bytes_copied2);
@@ -226,11 +242,24 @@ int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp,
return 0;
}
-int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp,
- int wqe_index,
- void *buffer,
- int buflen,
- size_t *bc)
+int mlx5_ib_read_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
+ size_t buflen, size_t *bc)
+{
+ struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
+ struct ib_umem *umem = base->ubuffer.umem;
+
+ if (buflen < sizeof(struct mlx5_wqe_ctrl_seg))
+ return -EINVAL;
+
+ if (!umem)
+ return mlx5_ib_read_kernel_wqe_sq(qp, wqe_index, buffer,
+ buflen, bc);
+
+ return mlx5_ib_read_user_wqe_sq(qp, wqe_index, buffer, buflen, bc);
+}
+
+static int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index,
+ void *buffer, size_t buflen, size_t *bc)
{
struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
struct ib_umem *umem = base->ubuffer.umem;
@@ -238,14 +267,9 @@ int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp,
size_t bytes_copied;
int ret;
- ret = mlx5_ib_read_user_wqe_common(umem,
- buffer,
- buflen,
- wqe_index,
- wq->offset,
- wq->wqe_cnt,
- wq->wqe_shift,
- buflen,
+ ret = mlx5_ib_read_user_wqe_common(umem, buffer, buflen, wqe_index,
+ wq->offset, wq->wqe_cnt,
+ wq->wqe_shift, buflen,
&bytes_copied);
if (ret)
@@ -254,25 +278,33 @@ int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp,
return 0;
}
-int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq,
- int wqe_index,
- void *buffer,
- int buflen,
- size_t *bc)
+int mlx5_ib_read_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
+ size_t buflen, size_t *bc)
+{
+ struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
+ struct ib_umem *umem = base->ubuffer.umem;
+ struct mlx5_ib_wq *wq = &qp->rq;
+ size_t wqe_size = 1 << wq->wqe_shift;
+
+ if (buflen < wqe_size)
+ return -EINVAL;
+
+ if (!umem)
+ return -EOPNOTSUPP;
+
+ return mlx5_ib_read_user_wqe_rq(qp, wqe_index, buffer, buflen, bc);
+}
+
+static int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index,
+ void *buffer, size_t buflen, size_t *bc)
{
struct ib_umem *umem = srq->umem;
size_t bytes_copied;
int ret;
- ret = mlx5_ib_read_user_wqe_common(umem,
- buffer,
- buflen,
- wqe_index,
- 0,
- srq->msrq.max,
- srq->msrq.wqe_shift,
- buflen,
- &bytes_copied);
+ ret = mlx5_ib_read_user_wqe_common(umem, buffer, buflen, wqe_index, 0,
+ srq->msrq.max, srq->msrq.wqe_shift,
+ buflen, &bytes_copied);
if (ret)
return ret;
@@ -280,6 +312,21 @@ int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq,
return 0;
}
+int mlx5_ib_read_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer,
+ size_t buflen, size_t *bc)
+{
+ struct ib_umem *umem = srq->umem;
+ size_t wqe_size = 1 << srq->msrq.wqe_shift;
+
+ if (buflen < wqe_size)
+ return -EINVAL;
+
+ if (!umem)
+ return -EOPNOTSUPP;
+
+ return mlx5_ib_read_user_wqe_srq(srq, wqe_index, buffer, buflen, bc);
+}
+
static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
{
struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
@@ -749,7 +796,7 @@ static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata,
{
int err;
- *umem = ib_umem_get(udata, addr, size, 0, 0);
+ *umem = ib_umem_get(&dev->ib_dev, addr, size, 0);
if (IS_ERR(*umem)) {
mlx5_ib_dbg(dev, "umem_get failed\n");
return PTR_ERR(*umem);
@@ -806,7 +853,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
if (!ucmd->buf_addr)
return -EINVAL;
- rwq->umem = ib_umem_get(udata, ucmd->buf_addr, rwq->buf_size, 0, 0);
+ rwq->umem = ib_umem_get(&dev->ib_dev, ucmd->buf_addr, rwq->buf_size, 0);
if (IS_ERR(rwq->umem)) {
mlx5_ib_dbg(dev, "umem_get failed\n");
err = PTR_ERR(rwq->umem);
@@ -1041,11 +1088,14 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
IB_QP_CREATE_IPOIB_UD_LSO |
IB_QP_CREATE_NETIF_QP |
- mlx5_ib_create_qp_sqpn_qp1()))
+ MLX5_IB_QP_CREATE_SQPN_QP1 |
+ MLX5_IB_QP_CREATE_WC_TEST))
return -EINVAL;
if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
qp->bf.bfreg = &dev->fp_bfreg;
+ else if (init_attr->create_flags & MLX5_IB_QP_CREATE_WC_TEST)
+ qp->bf.bfreg = &dev->wc_bfreg;
else
qp->bf.bfreg = &dev->bfreg;
@@ -1104,7 +1154,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
MLX5_SET(qpc, qpc, fre, 1);
MLX5_SET(qpc, qpc, rlky, 1);
- if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) {
+ if (init_attr->create_flags & MLX5_IB_QP_CREATE_SQPN_QP1) {
MLX5_SET(qpc, qpc, deth_sqpn, 1);
qp->flags |= MLX5_IB_QP_SQPN_QP1;
}
@@ -1868,7 +1918,7 @@ static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev,
{
enum ib_qp_type qpt = init_attr->qp_type;
int scqe_sz;
- bool allow_scat_cqe = 0;
+ bool allow_scat_cqe = false;
if (qpt == IB_QPT_UC || qpt == IB_QPT_UD)
return;
@@ -2140,7 +2190,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
return -EINVAL;
}
if (init_attr->create_flags &
- mlx5_ib_create_qp_sqpn_qp1()) {
+ MLX5_IB_QP_CREATE_SQPN_QP1) {
mlx5_ib_dbg(dev, "user-space is not allowed to create UD QPs spoofing as QP1\n");
return -EINVAL;
}
@@ -3249,10 +3299,12 @@ static int modify_raw_packet_qp_sq(
}
/* Only remove the old rate after new rate was set */
- if ((old_rl.rate &&
- !mlx5_rl_are_equal(&old_rl, &new_rl)) ||
- (new_state != MLX5_SQC_STATE_RDY))
+ if ((old_rl.rate && !mlx5_rl_are_equal(&old_rl, &new_rl)) ||
+ (new_state != MLX5_SQC_STATE_RDY)) {
mlx5_rl_remove_rate(dev, &old_rl);
+ if (new_state != MLX5_SQC_STATE_RDY)
+ memset(&new_rl, 0, sizeof(new_rl));
+ }
ibqp->rl = new_rl;
sq->state = new_state;
@@ -3386,19 +3438,13 @@ static int __mlx5_ib_qp_set_counter(struct ib_qp *qp,
struct mlx5_ib_dev *dev = to_mdev(qp->device);
struct mlx5_ib_qp *mqp = to_mqp(qp);
struct mlx5_qp_context context = {};
- struct mlx5_ib_port *mibport = NULL;
struct mlx5_ib_qp_base *base;
u32 set_id;
- if (!MLX5_CAP_GEN(dev->mdev, rts2rts_qp_counters_set_id))
- return 0;
-
- if (counter) {
+ if (counter)
set_id = counter->id;
- } else {
- mibport = &dev->port[mqp->port - 1];
- set_id = mibport->cnts.set_id;
- }
+ else
+ set_id = mlx5_ib_get_counters_id(dev, mqp->port - 1);
base = &mqp->trans_qp.base;
context.qp_counter_set_usr_page &= cpu_to_be32(0xffffff);
@@ -3459,7 +3505,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
struct mlx5_ib_cq *send_cq, *recv_cq;
struct mlx5_qp_context *context;
struct mlx5_ib_pd *pd;
- struct mlx5_ib_port *mibport = NULL;
enum mlx5_qp_state mlx5_cur, mlx5_new;
enum mlx5_qp_optpar optpar;
u32 set_id = 0;
@@ -3624,11 +3669,10 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (qp->flags & MLX5_IB_QP_UNDERLAY)
port_num = 0;
- mibport = &dev->port[port_num];
if (ibqp->counter)
set_id = ibqp->counter->id;
else
- set_id = mibport->cnts.set_id;
+ set_id = mlx5_ib_get_counters_id(dev, port_num);
context->qp_counter_set_usr_page |=
cpu_to_be32(set_id << 24);
}
@@ -3817,6 +3861,8 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry);
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+ u16 set_id;
+
required |= IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT;
if (!is_valid_mask(attr_mask, required, 0))
return -EINVAL;
@@ -3843,7 +3889,9 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
}
MLX5_SET(dctc, dctc, pkey_index, attr->pkey_index);
MLX5_SET(dctc, dctc, port, attr->port_num);
- MLX5_SET(dctc, dctc, counter_set_id, dev->port[attr->port_num - 1].cnts.set_id);
+
+ set_id = mlx5_ib_get_counters_id(dev, attr->port_num - 1);
+ MLX5_SET(dctc, dctc, counter_set_id, set_id);
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
struct mlx5_ib_modify_qp_resp resp = {};
@@ -4162,7 +4210,7 @@ static u64 get_xlt_octo(u64 bytes)
MLX5_IB_UMR_OCTOWORD;
}
-static __be64 frwr_mkey_mask(void)
+static __be64 frwr_mkey_mask(bool atomic)
{
u64 result;
@@ -4175,10 +4223,12 @@ static __be64 frwr_mkey_mask(void)
MLX5_MKEY_MASK_LW |
MLX5_MKEY_MASK_RR |
MLX5_MKEY_MASK_RW |
- MLX5_MKEY_MASK_A |
MLX5_MKEY_MASK_SMALL_FENCE |
MLX5_MKEY_MASK_FREE;
+ if (atomic)
+ result |= MLX5_MKEY_MASK_A;
+
return cpu_to_be64(result);
}
@@ -4204,7 +4254,7 @@ static __be64 sig_mkey_mask(void)
}
static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
- struct mlx5_ib_mr *mr, u8 flags)
+ struct mlx5_ib_mr *mr, u8 flags, bool atomic)
{
int size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size;
@@ -4212,7 +4262,7 @@ static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
umr->flags = flags;
umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
- umr->mkey_mask = frwr_mkey_mask();
+ umr->mkey_mask = frwr_mkey_mask(atomic);
}
static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
@@ -4811,10 +4861,22 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
{
struct mlx5_ib_mr *mr = to_mmr(wr->mr);
struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
+ struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device);
int mr_list_size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size;
bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD;
+ bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC;
u8 flags = 0;
+ if (!mlx5_ib_can_use_umr(dev, atomic, wr->access)) {
+ mlx5_ib_warn(to_mdev(qp->ibqp.device),
+ "Fast update of %s for MR is disabled\n",
+ (MLX5_CAP_GEN(dev->mdev,
+ umr_modify_entity_size_disabled)) ?
+ "entity size" :
+ "atomic access");
+ return -EINVAL;
+ }
+
if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
mlx5_ib_warn(to_mdev(qp->ibqp.device),
"Invalid IB_SEND_INLINE send flag\n");
@@ -4826,7 +4888,7 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
if (umr_inline)
flags |= MLX5_UMR_INLINE;
- set_reg_umr_seg(*seg, mr, flags);
+ set_reg_umr_seg(*seg, mr, flags, atomic);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
@@ -5315,7 +5377,6 @@ out:
* we hit doorbell */
wmb();
- /* currently we support only regular doorbells */
mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset);
/* Make sure doorbells don't leak out of SQ spinlock
* and reach the HCA out of order.
@@ -5810,7 +5871,7 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV;
if (qp->flags & MLX5_IB_QP_SQPN_QP1)
- qp_init_attr->create_flags |= mlx5_ib_create_qp_sqpn_qp1();
+ qp_init_attr->create_flags |= MLX5_IB_QP_CREATE_SQPN_QP1;
qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
@@ -5942,12 +6003,21 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
}
MLX5_SET(wq, wq, log_wq_stride, rwq->log_rq_stride);
if (rwq->create_flags & MLX5_IB_WQ_FLAGS_STRIDING_RQ) {
+ /*
+		 * In firmware, the number of strides in each WQE is:
+		 * "512 * 2^single_wqe_log_num_of_strides"
+		 * Values 3 to 8 are accepted as 10 to 15; values 9 to 18
+		 * are accepted as 0 to 9.
+ */
+ static const u8 fw_map[] = { 10, 11, 12, 13, 14, 15, 0, 1,
+ 2, 3, 4, 5, 6, 7, 8, 9 };
MLX5_SET(wq, wq, two_byte_shift_en, rwq->two_byte_shift_en);
MLX5_SET(wq, wq, log_wqe_stride_size,
rwq->single_stride_log_num_of_bytes -
MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES);
- MLX5_SET(wq, wq, log_wqe_num_of_strides, rwq->log_num_strides -
- MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES);
+ MLX5_SET(wq, wq, log_wqe_num_of_strides,
+ fw_map[rwq->log_num_strides -
+ MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES]);
}
MLX5_SET(wq, wq, log_wq_sz, rwq->log_rq_size);
MLX5_SET(wq, wq, pd, to_mpd(pd)->pdn);
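
The fw_map table above translates the user's single_wqe_log_num_of_strides into the firmware encoding described in the comment (3..8 become 10..15, 9..18 become 0..9). A small standalone sketch of the same translation is below; it assumes MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES is 3, matching the 16-entry table indexed from that base.

#include <stdio.h>

#define EXT_MIN_LOG_NUM_STRIDES 3  /* assumed MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES */

static unsigned int fw_log_num_strides(unsigned int log_num_strides)
{
	/* same table as the patch: 3..8 -> 10..15, 9..18 -> 0..9 */
	static const unsigned char fw_map[] = { 10, 11, 12, 13, 14, 15, 0, 1,
						2, 3, 4, 5, 6, 7, 8, 9 };

	return fw_map[log_num_strides - EXT_MIN_LOG_NUM_STRIDES];
}

int main(void)
{
	/* walk the full 3..18 range covered by the table */
	for (unsigned int log = 3; log <= 18; log++)
		printf("log_num_strides %2u -> firmware value %2u\n",
		       log, fw_log_num_strides(log));
	return 0;
}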
@@ -6022,6 +6092,19 @@ static int set_user_rq_size(struct mlx5_ib_dev *dev,
return 0;
}
+static bool log_of_strides_valid(struct mlx5_ib_dev *dev, u32 log_num_strides)
+{
+ if ((log_num_strides > MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES) ||
+ (log_num_strides < MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES))
+ return false;
+
+ if (!MLX5_CAP_GEN(dev->mdev, ext_stride_num_range) &&
+ (log_num_strides < MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES))
+ return false;
+
+ return true;
+}
+
static int prepare_user_rq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata,
@@ -6069,14 +6152,16 @@ static int prepare_user_rq(struct ib_pd *pd,
MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES);
return -EINVAL;
}
- if ((ucmd.single_wqe_log_num_of_strides >
- MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES) ||
- (ucmd.single_wqe_log_num_of_strides <
- MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES)) {
- mlx5_ib_dbg(dev, "Invalid log num strides (%u. Range is %u - %u)\n",
- ucmd.single_wqe_log_num_of_strides,
- MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES,
- MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES);
+ if (!log_of_strides_valid(dev,
+ ucmd.single_wqe_log_num_of_strides)) {
+ mlx5_ib_dbg(
+ dev,
+ "Invalid log num strides (%u. Range is %u - %u)\n",
+ ucmd.single_wqe_log_num_of_strides,
+ MLX5_CAP_GEN(dev->mdev, ext_stride_num_range) ?
+ MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES :
+ MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES,
+ MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES);
return -EINVAL;
}
rwq->single_stride_log_num_of_bytes =
@@ -6331,11 +6416,13 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
}
if (curr_wq_state == IB_WQS_RESET && wq_state == IB_WQS_RDY) {
+ u16 set_id;
+
+ set_id = mlx5_ib_get_counters_id(dev, 0);
if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
MLX5_SET64(modify_rq_in, in, modify_bitmask,
MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
- MLX5_SET(rqc, rqc, counter_set_id,
- dev->port->cnts.set_id);
+ MLX5_SET(rqc, rqc, counter_set_id, set_id);
} else
dev_info_once(
&dev->ib_dev.dev,
@@ -6486,6 +6573,7 @@ void mlx5_ib_drain_rq(struct ib_qp *qp)
*/
int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter)
{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
struct mlx5_ib_qp *mqp = to_mqp(qp);
int err = 0;
@@ -6495,6 +6583,11 @@ int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter)
goto out;
}
+ if (!MLX5_CAP_GEN(dev->mdev, rts2rts_qp_counters_set_id)) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
if (mqp->state == IB_QPS_RTS) {
err = __mlx5_ib_qp_set_counter(qp, counter);
if (!err)
diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c
new file mode 100644
index 000000000000..8f6c04f12531
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/restrack.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <uapi/rdma/rdma_netlink.h>
+#include <rdma/ib_umem_odp.h>
+#include <rdma/restrack.h>
+#include "mlx5_ib.h"
+
+static int fill_stat_mr_entry(struct sk_buff *msg,
+ struct rdma_restrack_entry *res)
+{
+ struct ib_mr *ibmr = container_of(res, struct ib_mr, res);
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct nlattr *table_attr;
+
+ if (!(mr->access_flags & IB_ACCESS_ON_DEMAND))
+ return 0;
+
+ table_attr = nla_nest_start(msg,
+ RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
+
+ if (!table_attr)
+ goto err;
+
+ if (rdma_nl_stat_hwcounter_entry(msg, "page_faults",
+ atomic64_read(&mr->odp_stats.faults)))
+ goto err_table;
+ if (rdma_nl_stat_hwcounter_entry(
+ msg, "page_invalidations",
+ atomic64_read(&mr->odp_stats.invalidations)))
+ goto err_table;
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err_table:
+ nla_nest_cancel(msg, table_attr);
+err:
+ return -EMSGSIZE;
+}
+
+static int fill_res_mr_entry(struct sk_buff *msg,
+ struct rdma_restrack_entry *res)
+{
+ struct ib_mr *ibmr = container_of(res, struct ib_mr, res);
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct nlattr *table_attr;
+
+ if (!(mr->access_flags & IB_ACCESS_ON_DEMAND))
+ return 0;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ goto err;
+
+ if (mr->is_odp_implicit) {
+ if (rdma_nl_put_driver_string(msg, "odp", "implicit"))
+ goto err;
+ } else {
+ if (rdma_nl_put_driver_string(msg, "odp", "explicit"))
+ goto err;
+ }
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+ return -EMSGSIZE;
+}
+
+int mlx5_ib_fill_res_entry(struct sk_buff *msg,
+ struct rdma_restrack_entry *res)
+{
+ if (res->type == RDMA_RESTRACK_MR)
+ return fill_res_mr_entry(msg, res);
+
+ return 0;
+}
+
+int mlx5_ib_fill_stat_entry(struct sk_buff *msg,
+ struct rdma_restrack_entry *res)
+{
+ if (res->type == RDMA_RESTRACK_MR)
+ return fill_stat_mr_entry(msg, res);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 4e7fde86c96b..b1a8a9175040 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -80,7 +80,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
- srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0);
+ srq->umem = ib_umem_get(pd->device, ucmd.buf_addr, buf_size, 0);
if (IS_ERR(srq->umem)) {
mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size);
err = PTR_ERR(srq->umem);
diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c
index b0d0687c7a68..8fc3630a9d4c 100644
--- a/drivers/infiniband/hw/mlx5/srq_cmd.c
+++ b/drivers/infiniband/hw/mlx5/srq_cmd.c
@@ -86,7 +86,7 @@ struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn)
xa_lock(&table->array);
srq = xa_load(&table->array, srqn);
if (srq)
- atomic_inc(&srq->common.refcount);
+ refcount_inc(&srq->common.refcount);
xa_unlock(&table->array);
return srq;
@@ -592,7 +592,7 @@ int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
if (err)
return err;
- atomic_set(&srq->common.refcount, 1);
+ refcount_set(&srq->common.refcount, 1);
init_completion(&srq->common.free);
err = xa_err(xa_store_irq(&table->array, srq->srqn, srq, GFP_KERNEL));
@@ -675,7 +675,7 @@ static int srq_event_notifier(struct notifier_block *nb,
xa_lock(&table->array);
srq = xa_load(&table->array, srqn);
if (srq)
- atomic_inc(&srq->common.refcount);
+ refcount_inc(&srq->common.refcount);
xa_unlock(&table->array);
if (!srq)
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index bfd4eebc1182..599794c5a78f 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -576,14 +576,10 @@ enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port);
int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-int mthca_process_mad(struct ib_device *ibdev,
- int mad_flags,
- u8 port_num,
- const struct ib_wc *in_wc,
- const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in, size_t in_mad_size,
- struct ib_mad_hdr *out, size_t *out_mad_size,
- u16 *out_mad_pkey_index);
+int mthca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+ const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+ const struct ib_mad *in, struct ib_mad *out,
+ size_t *out_mad_size, u16 *out_mad_pkey_index);
int mthca_create_agents(struct mthca_dev *dev);
void mthca_free_agents(struct mthca_dev *dev);
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 7ad517da4917..99aa8183a7f2 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -196,30 +196,19 @@ static void forward_trap(struct mthca_dev *dev,
}
}
-int mthca_process_mad(struct ib_device *ibdev,
- int mad_flags,
- u8 port_num,
- const struct ib_wc *in_wc,
- const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in, size_t in_mad_size,
- struct ib_mad_hdr *out, size_t *out_mad_size,
- u16 *out_mad_pkey_index)
+int mthca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+ const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+ const struct ib_mad *in, struct ib_mad *out,
+ size_t *out_mad_size, u16 *out_mad_pkey_index)
{
int err;
u16 slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE);
u16 prev_lid = 0;
struct ib_port_attr pattr;
- const struct ib_mad *in_mad = (const struct ib_mad *)in;
- struct ib_mad *out_mad = (struct ib_mad *)out;
-
- if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
- *out_mad_size != sizeof(*out_mad)))
- return IB_MAD_RESULT_FAILURE;
/* Forward locally generated traps to the SM */
- if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP &&
- slid == 0) {
- forward_trap(to_mdev(ibdev), port_num, in_mad);
+ if (in->mad_hdr.method == IB_MGMT_METHOD_TRAP && !slid) {
+ forward_trap(to_mdev(ibdev), port_num, in);
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
}
@@ -229,40 +218,39 @@ int mthca_process_mad(struct ib_device *ibdev,
* Only handle PMA and Mellanox vendor-specific class gets and
* sets for other classes.
*/
- if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
- in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
- if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
- in_mad->mad_hdr.method != IB_MGMT_METHOD_SET &&
- in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
+ if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+ in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+ if (in->mad_hdr.method != IB_MGMT_METHOD_GET &&
+ in->mad_hdr.method != IB_MGMT_METHOD_SET &&
+ in->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
return IB_MAD_RESULT_SUCCESS;
/*
* Don't process SMInfo queries or vendor-specific
* MADs -- the SMA can't handle them.
*/
- if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO ||
- ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) ==
+ if (in->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO ||
+ ((in->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) ==
IB_SMP_ATTR_VENDOR_MASK))
return IB_MAD_RESULT_SUCCESS;
- } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
- in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS1 ||
- in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS2) {
- if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
- in_mad->mad_hdr.method != IB_MGMT_METHOD_SET)
+ } else if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
+ in->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS1 ||
+ in->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS2) {
+ if (in->mad_hdr.method != IB_MGMT_METHOD_GET &&
+ in->mad_hdr.method != IB_MGMT_METHOD_SET)
return IB_MAD_RESULT_SUCCESS;
} else
return IB_MAD_RESULT_SUCCESS;
- if ((in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
- in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
- in_mad->mad_hdr.method == IB_MGMT_METHOD_SET &&
- in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
+ if ((in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+ in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
+ in->mad_hdr.method == IB_MGMT_METHOD_SET &&
+ in->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
!ib_query_port(ibdev, port_num, &pattr))
prev_lid = ib_lid_cpu16(pattr.lid);
- err = mthca_MAD_IFC(to_mdev(ibdev),
- mad_flags & IB_MAD_IGNORE_MKEY,
- mad_flags & IB_MAD_IGNORE_BKEY,
- port_num, in_wc, in_grh, in_mad, out_mad);
+ err = mthca_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY,
+ mad_flags & IB_MAD_IGNORE_BKEY, port_num, in_wc,
+ in_grh, in, out);
if (err == -EBADMSG)
return IB_MAD_RESULT_SUCCESS;
else if (err) {
@@ -270,16 +258,16 @@ int mthca_process_mad(struct ib_device *ibdev,
return IB_MAD_RESULT_FAILURE;
}
- if (!out_mad->mad_hdr.status) {
- smp_snoop(ibdev, port_num, in_mad, prev_lid);
- node_desc_override(ibdev, out_mad);
+ if (!out->mad_hdr.status) {
+ smp_snoop(ibdev, port_num, in, prev_lid);
+ node_desc_override(ibdev, out);
}
/* set return bit in status of directed route responses */
- if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
- out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
+ if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ out->mad_hdr.status |= cpu_to_be16(1 << 15);
- if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
+ if (in->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
/* no response for trap repress */
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index edccfd6e178f..78a48aea3faf 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -472,7 +472,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
goto out;
}
- ret = get_user_pages_fast(uaddr & PAGE_MASK, 1,
+ ret = pin_user_pages_fast(uaddr & PAGE_MASK, 1,
FOLL_WRITE | FOLL_LONGTERM, pages);
if (ret < 0)
goto out;
@@ -482,7 +482,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
if (ret < 0) {
- put_user_page(pages[0]);
+ unpin_user_page(pages[0]);
goto out;
}
@@ -490,7 +490,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
mthca_uarc_virt(dev, uar, i));
if (ret) {
pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
- put_user_page(sg_page(&db_tab->page[i].mem));
+ unpin_user_page(sg_page(&db_tab->page[i].mem));
goto out;
}
@@ -556,7 +556,7 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
if (db_tab->page[i].uvirt) {
mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1);
pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
- put_user_page(sg_page(&db_tab->page[i].mem));
+ unpin_user_page(sg_page(&db_tab->page[i].mem));
}
}
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 23554d8bf241..ac19d57803b5 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -880,9 +880,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (!mr)
return ERR_PTR(-ENOMEM);
- mr->umem = ib_umem_get(udata, start, length, acc,
- ucmd.mr_attrs & MTHCA_MR_DMASYNC);
-
+ mr->umem = ib_umem_get(pd->device, start, length, acc);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
goto err;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index 8d3e36d548aa..2b7f00ac41b0 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -247,35 +247,20 @@ int ocrdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
return 0;
}
-int ocrdma_process_mad(struct ib_device *ibdev,
- int process_mad_flags,
- u8 port_num,
- const struct ib_wc *in_wc,
- const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in, size_t in_mad_size,
- struct ib_mad_hdr *out, size_t *out_mad_size,
+int ocrdma_process_mad(struct ib_device *ibdev, int process_mad_flags,
+ u8 port_num, const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh, const struct ib_mad *in,
+ struct ib_mad *out, size_t *out_mad_size,
u16 *out_mad_pkey_index)
{
- int status;
+ int status = IB_MAD_RESULT_SUCCESS;
struct ocrdma_dev *dev;
- const struct ib_mad *in_mad = (const struct ib_mad *)in;
- struct ib_mad *out_mad = (struct ib_mad *)out;
-
- if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
- *out_mad_size != sizeof(*out_mad)))
- return IB_MAD_RESULT_FAILURE;
- switch (in_mad->mad_hdr.mgmt_class) {
- case IB_MGMT_CLASS_PERF_MGMT:
+ if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) {
dev = get_ocrdma_dev(ibdev);
- if (!ocrdma_pma_counters(dev, out_mad))
- status = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
- else
- status = IB_MAD_RESULT_SUCCESS;
- break;
- default:
- status = IB_MAD_RESULT_SUCCESS;
- break;
+ ocrdma_pma_counters(dev, out);
+ status |= IB_MAD_RESULT_REPLY;
}
+
return status;
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
index 64cb82c08664..9780afcde780 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
@@ -56,12 +56,9 @@ int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags,
void ocrdma_destroy_ah(struct ib_ah *ah, u32 flags);
int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
-int ocrdma_process_mad(struct ib_device *,
- int process_mad_flags,
- u8 port_num,
- const struct ib_wc *in_wc,
- const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in, size_t in_mad_size,
- struct ib_mad_hdr *out, size_t *out_mad_size,
+int ocrdma_process_mad(struct ib_device *dev, int process_mad_flags,
+ u8 port_num, const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh, const struct ib_mad *in,
+ struct ib_mad *out, size_t *out_mad_size,
u16 *out_mad_pkey_index);
#endif /* __OCRDMA_AH_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index c15cfc6cef81..d8c47d24d6d6 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -166,7 +166,6 @@ static const struct ib_device_ops ocrdma_dev_ops = {
.get_port_immutable = ocrdma_port_immutable,
.map_mr_sg = ocrdma_map_mr_sg,
.mmap = ocrdma_mmap,
- .modify_port = ocrdma_modify_port,
.modify_qp = ocrdma_modify_qp,
.poll_cq = ocrdma_poll_cq,
.post_recv = ocrdma_post_recv,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
index 6ef89c226ad8..c2e0d0fa44be 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -2034,7 +2034,7 @@ struct ocrdma_rx_stats {
};
struct ocrdma_rx_qp_err_stats {
- u32 nak_invalid_requst_errors;
+ u32 nak_invalid_request_errors;
u32 nak_remote_operation_errors;
u32 nak_count_remote_access_errors;
u32 local_length_errors;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index a902942adb5d..5f831e3bdbad 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -423,8 +423,8 @@ static char *ocrdma_rxqp_errstats(struct ocrdma_dev *dev)
memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM));
pcur = stats;
- pcur += ocrdma_add_stat(stats, pcur, "nak_invalid_requst_errors",
- (u64)rx_qp_err_stats->nak_invalid_requst_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "nak_invalid_request_errors",
+ (u64)rx_qp_err_stats->nak_invalid_request_errors);
pcur += ocrdma_add_stat(stats, pcur, "nak_remote_operation_errors",
(u64)rx_qp_err_stats->nak_remote_operation_errors);
pcur += ocrdma_add_stat(stats, pcur, "nak_count_remote_access_errors",
@@ -670,12 +670,10 @@ err:
return -EFAULT;
}
-int ocrdma_pma_counters(struct ocrdma_dev *dev,
- struct ib_mad *out_mad)
+void ocrdma_pma_counters(struct ocrdma_dev *dev, struct ib_mad *out_mad)
{
struct ib_pma_portcounters *pma_cnt;
- memset(out_mad->data, 0, sizeof out_mad->data);
pma_cnt = (void *)(out_mad->data + 40);
ocrdma_update_stats(dev);
@@ -683,7 +681,6 @@ int ocrdma_pma_counters(struct ocrdma_dev *dev,
pma_cnt->port_rcv_data = cpu_to_be32(ocrdma_sysfs_rcv_data(dev));
pma_cnt->port_xmit_packets = cpu_to_be32(ocrdma_sysfs_xmit_pkts(dev));
pma_cnt->port_rcv_packets = cpu_to_be32(ocrdma_sysfs_rcv_pkts(dev));
- return 0;
}
static ssize_t ocrdma_dbgfs_ops_read(struct file *filp, char __user *buffer,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h
index bba1fec4f11f..98feca26ac55 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h
@@ -69,7 +69,6 @@ bool ocrdma_alloc_stats_resources(struct ocrdma_dev *dev);
void ocrdma_release_stats_resources(struct ocrdma_dev *dev);
void ocrdma_rem_port_stats(struct ocrdma_dev *dev);
void ocrdma_add_port_stats(struct ocrdma_dev *dev);
-int ocrdma_pma_counters(struct ocrdma_dev *dev,
- struct ib_mad *out_mad);
+void ocrdma_pma_counters(struct ocrdma_dev *dev, struct ib_mad *out_mad);
#endif /* __OCRDMA_STATS_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index bccc11378109..d47ea675734b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -163,10 +163,10 @@ int ocrdma_query_port(struct ib_device *ibdev,
netdev = dev->nic_info.netdev;
if (netif_running(netdev) && netif_oper_up(netdev)) {
port_state = IB_PORT_ACTIVE;
- props->phys_state = 5;
+ props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
} else {
port_state = IB_PORT_DOWN;
- props->phys_state = 3;
+ props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
}
props->max_mtu = IB_MTU_4096;
props->active_mtu = iboe_get_mtu(netdev->mtu);
@@ -190,12 +190,6 @@ int ocrdma_query_port(struct ib_device *ibdev,
return 0;
}
-int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
- struct ib_port_modify *props)
-{
- return 0;
-}
-
static int ocrdma_add_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
unsigned long len)
{
@@ -875,7 +869,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(status);
- mr->umem = ib_umem_get(udata, start, len, acc, 0);
+ mr->umem = ib_umem_get(ibpd->device, start, len, acc);
if (IS_ERR(mr->umem)) {
status = -EFAULT;
goto umem_err;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index 32488da1b752..3a5010881be5 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -54,8 +54,6 @@ int ocrdma_arm_cq(struct ib_cq *, enum ib_cq_notify_flags flags);
int ocrdma_query_device(struct ib_device *, struct ib_device_attr *props,
struct ib_udata *uhw);
int ocrdma_query_port(struct ib_device *, u8 port, struct ib_port_attr *props);
-int ocrdma_modify_port(struct ib_device *, u8 port, int mask,
- struct ib_port_modify *props);
enum rdma_protocol_type
ocrdma_query_protocol(struct ib_device *device, u8 port_num);
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index f97b3d65b30c..dcdc85a1ab25 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -76,7 +76,7 @@ static void qedr_get_dev_fw_str(struct ib_device *ibdev, char *str)
struct qedr_dev *qedr = get_qedr_dev(ibdev);
u32 fw_ver = (u32)qedr->attr.fw_ver;
- snprintf(str, IB_FW_VERSION_NAME_MAX, "%d. %d. %d. %d",
+ snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d.%d",
(fw_ver >> 24) & 0xFF, (fw_ver >> 16) & 0xFF,
(fw_ver >> 8) & 0xFF, fw_ver & 0xFF);
}
@@ -212,7 +212,7 @@ static const struct ib_device_ops qedr_dev_ops = {
.get_link_layer = qedr_link_layer,
.map_mr_sg = qedr_map_mr_sg,
.mmap = qedr_mmap,
- .modify_port = qedr_modify_port,
+ .mmap_free = qedr_mmap_free,
.modify_qp = qedr_modify_qp,
.modify_srq = qedr_modify_srq,
.poll_cq = qedr_poll_cq,
@@ -357,9 +357,10 @@ static int qedr_alloc_resources(struct qedr_dev *dev)
return -ENOMEM;
spin_lock_init(&dev->sgid_lock);
+ xa_init_flags(&dev->srqs, XA_FLAGS_LOCK_IRQ);
if (IS_IWARP(dev)) {
- xa_init_flags(&dev->qps, XA_FLAGS_LOCK_IRQ);
+ xa_init(&dev->qps);
dev->iwarp_wq = create_singlethread_workqueue("qedr_iwarpq");
}
@@ -826,7 +827,7 @@ static int qedr_init_hw(struct qedr_dev *dev)
if (rc)
goto out;
- dev->db_addr = (void __iomem *)(uintptr_t)out_params.dpi_addr;
+ dev->db_addr = out_params.dpi_addr;
dev->db_phys_addr = out_params.dpi_phys_addr;
dev->db_size = out_params.dpi_size;
dev->dpi = out_params.dpi;
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index a92ca22e5de1..5488dbd59d3c 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -40,6 +40,7 @@
#include <linux/qed/qed_rdma_if.h>
#include <linux/qed/qede_rdma.h>
#include <linux/qed/roce_common.h>
+#include <linux/completion.h>
#include "qedr_hsi_rdma.h"
#define QEDR_NODE_DESC "QLogic 579xx RoCE HCA"
@@ -229,15 +230,17 @@ struct qedr_ucontext {
struct ib_ucontext ibucontext;
struct qedr_dev *dev;
struct qedr_pd *pd;
- u64 dpi_addr;
+ void __iomem *dpi_addr;
+ struct rdma_user_mmap_entry *db_mmap_entry;
u64 dpi_phys_addr;
u32 dpi_size;
u16 dpi;
+ bool db_rec;
+};
- struct list_head mm_head;
-
- /* Lock to protect mm list */
- struct mutex mm_list_lock;
+union db_prod32 {
+ struct rdma_pwm_val16_data data;
+ u32 raw;
};
union db_prod64 {
@@ -265,6 +268,13 @@ struct qedr_userq {
struct qedr_pbl *pbl_tbl;
u64 buf_addr;
size_t buf_len;
+
+ /* doorbell recovery */
+ void __iomem *db_addr;
+ struct qedr_user_db_rec *db_rec_data;
+ struct rdma_user_mmap_entry *db_mmap_entry;
+ void __iomem *db_rec_db2_addr;
+ union db_prod32 db_rec_db2_data;
};
struct qedr_cq {
@@ -300,19 +310,6 @@ struct qedr_pd {
struct qedr_ucontext *uctx;
};
-struct qedr_mm {
- struct {
- u64 phy_addr;
- unsigned long len;
- } key;
- struct list_head entry;
-};
-
-union db_prod32 {
- struct rdma_pwm_val16_data data;
- u32 raw;
-};
-
struct qedr_qp_hwq_info {
/* WQE Elements */
struct qed_chain pbl;
@@ -377,10 +374,20 @@ enum qedr_qp_err_bitmap {
QEDR_QP_ERR_RQ_PBL_FULL = 32,
};
+enum qedr_qp_create_type {
+ QEDR_QP_CREATE_NONE,
+ QEDR_QP_CREATE_USER,
+ QEDR_QP_CREATE_KERNEL,
+};
+
+enum qedr_iwarp_cm_flags {
+ QEDR_IWARP_CM_WAIT_FOR_CONNECT = BIT(0),
+ QEDR_IWARP_CM_WAIT_FOR_DISCONNECT = BIT(1),
+};
+
struct qedr_qp {
struct ib_qp ibqp; /* must be first */
struct qedr_dev *dev;
- struct qedr_iw_ep *ep;
struct qedr_qp_hwq_info sq;
struct qedr_qp_hwq_info rq;
@@ -395,6 +402,7 @@ struct qedr_qp {
u32 id;
struct qedr_pd *pd;
enum ib_qp_type qp_type;
+ enum qedr_qp_create_type create_type;
struct qed_rdma_qp *qed_qp;
u32 qp_id;
u16 icid;
@@ -437,8 +445,11 @@ struct qedr_qp {
/* Relevant to qps created from user space only (applications) */
struct qedr_userq usq;
struct qedr_userq urq;
- atomic_t refcnt;
- bool destroyed;
+
+ /* synchronization objects used with iwarp ep */
+ struct kref refcnt;
+ struct completion iwarp_cm_comp;
+ unsigned long iwarp_cm_flags; /* enum iwarp_cm_flags */
};
struct qedr_ah {
@@ -476,6 +487,18 @@ struct qedr_mr {
u32 npages;
};
+struct qedr_user_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
+ struct qedr_dev *dev;
+ union {
+ u64 io_address;
+ void *address;
+ };
+ size_t length;
+ u16 dpi;
+ u8 mmap_flag;
+};
+
#define SET_FIELD2(value, name, flag) ((value) |= ((flag) << (name ## _SHIFT)))
#define QEDR_RESP_IMM (RDMA_CQE_RESPONDER_IMM_FLG_MASK << \
@@ -531,7 +554,7 @@ struct qedr_iw_ep {
struct iw_cm_id *cm_id;
struct qedr_qp *qp;
void *qed_context;
- u8 during_connect;
+ struct kref refcnt;
};
static inline
@@ -574,4 +597,11 @@ static inline struct qedr_srq *get_qedr_srq(struct ib_srq *ibsrq)
{
return container_of(ibsrq, struct qedr_srq, ibsrq);
}
+
+static inline struct qedr_user_mmap_entry *
+get_qedr_mmap_entry(struct rdma_user_mmap_entry *rdma_entry)
+{
+ return container_of(rdma_entry, struct qedr_user_mmap_entry,
+ rdma_entry);
+}
#endif
diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c
index 22881d4442b9..792eecd206b6 100644
--- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c
@@ -79,6 +79,27 @@ qedr_fill_sockaddr6(const struct qed_iwarp_cm_info *cm_info,
}
}
+static void qedr_iw_free_qp(struct kref *ref)
+{
+ struct qedr_qp *qp = container_of(ref, struct qedr_qp, refcnt);
+
+ kfree(qp);
+}
+
+static void
+qedr_iw_free_ep(struct kref *ref)
+{
+ struct qedr_iw_ep *ep = container_of(ref, struct qedr_iw_ep, refcnt);
+
+ if (ep->qp)
+ kref_put(&ep->qp->refcnt, qedr_iw_free_qp);
+
+ if (ep->cm_id)
+ ep->cm_id->rem_ref(ep->cm_id);
+
+ kfree(ep);
+}
+
static void
qedr_iw_mpa_request(void *context, struct qed_iwarp_cm_event_params *params)
{
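
The two release helpers above move the ep/qp lifetime to kref-style reference counting: whichever path drops the last reference (close event, disconnect worker, or a refused passive connect) triggers the release callback exactly once. A minimal userspace sketch of that pattern follows; struct ep here is an illustrative stand-in for the driver's endpoint, and the kref helpers are simplified analogues of the kernel's.

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct kref {
	atomic_int refcount;
};

static void kref_init(struct kref *k)
{
	atomic_init(&k->refcount, 1);
}

static void kref_get(struct kref *k)
{
	atomic_fetch_add(&k->refcount, 1);
}

static int kref_put(struct kref *k, void (*release)(struct kref *k))
{
	if (atomic_fetch_sub(&k->refcount, 1) == 1) {
		release(k);		/* last reference dropped */
		return 1;
	}
	return 0;
}

struct ep {
	struct kref refcnt;
	const char *name;
};

static void ep_release(struct kref *k)
{
	/* open-coded container_of() */
	struct ep *ep = (struct ep *)((char *)k - offsetof(struct ep, refcnt));

	printf("releasing %s\n", ep->name);
	free(ep);
}

int main(void)
{
	struct ep *ep = malloc(sizeof(*ep));

	if (!ep)
		return 1;
	ep->name = "ep0";
	kref_init(&ep->refcnt);			/* created with one reference */
	kref_get(&ep->refcnt);			/* disconnect work takes another */
	kref_put(&ep->refcnt, ep_release);	/* worker finishes: not freed yet */
	kref_put(&ep->refcnt, ep_release);	/* close event drops the last one */
	return 0;
}

As in the patch, each asynchronous path holds its own reference, so the object is freed exactly once no matter which path runs last.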
@@ -93,6 +114,7 @@ qedr_iw_mpa_request(void *context, struct qed_iwarp_cm_event_params *params)
ep->dev = dev;
ep->qed_context = params->ep_context;
+ kref_init(&ep->refcnt);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CONNECT_REQUEST;
@@ -141,12 +163,10 @@ qedr_iw_close_event(void *context, struct qed_iwarp_cm_event_params *params)
{
struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context;
- if (ep->cm_id) {
+ if (ep->cm_id)
qedr_iw_issue_event(context, params, IW_CM_EVENT_CLOSE);
- ep->cm_id->rem_ref(ep->cm_id);
- ep->cm_id = NULL;
- }
+ kref_put(&ep->refcnt, qedr_iw_free_ep);
}
static void
@@ -186,11 +206,13 @@ static void qedr_iw_disconnect_worker(struct work_struct *work)
struct qedr_qp *qp = ep->qp;
struct iw_cm_event event;
- if (qp->destroyed) {
- kfree(dwork);
- qedr_iw_qp_rem_ref(&qp->ibqp);
- return;
- }
+ /* The qp won't be released until we release the ep.
+	 * The ep's refcnt was increased before calling this
+	 * function, so it is safe to access the qp.
+ */
+ if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT,
+ &qp->iwarp_cm_flags))
+ goto out;
memset(&event, 0, sizeof(event));
event.status = dwork->status;
@@ -204,7 +226,6 @@ static void qedr_iw_disconnect_worker(struct work_struct *work)
else
qp_params.new_state = QED_ROCE_QP_STATE_SQD;
- kfree(dwork);
if (ep->cm_id)
ep->cm_id->event_handler(ep->cm_id, &event);
@@ -214,7 +235,10 @@ static void qedr_iw_disconnect_worker(struct work_struct *work)
dev->ops->rdma_modify_qp(dev->rdma_ctx, qp->qed_qp, &qp_params);
- qedr_iw_qp_rem_ref(&qp->ibqp);
+ complete(&ep->qp->iwarp_cm_comp);
+out:
+ kfree(dwork);
+ kref_put(&ep->refcnt, qedr_iw_free_ep);
}
static void
@@ -224,13 +248,17 @@ qedr_iw_disconnect_event(void *context,
struct qedr_discon_work *work;
struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context;
struct qedr_dev *dev = ep->dev;
- struct qedr_qp *qp = ep->qp;
work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (!work)
return;
- qedr_iw_qp_add_ref(&qp->ibqp);
+	/* We can't get a close event before disconnect, but since
+	 * we're scheduling this handler on a work queue we need to
+	 * make sure close won't delete the ep first, so we increase
+	 * its refcnt.
+	 */
+ kref_get(&ep->refcnt);
+
work->ep = ep;
work->event = params->event;
work->status = params->status;
@@ -252,16 +280,30 @@ qedr_iw_passive_complete(void *context,
if ((params->status == -ECONNREFUSED) && (!ep->qp)) {
DP_DEBUG(dev, QEDR_MSG_IWARP,
"PASSIVE connection refused releasing ep...\n");
- kfree(ep);
+ kref_put(&ep->refcnt, qedr_iw_free_ep);
return;
}
+ complete(&ep->qp->iwarp_cm_comp);
qedr_iw_issue_event(context, params, IW_CM_EVENT_ESTABLISHED);
if (params->status < 0)
qedr_iw_close_event(context, params);
}
+static void
+qedr_iw_active_complete(void *context,
+ struct qed_iwarp_cm_event_params *params)
+{
+ struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context;
+
+ complete(&ep->qp->iwarp_cm_comp);
+ qedr_iw_issue_event(context, params, IW_CM_EVENT_CONNECT_REPLY);
+
+ if (params->status < 0)
+ kref_put(&ep->refcnt, qedr_iw_free_ep);
+}
+
static int
qedr_iw_mpa_reply(void *context, struct qed_iwarp_cm_event_params *params)
{
@@ -288,27 +330,15 @@ qedr_iw_event_handler(void *context, struct qed_iwarp_cm_event_params *params)
qedr_iw_mpa_reply(context, params);
break;
case QED_IWARP_EVENT_PASSIVE_COMPLETE:
- ep->during_connect = 0;
qedr_iw_passive_complete(context, params);
break;
-
case QED_IWARP_EVENT_ACTIVE_COMPLETE:
- ep->during_connect = 0;
- qedr_iw_issue_event(context,
- params,
- IW_CM_EVENT_CONNECT_REPLY);
- if (params->status < 0) {
- struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context;
-
- ep->cm_id->rem_ref(ep->cm_id);
- ep->cm_id = NULL;
- }
+ qedr_iw_active_complete(context, params);
break;
case QED_IWARP_EVENT_DISCONNECT:
qedr_iw_disconnect_event(context, params);
break;
case QED_IWARP_EVENT_CLOSE:
- ep->during_connect = 0;
qedr_iw_close_event(context, params);
break;
case QED_IWARP_EVENT_RQ_EMPTY:
@@ -451,10 +481,10 @@ qedr_addr6_resolve(struct qedr_dev *dev,
if ((!dst) || dst->error) {
if (dst) {
- dst_release(dst);
DP_ERR(dev,
"ip6_route_output returned dst->error = %d\n",
dst->error);
+ dst_release(dst);
}
return -EINVAL;
}
@@ -476,6 +506,19 @@ qedr_addr6_resolve(struct qedr_dev *dev,
return rc;
}
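+/* Look up a QP by number and take a reference while still holding the
+ * xarray lock, so the QP cannot be freed between the lookup and the
+ * kref_get.
+ */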
+static struct qedr_qp *qedr_iw_load_qp(struct qedr_dev *dev, u32 qpn)
+{
+ struct qedr_qp *qp;
+
+ xa_lock(&dev->qps);
+ qp = xa_load(&dev->qps, qpn);
+ if (qp)
+ kref_get(&qp->refcnt);
+ xa_unlock(&dev->qps);
+
+ return qp;
+}
+
int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
{
struct qedr_dev *dev = get_qedr_dev(cm_id->device);
@@ -491,10 +534,6 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
int rc = 0;
int i;
- qp = xa_load(&dev->qps, conn_param->qpn);
- if (unlikely(!qp))
- return -EINVAL;
-
laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
@@ -516,8 +555,15 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
return -ENOMEM;
ep->dev = dev;
+ kref_init(&ep->refcnt);
+
+ qp = qedr_iw_load_qp(dev, conn_param->qpn);
+ if (!qp) {
+ rc = -EINVAL;
+ goto err;
+ }
+
ep->qp = qp;
- qp->ep = ep;
cm_id->add_ref(cm_id);
ep->cm_id = cm_id;
@@ -580,16 +626,20 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
in_params.qp = qp->qed_qp;
memcpy(in_params.local_mac_addr, dev->ndev->dev_addr, ETH_ALEN);
- ep->during_connect = 1;
+ if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
+ &qp->iwarp_cm_flags))
+ goto err; /* QP already being destroyed */
+
rc = dev->ops->iwarp_connect(dev->rdma_ctx, &in_params, &out_params);
- if (rc)
+ if (rc) {
+ complete(&qp->iwarp_cm_comp);
goto err;
+ }
return rc;
err:
- cm_id->rem_ref(cm_id);
- kfree(ep);
+ kref_put(&ep->refcnt, qedr_iw_free_ep);
return rc;
}
@@ -677,18 +727,17 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct qedr_dev *dev = ep->dev;
struct qedr_qp *qp;
struct qed_iwarp_accept_in params;
- int rc;
+ int rc = 0;
DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn);
- qp = xa_load(&dev->qps, conn_param->qpn);
+ qp = qedr_iw_load_qp(dev, conn_param->qpn);
if (!qp) {
DP_ERR(dev, "Invalid QP number %d\n", conn_param->qpn);
return -EINVAL;
}
ep->qp = qp;
- qp->ep = ep;
cm_id->add_ref(cm_id);
ep->cm_id = cm_id;
@@ -700,15 +749,21 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
params.ird = conn_param->ird;
params.ord = conn_param->ord;
- ep->during_connect = 1;
+ if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
+ &qp->iwarp_cm_flags))
+ goto err; /* QP already destroyed */
+
rc = dev->ops->iwarp_accept(dev->rdma_ctx, &params);
- if (rc)
+ if (rc) {
+ complete(&qp->iwarp_cm_comp);
goto err;
+ }
return rc;
+
err:
- ep->during_connect = 0;
- cm_id->rem_ref(cm_id);
+ kref_put(&ep->refcnt, qedr_iw_free_ep);
+
return rc;
}
@@ -731,17 +786,14 @@ void qedr_iw_qp_add_ref(struct ib_qp *ibqp)
{
struct qedr_qp *qp = get_qedr_qp(ibqp);
- atomic_inc(&qp->refcnt);
+ kref_get(&qp->refcnt);
}
void qedr_iw_qp_rem_ref(struct ib_qp *ibqp)
{
struct qedr_qp *qp = get_qedr_qp(ibqp);
- if (atomic_dec_and_test(&qp->refcnt)) {
- xa_erase_irq(&qp->dev->qps, qp->qp_id);
- kfree(qp);
- }
+ kref_put(&qp->refcnt, qedr_iw_free_qp);
}
struct ib_qp *qedr_iw_get_qp(struct ib_device *ibdev, int qpn)
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 27d90a84ea01..484b555150e0 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -51,6 +51,7 @@
#include "verbs.h"
#include <rdma/qedr-abi.h>
#include "qedr_roce_cm.h"
+#include "qedr_iw_cm.h"
#define QEDR_SRQ_WQE_ELEM_SIZE sizeof(union rdma_srq_elm)
#define RDMA_MAX_SGE_PER_SRQ (4)
@@ -58,6 +59,11 @@
#define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
+enum {
+ QEDR_USER_MMAP_IO_WC = 0,
+ QEDR_USER_MMAP_PHYS_PAGE,
+};
+
static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
size_t len)
{
@@ -221,10 +227,10 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
/* *attr being zeroed by the caller, avoid zeroing it here */
if (rdma_port->port_state == QED_RDMA_PORT_UP) {
attr->state = IB_PORT_ACTIVE;
- attr->phys_state = 5;
+ attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
} else {
attr->state = IB_PORT_DOWN;
- attr->phys_state = 3;
+ attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
}
attr->max_mtu = IB_MTU_4096;
attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
@@ -250,78 +256,31 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
return 0;
}
-int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
- struct ib_port_modify *props)
-{
- return 0;
-}
-
-static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
- unsigned long len)
-{
- struct qedr_mm *mm;
-
- mm = kzalloc(sizeof(*mm), GFP_KERNEL);
- if (!mm)
- return -ENOMEM;
-
- mm->key.phy_addr = phy_addr;
- /* This function might be called with a length which is not a multiple
- * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
- * forces this granularity by increasing the requested size if needed.
- * When qedr_mmap is called, it will search the list with the updated
- * length as a key. To prevent search failures, the length is rounded up
- * in advance to PAGE_SIZE.
- */
- mm->key.len = roundup(len, PAGE_SIZE);
- INIT_LIST_HEAD(&mm->entry);
-
- mutex_lock(&uctx->mm_list_lock);
- list_add(&mm->entry, &uctx->mm_head);
- mutex_unlock(&uctx->mm_list_lock);
-
- DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
- "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
- (unsigned long long)mm->key.phy_addr,
- (unsigned long)mm->key.len, uctx);
-
- return 0;
-}
-
-static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
- unsigned long len)
-{
- bool found = false;
- struct qedr_mm *mm;
-
- mutex_lock(&uctx->mm_list_lock);
- list_for_each_entry(mm, &uctx->mm_head, entry) {
- if (len != mm->key.len || phy_addr != mm->key.phy_addr)
- continue;
-
- found = true;
- break;
- }
- mutex_unlock(&uctx->mm_list_lock);
- DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
- "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
- mm->key.phy_addr, mm->key.len, uctx, found);
-
- return found;
-}
-
int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
{
struct ib_device *ibdev = uctx->device;
int rc;
struct qedr_ucontext *ctx = get_qedr_ucontext(uctx);
struct qedr_alloc_ucontext_resp uresp = {};
+ struct qedr_alloc_ucontext_req ureq = {};
struct qedr_dev *dev = get_qedr_dev(ibdev);
struct qed_rdma_add_user_out_params oparams;
+ struct qedr_user_mmap_entry *entry;
if (!udata)
return -EFAULT;
+ if (udata->inlen) {
+ rc = ib_copy_from_udata(&ureq, udata,
+ min(sizeof(ureq), udata->inlen));
+ if (rc) {
+ DP_ERR(dev, "Problem copying data from user space\n");
+ return -EFAULT;
+ }
+
+ ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC);
+ }
+
rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
if (rc) {
DP_ERR(dev,
@@ -334,13 +293,40 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
ctx->dpi_addr = oparams.dpi_addr;
ctx->dpi_phys_addr = oparams.dpi_phys_addr;
ctx->dpi_size = oparams.dpi_size;
- INIT_LIST_HEAD(&ctx->mm_head);
- mutex_init(&ctx->mm_list_lock);
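+	/* Expose the context's doorbell (DPI) region to user space through an
+	 * rdma_user_mmap entry instead of the old per-context mmap list.
+	 */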
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ entry->io_address = ctx->dpi_phys_addr;
+ entry->length = ctx->dpi_size;
+ entry->mmap_flag = QEDR_USER_MMAP_IO_WC;
+ entry->dpi = ctx->dpi;
+ entry->dev = dev;
+ rc = rdma_user_mmap_entry_insert(uctx, &entry->rdma_entry,
+ ctx->dpi_size);
+ if (rc) {
+ kfree(entry);
+ goto err;
+ }
+ ctx->db_mmap_entry = &entry->rdma_entry;
+
+ if (!dev->user_dpm_enabled)
+ uresp.dpm_flags = 0;
+ else if (rdma_protocol_iwarp(&dev->ibdev, 1))
+ uresp.dpm_flags = QEDR_DPM_TYPE_IWARP_LEGACY;
+ else
+ uresp.dpm_flags = QEDR_DPM_TYPE_ROCE_ENHANCED |
+ QEDR_DPM_TYPE_ROCE_LEGACY;
+
+ uresp.dpm_flags |= QEDR_DPM_SIZES_SET;
+ uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE;
+ uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE;
- uresp.dpm_enabled = dev->user_dpm_enabled;
uresp.wids_enabled = 1;
uresp.wid_count = oparams.wid_count;
- uresp.db_pa = ctx->dpi_phys_addr;
+ uresp.db_pa = rdma_user_mmap_get_offset(ctx->db_mmap_entry);
uresp.db_size = ctx->dpi_size;
uresp.max_send_wr = dev->attr.max_sqe;
uresp.max_recv_wr = dev->attr.max_rqe;
@@ -352,82 +338,92 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
if (rc)
- return rc;
+ goto err;
ctx->dev = dev;
- rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
- if (rc)
- return rc;
-
DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
&ctx->ibucontext);
return 0;
+
+err:
+ if (!ctx->db_mmap_entry)
+ dev->ops->rdma_remove_user(dev->rdma_ctx, ctx->dpi);
+ else
+ rdma_user_mmap_entry_remove(ctx->db_mmap_entry);
+
+ return rc;
}
void qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
{
struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
- struct qedr_mm *mm, *tmp;
DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
uctx);
- uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);
- list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
- DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
- "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
- mm->key.phy_addr, mm->key.len, uctx);
- list_del(&mm->entry);
- kfree(mm);
- }
+ rdma_user_mmap_entry_remove(uctx->db_mmap_entry);
}
-int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
{
- struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
- struct qedr_dev *dev = get_qedr_dev(context->device);
- unsigned long phys_addr = vma->vm_pgoff << PAGE_SHIFT;
- unsigned long len = (vma->vm_end - vma->vm_start);
- unsigned long dpi_start;
+ struct qedr_user_mmap_entry *entry = get_qedr_mmap_entry(rdma_entry);
+ struct qedr_dev *dev = entry->dev;
- dpi_start = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size);
+ if (entry->mmap_flag == QEDR_USER_MMAP_PHYS_PAGE)
+ free_page((unsigned long)entry->address);
+ else if (entry->mmap_flag == QEDR_USER_MMAP_IO_WC)
+ dev->ops->rdma_remove_user(dev->rdma_ctx, entry->dpi);
- DP_DEBUG(dev, QEDR_MSG_INIT,
- "mmap invoked with vm_start=0x%pK, vm_end=0x%pK,vm_pgoff=0x%pK; dpi_start=0x%pK dpi_size=0x%x\n",
- (void *)vma->vm_start, (void *)vma->vm_end,
- (void *)vma->vm_pgoff, (void *)dpi_start, ucontext->dpi_size);
+ kfree(entry);
+}
- if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) {
- DP_ERR(dev,
- "failed mmap, addresses must be page aligned: start=0x%pK, end=0x%pK\n",
- (void *)vma->vm_start, (void *)vma->vm_end);
- return -EINVAL;
- }
+int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma)
+{
+ struct ib_device *dev = ucontext->device;
+ size_t length = vma->vm_end - vma->vm_start;
+ struct rdma_user_mmap_entry *rdma_entry;
+ struct qedr_user_mmap_entry *entry;
+ int rc = 0;
+ u64 pfn;
- if (!qedr_search_mmap(ucontext, phys_addr, len)) {
- DP_ERR(dev, "failed mmap, vm_pgoff=0x%lx is not authorized\n",
- vma->vm_pgoff);
- return -EINVAL;
- }
+ ibdev_dbg(dev,
+ "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
+ vma->vm_start, vma->vm_end, length, vma->vm_pgoff);
- if (phys_addr < dpi_start ||
- ((phys_addr + len) > (dpi_start + ucontext->dpi_size))) {
- DP_ERR(dev,
- "failed mmap, pages are outside of dpi; page address=0x%pK, dpi_start=0x%pK, dpi_size=0x%x\n",
- (void *)phys_addr, (void *)dpi_start,
- ucontext->dpi_size);
+ rdma_entry = rdma_user_mmap_entry_get(ucontext, vma);
+ if (!rdma_entry) {
+ ibdev_dbg(dev, "pgoff[%#lx] does not have valid entry\n",
+ vma->vm_pgoff);
return -EINVAL;
}
-
- if (vma->vm_flags & VM_READ) {
- DP_ERR(dev, "failed mmap, cannot map doorbell bar for read\n");
- return -EINVAL;
+ entry = get_qedr_mmap_entry(rdma_entry);
+ ibdev_dbg(dev,
+ "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
+ entry->io_address, length, entry->mmap_flag);
+
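+	/* Map either the write-combined doorbell BAR (IO_WC) or the kernel
+	 * page used for doorbell recovery (PHYS_PAGE), based on the entry
+	 * type recorded when the mapping was created.
+	 */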
+ switch (entry->mmap_flag) {
+ case QEDR_USER_MMAP_IO_WC:
+ pfn = entry->io_address >> PAGE_SHIFT;
+ rc = rdma_user_mmap_io(ucontext, vma, pfn, length,
+ pgprot_writecombine(vma->vm_page_prot),
+ rdma_entry);
+ break;
+ case QEDR_USER_MMAP_PHYS_PAGE:
+ rc = vm_insert_page(vma, vma->vm_start,
+ virt_to_page(entry->address));
+ break;
+ default:
+ rc = -EINVAL;
}
- vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
- return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len,
- vma->vm_page_prot);
+ if (rc)
+ ibdev_dbg(dev,
+ "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
+ entry->io_address, length, entry->mmap_flag, rc);
+
+ rdma_user_mmap_entry_put(rdma_entry);
+ return rc;
}
int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
@@ -657,16 +653,50 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
}
}
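+/* Register a doorbell address/data pair with the device's doorbell recovery
+ * mechanism. A NULL db_data means the user library predates doorbell
+ * recovery support, in which case registration is silently skipped.
+ */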
+static int qedr_db_recovery_add(struct qedr_dev *dev,
+ void __iomem *db_addr,
+ void *db_data,
+ enum qed_db_rec_width db_width,
+ enum qed_db_rec_space db_space)
+{
+ if (!db_data) {
+ DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
+ return 0;
+ }
+
+ return dev->ops->common->db_recovery_add(dev->cdev, db_addr, db_data,
+ db_width, db_space);
+}
+
+static void qedr_db_recovery_del(struct qedr_dev *dev,
+ void __iomem *db_addr,
+ void *db_data)
+{
+ if (!db_data) {
+ DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
+ return;
+ }
+
+	/* Ignore the return code as there is not much we can do about it.
+	 * An error will be logged inside the call.
+	 */
+ dev->ops->common->db_recovery_del(dev->cdev, db_addr, db_data);
+}
+
static int qedr_copy_cq_uresp(struct qedr_dev *dev,
- struct qedr_cq *cq, struct ib_udata *udata)
+ struct qedr_cq *cq, struct ib_udata *udata,
+ u32 db_offset)
{
struct qedr_create_cq_uresp uresp;
int rc;
memset(&uresp, 0, sizeof(uresp));
- uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
+ uresp.db_offset = db_offset;
uresp.icid = cq->icid;
+ if (cq->q.db_mmap_entry)
+ uresp.db_rec_addr =
+ rdma_user_mmap_get_offset(cq->q.db_mmap_entry);
rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
if (rc)
@@ -694,10 +724,58 @@ static inline int qedr_align_cq_entries(int entries)
return aligned_size / QEDR_CQE_SIZE;
}
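+/* Allocate a zeroed kernel page for doorbell recovery data and expose it to
+ * user space through an rdma_user_mmap entry.
+ */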
+static int qedr_init_user_db_rec(struct ib_udata *udata,
+ struct qedr_dev *dev, struct qedr_userq *q,
+ bool requires_db_rec)
+{
+ struct qedr_ucontext *uctx =
+ rdma_udata_to_drv_context(udata, struct qedr_ucontext,
+ ibucontext);
+ struct qedr_user_mmap_entry *entry;
+ int rc;
+
+	/* Nothing to do for user queues without a doorbell (SRQ) or for a lib that doesn't support doorbell recovery */
+ if (requires_db_rec == 0 || !uctx->db_rec)
+ return 0;
+
+ /* Allocate a page for doorbell recovery, add to mmap */
+ q->db_rec_data = (void *)get_zeroed_page(GFP_USER);
+ if (!q->db_rec_data) {
+ DP_ERR(dev, "get_zeroed_page failed\n");
+ return -ENOMEM;
+ }
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ goto err_free_db_data;
+
+ entry->address = q->db_rec_data;
+ entry->length = PAGE_SIZE;
+ entry->mmap_flag = QEDR_USER_MMAP_PHYS_PAGE;
+ rc = rdma_user_mmap_entry_insert(&uctx->ibucontext,
+ &entry->rdma_entry,
+ PAGE_SIZE);
+ if (rc)
+ goto err_free_entry;
+
+ q->db_mmap_entry = &entry->rdma_entry;
+
+ return 0;
+
+err_free_entry:
+ kfree(entry);
+
+err_free_db_data:
+ free_page((unsigned long)q->db_rec_data);
+ q->db_rec_data = NULL;
+ return -ENOMEM;
+}
+
static inline int qedr_init_user_queue(struct ib_udata *udata,
struct qedr_dev *dev,
struct qedr_userq *q, u64 buf_addr,
- size_t buf_len, int access, int dmasync,
+ size_t buf_len, bool requires_db_rec,
+ int access,
int alloc_and_init)
{
u32 fw_pages;
@@ -705,7 +783,7 @@ static inline int qedr_init_user_queue(struct ib_udata *udata,
q->buf_addr = buf_addr;
q->buf_len = buf_len;
- q->umem = ib_umem_get(udata, q->buf_addr, q->buf_len, access, dmasync);
+ q->umem = ib_umem_get(&dev->ibdev, q->buf_addr, q->buf_len, access);
if (IS_ERR(q->umem)) {
DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
PTR_ERR(q->umem));
@@ -735,7 +813,8 @@ static inline int qedr_init_user_queue(struct ib_udata *udata,
}
}
- return 0;
+ /* mmap the user address used to store doorbell data for recovery */
+ return qedr_init_user_db_rec(udata, dev, q, requires_db_rec);
err0:
ib_umem_release(q->umem);
@@ -821,6 +900,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
int entries = attr->cqe;
struct qedr_cq *cq = get_qedr_cq(ibcq);
int chain_entries;
+ u32 db_offset;
int page_cnt;
u64 pbl_ptr;
u16 icid;
@@ -840,8 +920,12 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
chain_entries = qedr_align_cq_entries(entries);
chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
+	/* Calculate the db offset. User space will add the DPI base, the kernel adds the db address. */
+ db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
+
if (udata) {
- if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
+ if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
+ udata->inlen))) {
DP_ERR(dev,
"create cq: problem copying data from user space\n");
goto err0;
@@ -856,7 +940,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
cq->cq_type = QEDR_CQ_TYPE_USER;
rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr,
- ureq.len, IB_ACCESS_LOCAL_WRITE, 1,
+ ureq.len, true, IB_ACCESS_LOCAL_WRITE,
1);
if (rc)
goto err0;
@@ -865,6 +949,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
page_cnt = cq->q.pbl_info.num_pbes;
cq->ibcq.cqe = chain_entries;
+ cq->q.db_addr = ctx->dpi_addr + db_offset;
} else {
cq->cq_type = QEDR_CQ_TYPE_KERNEL;
@@ -876,7 +961,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
sizeof(union rdma_cqe),
&cq->pbl, NULL);
if (rc)
- goto err1;
+ goto err0;
page_cnt = qed_chain_get_page_cnt(&cq->pbl);
pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
@@ -888,21 +973,28 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
if (rc)
- goto err2;
+ goto err1;
cq->icid = icid;
cq->sig = QEDR_CQ_MAGIC_NUMBER;
spin_lock_init(&cq->cq_lock);
if (udata) {
- rc = qedr_copy_cq_uresp(dev, cq, udata);
+ rc = qedr_copy_cq_uresp(dev, cq, udata, db_offset);
+ if (rc)
+ goto err2;
+
+ rc = qedr_db_recovery_add(dev, cq->q.db_addr,
+ &cq->q.db_rec_data->db_data,
+ DB_REC_WIDTH_64B,
+ DB_REC_USER);
if (rc)
- goto err3;
+ goto err2;
+
} else {
/* Generate doorbell address. */
- cq->db_addr = dev->db_addr +
- DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
cq->db.data.icid = cq->icid;
+ cq->db_addr = dev->db_addr + db_offset;
cq->db.data.params = DB_AGG_CMD_SET <<
RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
@@ -912,6 +1004,11 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
cq->latest_cqe = NULL;
consume_cqe(cq);
cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
+
+ rc = qedr_db_recovery_add(dev, cq->db_addr, &cq->db.data,
+ DB_REC_WIDTH_64B, DB_REC_KERNEL);
+ if (rc)
+ goto err2;
}
DP_DEBUG(dev, QEDR_MSG_CQ,
@@ -920,18 +1017,19 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
return 0;
-err3:
+err2:
destroy_iparams.icid = cq->icid;
dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
&destroy_oparams);
-err2:
- if (udata)
- qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
- else
- dev->ops->common->chain_free(dev->cdev, &cq->pbl);
err1:
- if (udata)
+ if (udata) {
+ qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
ib_umem_release(cq->q.umem);
+ if (cq->q.db_mmap_entry)
+ rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
+ } else {
+ dev->ops->common->chain_free(dev->cdev, &cq->pbl);
+ }
err0:
return -EINVAL;
}
@@ -962,8 +1060,10 @@ void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
cq->destroyed = 1;
/* GSIs CQs are handled by driver, so they don't exist in the FW */
- if (cq->cq_type == QEDR_CQ_TYPE_GSI)
+ if (cq->cq_type == QEDR_CQ_TYPE_GSI) {
+ qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
return;
+ }
iparams.icid = cq->icid;
dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
@@ -972,6 +1072,14 @@ void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
if (udata) {
qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
ib_umem_release(cq->q.umem);
+
+ if (cq->q.db_rec_data) {
+ qedr_db_recovery_del(dev, cq->q.db_addr,
+ &cq->q.db_rec_data->db_data);
+ rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
+ }
+ } else {
+ qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
}
/* We don't want the IRQ handler to handle a non-existing CQ so we
@@ -1136,8 +1244,8 @@ static int qedr_copy_srq_uresp(struct qedr_dev *dev,
}
static void qedr_copy_rq_uresp(struct qedr_dev *dev,
- struct qedr_create_qp_uresp *uresp,
- struct qedr_qp *qp)
+ struct qedr_create_qp_uresp *uresp,
+ struct qedr_qp *qp)
{
/* iWARP requires two doorbells per RQ. */
if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
@@ -1150,6 +1258,9 @@ static void qedr_copy_rq_uresp(struct qedr_dev *dev,
}
uresp->rq_icid = qp->icid;
+ if (qp->urq.db_mmap_entry)
+ uresp->rq_db_rec_addr =
+ rdma_user_mmap_get_offset(qp->urq.db_mmap_entry);
}
static void qedr_copy_sq_uresp(struct qedr_dev *dev,
@@ -1163,22 +1274,26 @@ static void qedr_copy_sq_uresp(struct qedr_dev *dev,
uresp->sq_icid = qp->icid;
else
uresp->sq_icid = qp->icid + 1;
+
+ if (qp->usq.db_mmap_entry)
+ uresp->sq_db_rec_addr =
+ rdma_user_mmap_get_offset(qp->usq.db_mmap_entry);
}
static int qedr_copy_qp_uresp(struct qedr_dev *dev,
- struct qedr_qp *qp, struct ib_udata *udata)
+ struct qedr_qp *qp, struct ib_udata *udata,
+ struct qedr_create_qp_uresp *uresp)
{
- struct qedr_create_qp_uresp uresp;
int rc;
- memset(&uresp, 0, sizeof(uresp));
- qedr_copy_sq_uresp(dev, &uresp, qp);
- qedr_copy_rq_uresp(dev, &uresp, qp);
+ memset(uresp, 0, sizeof(*uresp));
+ qedr_copy_sq_uresp(dev, uresp, qp);
+ qedr_copy_rq_uresp(dev, uresp, qp);
- uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
- uresp.qp_id = qp->qp_id;
+ uresp->atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
+ uresp->qp_id = qp->qp_id;
- rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ rc = qedr_ib_copy_to_udata(udata, uresp, sizeof(*uresp));
if (rc)
DP_ERR(dev,
"create qp: failed a copy to user space with qp icid=0x%x.\n",
@@ -1193,7 +1308,10 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev,
struct ib_qp_init_attr *attrs)
{
spin_lock_init(&qp->q_lock);
- atomic_set(&qp->refcnt, 1);
+ if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
+ kref_init(&qp->refcnt);
+ init_completion(&qp->iwarp_cm_comp);
+ }
qp->pd = pd;
qp->qp_type = attrs->qp_type;
qp->max_inline_data = attrs->cap.max_inline_data;
@@ -1222,16 +1340,35 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev,
qp->sq.max_sges, qp->sq_cq->icid);
}
-static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
+static int qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
{
+ int rc;
+
qp->sq.db = dev->db_addr +
DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
qp->sq.db_data.data.icid = qp->icid + 1;
+ rc = qedr_db_recovery_add(dev, qp->sq.db,
+ &qp->sq.db_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_KERNEL);
+ if (rc)
+ return rc;
+
if (!qp->srq) {
qp->rq.db = dev->db_addr +
DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
qp->rq.db_data.data.icid = qp->icid;
+
+ rc = qedr_db_recovery_add(dev, qp->rq.db,
+ &qp->rq.db_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_KERNEL);
+ if (rc)
+ qedr_db_recovery_del(dev, qp->sq.db,
+ &qp->sq.db_data);
}
+
+ return rc;
}
static int qedr_check_srq_params(struct qedr_dev *dev,
@@ -1279,19 +1416,18 @@ static void qedr_free_srq_kernel_params(struct qedr_srq *srq)
static int qedr_init_srq_user_params(struct ib_udata *udata,
struct qedr_srq *srq,
struct qedr_create_srq_ureq *ureq,
- int access, int dmasync)
+ int access)
{
struct scatterlist *sg;
int rc;
rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr,
- ureq->srq_len, access, dmasync, 1);
+ ureq->srq_len, false, access, 1);
if (rc)
return rc;
- srq->prod_umem =
- ib_umem_get(udata, ureq->prod_pair_addr,
- sizeof(struct rdma_srq_producers), access, dmasync);
+ srq->prod_umem = ib_umem_get(srq->ibsrq.device, ureq->prod_pair_addr,
+ sizeof(struct rdma_srq_producers), access);
if (IS_ERR(srq->prod_umem)) {
qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
ib_umem_release(srq->usrq.umem);
@@ -1381,13 +1517,14 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
hw_srq->max_sges = init_attr->attr.max_sge;
if (udata) {
- if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
+ if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
+ udata->inlen))) {
DP_ERR(dev,
"create srq: problem copying data from user space\n");
goto err0;
}
- rc = qedr_init_srq_user_params(udata, srq, &ureq, 0, 0);
+ rc = qedr_init_srq_user_params(udata, srq, &ureq, 0);
if (rc)
goto err0;
@@ -1570,13 +1707,39 @@ qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
&qp->urq.pbl_info, FW_PAGE_SHIFT);
}
-static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
+static void qedr_cleanup_user(struct qedr_dev *dev,
+ struct qedr_ucontext *ctx,
+ struct qedr_qp *qp)
{
ib_umem_release(qp->usq.umem);
qp->usq.umem = NULL;
ib_umem_release(qp->urq.umem);
qp->urq.umem = NULL;
+
+ if (rdma_protocol_roce(&dev->ibdev, 1)) {
+ qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl);
+ qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl);
+ } else {
+ kfree(qp->usq.pbl_tbl);
+ kfree(qp->urq.pbl_tbl);
+ }
+
+ if (qp->usq.db_rec_data) {
+ qedr_db_recovery_del(dev, qp->usq.db_addr,
+ &qp->usq.db_rec_data->db_data);
+ rdma_user_mmap_entry_remove(qp->usq.db_mmap_entry);
+ }
+
+ if (qp->urq.db_rec_data) {
+ qedr_db_recovery_del(dev, qp->urq.db_addr,
+ &qp->urq.db_rec_data->db_data);
+ rdma_user_mmap_entry_remove(qp->urq.db_mmap_entry);
+ }
+
+ if (rdma_protocol_iwarp(&dev->ibdev, 1))
+ qedr_db_recovery_del(dev, qp->urq.db_rec_db2_addr,
+ &qp->urq.db_rec_db2_data);
}
static int qedr_create_user_qp(struct qedr_dev *dev,
@@ -1588,27 +1751,30 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
struct qed_rdma_create_qp_in_params in_params;
struct qed_rdma_create_qp_out_params out_params;
struct qedr_pd *pd = get_qedr_pd(ibpd);
+ struct qedr_create_qp_uresp uresp;
+ struct qedr_ucontext *ctx = NULL;
struct qedr_create_qp_ureq ureq;
int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
int rc = -EINVAL;
+ qp->create_type = QEDR_QP_CREATE_USER;
memset(&ureq, 0, sizeof(ureq));
- rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
+ rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen));
if (rc) {
DP_ERR(dev, "Problem copying data from user space\n");
return rc;
}
- /* SQ - read access only (0), dma sync not required (0) */
+ /* SQ - read access only (0) */
rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
- ureq.sq_len, 0, 0, alloc_and_init);
+ ureq.sq_len, true, 0, alloc_and_init);
if (rc)
return rc;
if (!qp->srq) {
- /* RQ - read access only (0), dma sync not required (0) */
+ /* RQ - read access only (0) */
rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
- ureq.rq_len, 0, 0, alloc_and_init);
+ ureq.rq_len, true, 0, alloc_and_init);
if (rc)
return rc;
}
@@ -1638,29 +1804,76 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
qp->qp_id = out_params.qp_id;
qp->icid = out_params.icid;
- rc = qedr_copy_qp_uresp(dev, qp, udata);
+ rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp);
if (rc)
goto err;
+	/* The db offset was calculated in qedr_copy_qp_uresp(); now set the db addresses in the user queues */
+ ctx = pd->uctx;
+ qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset;
+ qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset;
+
+ if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
+ qp->urq.db_rec_db2_addr = ctx->dpi_addr + uresp.rq_db2_offset;
+
+		/* Calculate the db_rec_db2 data here since it is constant,
+		 * so there is no need to receive it from user space.
+		 */
+ qp->urq.db_rec_db2_data.data.icid = cpu_to_le16(qp->icid);
+ qp->urq.db_rec_db2_data.data.value =
+ cpu_to_le16(DQ_TCM_IWARP_POST_RQ_CF_CMD);
+ }
+
+ rc = qedr_db_recovery_add(dev, qp->usq.db_addr,
+ &qp->usq.db_rec_data->db_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_USER);
+ if (rc)
+ goto err;
+
+ rc = qedr_db_recovery_add(dev, qp->urq.db_addr,
+ &qp->urq.db_rec_data->db_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_USER);
+ if (rc)
+ goto err;
+
+ if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
+ rc = qedr_db_recovery_add(dev, qp->urq.db_rec_db2_addr,
+ &qp->urq.db_rec_db2_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_USER);
+ if (rc)
+ goto err;
+ }
qedr_qp_user_print(dev, qp);
- return 0;
+ return rc;
err:
rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
if (rc)
DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);
err1:
- qedr_cleanup_user(dev, qp);
+ qedr_cleanup_user(dev, ctx, qp);
return rc;
}
-static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
+static int qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
{
+ int rc;
+
qp->sq.db = dev->db_addr +
DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
qp->sq.db_data.data.icid = qp->icid;
+ rc = qedr_db_recovery_add(dev, qp->sq.db,
+ &qp->sq.db_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_KERNEL);
+ if (rc)
+ return rc;
+
qp->rq.db = dev->db_addr +
DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
qp->rq.db_data.data.icid = qp->icid;
@@ -1668,6 +1881,19 @@ static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
qp->rq.iwarp_db2_data.data.icid = qp->icid;
qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
+
+ rc = qedr_db_recovery_add(dev, qp->rq.db,
+ &qp->rq.db_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_KERNEL);
+ if (rc)
+ return rc;
+
+ rc = qedr_db_recovery_add(dev, qp->rq.iwarp_db2,
+ &qp->rq.iwarp_db2_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_KERNEL);
+ return rc;
}
static int
@@ -1715,8 +1941,7 @@ qedr_roce_create_kernel_qp(struct qedr_dev *dev,
qp->qp_id = out_params.qp_id;
qp->icid = out_params.icid;
- qedr_set_roce_db_info(dev, qp);
- return rc;
+ return qedr_set_roce_db_info(dev, qp);
}
static int
@@ -1774,8 +1999,7 @@ qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
qp->qp_id = out_params.qp_id;
qp->icid = out_params.icid;
- qedr_set_iwarp_db_info(dev, qp);
- return rc;
+ return qedr_set_iwarp_db_info(dev, qp);
err:
dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
@@ -1790,6 +2014,20 @@ static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
kfree(qp->rqe_wr_id);
+
+	/* The GSI qp is not registered with the db recovery mechanism, so there is nothing to delete */
+ if (qp->qp_type == IB_QPT_GSI)
+ return;
+
+ qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data);
+
+ if (!qp->srq) {
+ qedr_db_recovery_del(dev, qp->rq.db, &qp->rq.db_data);
+
+ if (rdma_protocol_iwarp(&dev->ibdev, 1))
+ qedr_db_recovery_del(dev, qp->rq.iwarp_db2,
+ &qp->rq.iwarp_db2_data);
+ }
}
static int qedr_create_kernel_qp(struct qedr_dev *dev,
@@ -1805,6 +2043,7 @@ static int qedr_create_kernel_qp(struct qedr_dev *dev,
u32 n_sq_entries;
memset(&in_params, 0, sizeof(in_params));
+ qp->create_type = QEDR_QP_CREATE_KERNEL;
/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
* the ring. The ring should allow at least a single WR, even if the
@@ -1918,7 +2157,7 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
qp->ibqp.qp_num = qp->qp_id;
if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
- rc = xa_insert_irq(&dev->qps, qp->qp_id, qp, GFP_KERNEL);
+ rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL);
if (rc)
goto err;
}
@@ -2429,7 +2668,10 @@ err:
static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
struct ib_udata *udata)
{
- int rc = 0;
+ struct qedr_ucontext *ctx =
+ rdma_udata_to_drv_context(udata, struct qedr_ucontext,
+ ibucontext);
+ int rc;
if (qp->qp_type != IB_QPT_GSI) {
rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
@@ -2437,8 +2679,8 @@ static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
return rc;
}
- if (udata)
- qedr_cleanup_user(dev, qp);
+ if (qp->create_type == QEDR_QP_CREATE_USER)
+ qedr_cleanup_user(dev, ctx, qp);
else
qedr_cleanup_kernel(dev, qp);
@@ -2451,7 +2693,6 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
struct qedr_dev *dev = qp->dev;
struct ib_qp_attr attr;
int attr_mask = 0;
- int rc = 0;
DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
qp, qp->qp_type);
@@ -2468,35 +2709,45 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
}
} else {
- /* Wait for the connect/accept to complete */
- if (qp->ep) {
- int wait_count = 1;
-
- while (qp->ep->during_connect) {
- DP_DEBUG(dev, QEDR_MSG_QP,
- "Still in during connect/accept\n");
-
- msleep(100);
- if (wait_count++ > 200) {
- DP_NOTICE(dev,
- "during connect timeout\n");
- break;
- }
- }
- }
+		/* If connection establishment started, the WAIT_FOR_CONNECT
+		 * bit will be on and we need to wait for the establishment
+		 * to complete before destroying the qp.
+		 */
+ if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
+ &qp->iwarp_cm_flags))
+ wait_for_completion(&qp->iwarp_cm_comp);
+
+ /* If graceful disconnect started, the WAIT_FOR_DISCONNECT
+ * bit will be on, and we need to wait for the disconnect to
+ * complete before continuing. We can use the same completion,
+ * iwarp_cm_comp, since this is the only place that waits for
+ * this completion and it is sequential. In addition,
+ * disconnect can't occur before the connection is fully
+ * established, therefore if WAIT_FOR_DISCONNECT is on it
+ * means WAIT_FOR_CONNECT is also on and the completion for
+ * CONNECT already occurred.
+ */
+ if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT,
+ &qp->iwarp_cm_flags))
+ wait_for_completion(&qp->iwarp_cm_comp);
}
if (qp->qp_type == IB_QPT_GSI)
qedr_destroy_gsi_qp(dev);
+	/* We need to remove the entry from the xarray before we release the
+	 * qp_id, to avoid a race where the qp_id is reallocated and xa_insert
+	 * fails for the new qp.
+	 */
+ if (rdma_protocol_iwarp(&dev->ibdev, 1))
+ xa_erase(&dev->qps, qp->qp_id);
+
qedr_free_qp_resources(dev, qp, udata);
- if (atomic_dec_and_test(&qp->refcnt) &&
- rdma_protocol_iwarp(&dev->ibdev, 1)) {
- xa_erase_irq(&dev->qps, qp->qp_id);
- kfree(qp);
- }
- return rc;
+ if (rdma_protocol_iwarp(&dev->ibdev, 1))
+ qedr_iw_qp_rem_ref(&qp->ibqp);
+
+ return 0;
}
int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags,
@@ -2598,7 +2849,7 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
mr->type = QEDR_MR_USER;
- mr->umem = ib_umem_get(udata, start, len, acc, 0);
+ mr->umem = ib_umem_get(ibpd->device, start, len, acc);
if (IS_ERR(mr->umem)) {
rc = -EFAULT;
goto err0;
@@ -2674,8 +2925,8 @@ int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
- if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR))
- qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
+ if (mr->type != QEDR_MR_DMA)
+ free_mr_info(dev, &mr->info);
/* it could be user registered memory. */
ib_umem_release(mr->umem);
@@ -4107,19 +4358,10 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
}
int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
- u8 port_num,
- const struct ib_wc *in_wc,
- const struct ib_grh *in_grh,
- const struct ib_mad_hdr *mad_hdr,
- size_t in_mad_size, struct ib_mad_hdr *out_mad,
- size_t *out_mad_size, u16 *out_mad_pkey_index)
+ u8 port_num, const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh, const struct ib_mad *in,
+ struct ib_mad *out_mad, size_t *out_mad_size,
+ u16 *out_mad_pkey_index)
{
- struct qedr_dev *dev = get_qedr_dev(ibdev);
-
- DP_DEBUG(dev, QEDR_MSG_GSI,
- "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n",
- mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod,
- mad_hdr->class_specific, mad_hdr->class_version,
- mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status);
return IB_MAD_RESULT_SUCCESS;
}
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 9aaa90283d6e..18027844eb87 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -35,8 +35,6 @@
int qedr_query_device(struct ib_device *ibdev,
struct ib_device_attr *attr, struct ib_udata *udata);
int qedr_query_port(struct ib_device *, u8 port, struct ib_port_attr *props);
-int qedr_modify_port(struct ib_device *, u8 port, int mask,
- struct ib_port_modify *props);
int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
int index, union ib_gid *gid);
@@ -46,7 +44,8 @@ int qedr_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey);
int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata);
void qedr_dealloc_ucontext(struct ib_ucontext *uctx);
-int qedr_mmap(struct ib_ucontext *, struct vm_area_struct *vma);
+int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma);
+void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
int qedr_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
void qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
@@ -93,10 +92,9 @@ int qedr_post_recv(struct ib_qp *, const struct ib_recv_wr *,
const struct ib_recv_wr **bad_wr);
int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
u8 port_num, const struct ib_wc *in_wc,
- const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in_mad,
- size_t in_mad_size, struct ib_mad_hdr *out_mad,
- size_t *out_mad_size, u16 *out_mad_pkey_index);
+ const struct ib_grh *in_grh, const struct ib_mad *in_mad,
+ struct ib_mad *out_mad, size_t *out_mad_size,
+ u16 *out_mad_pkey_index);
int qedr_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_immutable *immutable);
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 27b6e664e59d..b0144229cf3b 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1789,7 +1789,6 @@ static void unlock_expected_tids(struct qib_ctxtdata *rcd)
static int qib_close(struct inode *in, struct file *fp)
{
- int ret = 0;
struct qib_filedata *fd;
struct qib_ctxtdata *rcd;
struct qib_devdata *dd;
@@ -1873,7 +1872,7 @@ static int qib_close(struct inode *in, struct file *fp)
bail:
kfree(fd);
- return ret;
+ return 0;
}
static int qib_ctxt_info(struct file *fp, struct qib_ctxt_info __user *uinfo)
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index 41a569558a15..e336d778e076 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -493,7 +493,7 @@ static int remove_device_files(struct super_block *sb,
remove_file(dir, "flash");
inode_unlock(d_inode(dir));
ret = simple_rmdir(d_inode(root), dir);
- d_delete(dir);
+ d_drop(dir);
dput(dir);
bail:
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index 531d8a1db2c3..ca5ea734e3d0 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -1417,7 +1417,6 @@ static void qib_6120_quiet_serdes(struct qib_pportdata *ppd)
*
* The exact combo of LEDs if on is true is determined by looking
* at the ibcstatus.
-
* These LEDs indicate the physical and logical state of IB link.
* For this chip (at least with recommended board pinouts), LED1
* is Yellow (logical state) and LED2 is Green (physical state),
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index dd4843379f51..91d64dd71a8a 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -6630,7 +6630,7 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
/* vl15 buffers start just after the 4k buffers */
vl15off = dd->physaddr + (dd->piobufbase >> 32) +
dd->piobcnt4k * dd->align4k;
- dd->piovl15base = ioremap_nocache(vl15off,
+ dd->piovl15base = ioremap(vl15off,
NUM_VL15_BUFS * dd->align4k);
if (!dd->piovl15base) {
ret = -ENOMEM;
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index d4fd8a6cff7b..43c8ee1f46e0 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -1759,7 +1759,7 @@ int init_chip_wc_pat(struct qib_devdata *dd, u32 vl15buflen)
qib_userlen = dd->ureg_align * dd->cfgctxts;
/* Sanity checks passed, now create the new mappings */
- qib_kregbase = ioremap_nocache(qib_physaddr, qib_kreglen);
+ qib_kregbase = ioremap(qib_physaddr, qib_kreglen);
if (!qib_kregbase)
goto bail;
@@ -1768,7 +1768,7 @@ int init_chip_wc_pat(struct qib_devdata *dd, u32 vl15buflen)
goto bail_kregbase;
if (qib_userlen) {
- qib_userbase = ioremap_nocache(qib_physaddr + dd->uregbase,
+ qib_userbase = ioremap(qib_physaddr + dd->uregbase,
qib_userlen);
if (!qib_userbase)
goto bail_piobase;
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index f92faf5ec369..79bb83222e8d 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -2098,8 +2098,6 @@ static int cc_get_classportinfo(struct ib_cc_mad *ccp,
struct ib_cc_classportinfo_attr *p =
(struct ib_cc_classportinfo_attr *)ccp->mgmt_data;
- memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
-
p->base_version = 1;
p->class_version = 1;
p->cap_mask = 0;
@@ -2120,8 +2118,6 @@ static int cc_get_congestion_info(struct ib_cc_mad *ccp,
struct qib_ibport *ibp = to_iport(ibdev, port);
struct qib_pportdata *ppd = ppd_from_ibp(ibp);
- memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
-
p->congestion_info = 0;
p->control_table_cap = ppd->cc_max_table_entries;
@@ -2138,8 +2134,6 @@ static int cc_get_congestion_setting(struct ib_cc_mad *ccp,
struct qib_pportdata *ppd = ppd_from_ibp(ibp);
struct ib_cc_congestion_entry_shadow *entries;
- memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
-
spin_lock(&ppd->cc_shadow_lock);
entries = ppd->congestion_entries_shadow->entries;
@@ -2176,8 +2170,6 @@ static int cc_get_congestion_control_table(struct ib_cc_mad *ccp,
if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1)
goto bail;
- memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
-
spin_lock(&ppd->cc_shadow_lock);
max_cct_block =
@@ -2296,18 +2288,11 @@ bail:
return reply_failure((struct ib_smp *) ccp);
}
-static int check_cc_key(struct qib_ibport *ibp,
- struct ib_cc_mad *ccp, int mad_flags)
-{
- return 0;
-}
-
static int process_cc(struct ib_device *ibdev, int mad_flags,
u8 port, const struct ib_mad *in_mad,
struct ib_mad *out_mad)
{
struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad;
- struct qib_ibport *ibp = to_iport(ibdev, port);
int ret;
*out_mad = *in_mad;
@@ -2318,10 +2303,6 @@ static int process_cc(struct ib_device *ibdev, int mad_flags,
goto bail;
}
- ret = check_cc_key(ibp, ccp, mad_flags);
- if (ret)
- goto bail;
-
switch (ccp->method) {
case IB_MGMT_METHOD_GET:
switch (ccp->attr_id) {
@@ -2405,28 +2386,21 @@ bail:
*/
int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in, size_t in_mad_size,
- struct ib_mad_hdr *out, size_t *out_mad_size,
- u16 *out_mad_pkey_index)
+ const struct ib_mad *in, struct ib_mad *out,
+ size_t *out_mad_size, u16 *out_mad_pkey_index)
{
int ret;
struct qib_ibport *ibp = to_iport(ibdev, port);
struct qib_pportdata *ppd = ppd_from_ibp(ibp);
- const struct ib_mad *in_mad = (const struct ib_mad *)in;
- struct ib_mad *out_mad = (struct ib_mad *)out;
-
- if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
- *out_mad_size != sizeof(*out_mad)))
- return IB_MAD_RESULT_FAILURE;
- switch (in_mad->mad_hdr.mgmt_class) {
+ switch (in->mad_hdr.mgmt_class) {
case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
case IB_MGMT_CLASS_SUBN_LID_ROUTED:
- ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad);
+ ret = process_subn(ibdev, mad_flags, port, in, out);
goto bail;
case IB_MGMT_CLASS_PERF_MGMT:
- ret = process_perf(ibdev, port, in_mad, out_mad);
+ ret = process_perf(ibdev, port, in, out);
goto bail;
case IB_MGMT_CLASS_CONG_MGMT:
@@ -2435,7 +2409,7 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
ret = IB_MAD_RESULT_SUCCESS;
goto bail;
}
- ret = process_cc(ibdev, mad_flags, port, in_mad, out_mad);
+ ret = process_cc(ibdev, mad_flags, port, in, out);
goto bail;
default:
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
index 864f2af171f7..3dc6ce033319 100644
--- a/drivers/infiniband/hw/qib/qib_pcie.c
+++ b/drivers/infiniband/hw/qib/qib_pcie.c
@@ -145,7 +145,7 @@ int qib_pcie_ddinit(struct qib_devdata *dd, struct pci_dev *pdev,
addr = pci_resource_start(pdev, 0);
len = pci_resource_len(pdev, 0);
- dd->kregbase = ioremap_nocache(addr, len);
+ dd->kregbase = ioremap(addr, len);
if (!dd->kregbase)
return -ENOMEM;
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 1d5e2d4ee257..aaf7438258fa 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -313,11 +313,8 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
case IB_WR_SEND:
case IB_WR_SEND_WITH_IMM:
/* If no credit, return. */
- if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
- rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
- qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
+ if (!rvt_rc_credit_avail(qp, wqe))
goto bail;
- }
if (len > pmtu) {
qp->s_state = OP(SEND_FIRST);
len = pmtu;
@@ -344,11 +341,8 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
goto no_flow_control;
case IB_WR_RDMA_WRITE_WITH_IMM:
/* If no credit, return. */
- if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
- rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
- qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
+ if (!rvt_rc_credit_avail(qp, wqe))
goto bail;
- }
no_flow_control:
ohdr->u.rc.reth.vaddr =
cpu_to_be64(wqe->rdma_wr.remote_addr);
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index 905206a0c2d5..568b21eb6ea1 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -301,6 +301,9 @@ static ssize_t qib_portattr_show(struct kobject *kobj,
struct qib_pportdata *ppd =
container_of(kobj, struct qib_pportdata, pport_kobj);
+ if (!pattr->show)
+ return -EIO;
+
return pattr->show(ppd, buf);
}
@@ -312,6 +315,9 @@ static ssize_t qib_portattr_store(struct kobject *kobj,
struct qib_pportdata *ppd =
container_of(kobj, struct qib_pportdata, pport_kobj);
+ if (!pattr->store)
+ return -EIO;
+
return pattr->store(ppd, buf, len);
}
@@ -436,6 +442,7 @@ QIB_DIAGC_ATTR(dmawait);
QIB_DIAGC_ATTR(unaligned);
QIB_DIAGC_ATTR(rc_dupreq);
QIB_DIAGC_ATTR(rc_seqnak);
+QIB_DIAGC_ATTR(rc_crwaits);
static struct attribute *diagc_default_attributes[] = {
&qib_diagc_attr_rc_resends.attr,
@@ -453,6 +460,7 @@ static struct attribute *diagc_default_attributes[] = {
&qib_diagc_attr_unaligned.attr,
&qib_diagc_attr_rc_dupreq.attr,
&qib_diagc_attr_rc_seqnak.attr,
+ &qib_diagc_attr_rc_crwaits.attr,
NULL
};
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index bfbfbb7e0ff4..342e3172ca40 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -40,10 +40,7 @@
static void __qib_release_user_pages(struct page **p, size_t num_pages,
int dirty)
{
- if (dirty)
- put_user_pages_dirty_lock(p, num_pages);
- else
- put_user_pages(p, num_pages);
+ unpin_user_pages_dirty_lock(p, num_pages, dirty);
}
/**
@@ -111,7 +108,7 @@ int qib_get_user_pages(unsigned long start_page, size_t num_pages,
down_read(&current->mm->mmap_sem);
for (got = 0; got < num_pages; got += ret) {
- ret = get_user_pages(start_page + got * PAGE_SIZE,
+ ret = pin_user_pages(start_page + got * PAGE_SIZE,
num_pages - got,
FOLL_LONGTERM | FOLL_WRITE | FOLL_FORCE,
p + got, NULL);
diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
index 05190edc2611..a67599b5a550 100644
--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -317,7 +317,7 @@ static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
* the caller can ignore this page.
*/
if (put) {
- put_user_page(page);
+ unpin_user_page(page);
} else {
/* coalesce case */
kunmap(page);
@@ -631,7 +631,7 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev,
kunmap(pkt->addr[i].page);
if (pkt->addr[i].put_page)
- put_user_page(pkt->addr[i].page);
+ unpin_user_page(pkt->addr[i].page);
else
__free_page(pkt->addr[i].page);
} else if (pkt->addr[i].kvaddr) {
@@ -670,7 +670,7 @@ static int qib_user_sdma_pin_pages(const struct qib_devdata *dd,
else
j = npages;
- ret = get_user_pages_fast(addr, j, FOLL_LONGTERM, pages);
+ ret = pin_user_pages_fast(addr, j, FOLL_LONGTERM, pages);
if (ret != j) {
i = 0;
j = ret;
@@ -706,7 +706,7 @@ static int qib_user_sdma_pin_pages(const struct qib_devdata *dd,
/* if error, return all pages not managed by pkt */
free_pages:
while (i < j)
- put_user_page(pages[i++]);
+ unpin_user_page(pages[i++]);
done:
return ret;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 17bdf8acee2f..8bf414b47b96 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -245,9 +245,8 @@ void qib_sys_guid_chg(struct qib_ibport *ibp);
void qib_node_desc_chg(struct qib_ibport *ibp);
int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in, size_t in_mad_size,
- struct ib_mad_hdr *out, size_t *out_mad_size,
- u16 *out_mad_pkey_index);
+ const struct ib_mad *in, struct ib_mad *out,
+ size_t *out_mad_size, u16 *out_mad_pkey_index);
void qib_notify_create_mad_agent(struct rvt_dev_info *rdi, int port_idx);
void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index 03f54eb9404b..c9abe1c01e4e 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -89,9 +89,15 @@ static void usnic_ib_dump_vf(struct usnic_ib_vf *vf, char *buf, int buf_sz)
void usnic_ib_log_vf(struct usnic_ib_vf *vf)
{
- char buf[1000];
- usnic_ib_dump_vf(vf, buf, sizeof(buf));
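+	/* Use a heap buffer instead of a 1000-byte stack array to keep the
+	 * kernel stack frame small.
+	 */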
+ char *buf = kzalloc(1000, GFP_KERNEL);
+
+ if (!buf)
+ return;
+
+ usnic_ib_dump_vf(vf, buf, 1000);
usnic_dbg("%s\n", buf);
+
+ kfree(buf);
}
/* Start of netdev section */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index eeb07b245ef9..556b8e44a51c 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -194,7 +194,7 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
return ERR_CAST(dev_list);
for (i = 0; dev_list[i]; i++) {
dev = dev_list[i];
- vf = pci_get_drvdata(to_pci_dev(dev));
+ vf = dev_get_drvdata(dev);
spin_lock(&vf->lock);
vnic = vf->vnic;
if (!usnic_vnic_check_room(vnic, res_spec)) {
@@ -356,13 +356,14 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
if (!us_ibdev->ufdev->link_up) {
props->state = IB_PORT_DOWN;
- props->phys_state = 3;
+ props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
} else if (!us_ibdev->ufdev->inaddr) {
props->state = IB_PORT_INIT;
- props->phys_state = 4;
+ props->phys_state =
+ IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING;
} else {
props->state = IB_PORT_ACTIVE;
- props->phys_state = 5;
+ props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
}
props->port_cap_flags = 0;
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 0b0237d41613..bd9f944b68fc 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -75,10 +75,7 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty)
for_each_sg(chunk->page_list, sg, chunk->nents, i) {
page = sg_page(sg);
pa = sg_phys(sg);
- if (dirty)
- put_user_pages_dirty_lock(&page, 1);
- else
- put_user_page(page);
+ unpin_user_pages_dirty_lock(&page, 1, dirty);
usnic_dbg("pa: %pa\n", &pa);
}
kfree(chunk);
@@ -144,7 +141,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
ret = 0;
while (npages) {
- ret = get_user_pages(cur_base,
+ ret = pin_user_pages(cur_base,
min_t(unsigned long, npages,
PAGE_SIZE / sizeof(struct page *)),
gup_flags | FOLL_LONGTERM,
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
index 7800e6930502..4f6cc0de7ef9 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
@@ -135,8 +135,8 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
goto err_cq;
}
- cq->umem = ib_umem_get(udata, ucmd.buf_addr, ucmd.buf_size,
- IB_ACCESS_LOCAL_WRITE, 1);
+ cq->umem = ib_umem_get(ibdev, ucmd.buf_addr, ucmd.buf_size,
+ IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(cq->umem)) {
ret = PTR_ERR(cq->umem);
goto err_cq;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
index 8f9749d54688..86a6c054ea26 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
@@ -58,7 +58,8 @@
#define PVRDMA_ROCEV1_VERSION 17
#define PVRDMA_ROCEV2_VERSION 18
#define PVRDMA_PPN64_VERSION 19
-#define PVRDMA_VERSION PVRDMA_PPN64_VERSION
+#define PVRDMA_QPHANDLE_VERSION 20
+#define PVRDMA_VERSION PVRDMA_QPHANDLE_VERSION
#define PVRDMA_BOARD_ID 1
#define PVRDMA_REV_ID 1
@@ -581,6 +582,17 @@ struct pvrdma_cmd_create_qp_resp {
u32 max_inline_data;
};
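+/* v2 of the create-QP response: reports the device qp_handle separately from
+ * the qpn, used when the device advertises PVRDMA_QPHANDLE_VERSION.
+ */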
+struct pvrdma_cmd_create_qp_resp_v2 {
+ struct pvrdma_cmd_resp_hdr hdr;
+ u32 qpn;
+ u32 qp_handle;
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_send_sge;
+ u32 max_recv_sge;
+ u32 max_inline_data;
+};
+
struct pvrdma_cmd_modify_qp {
struct pvrdma_cmd_hdr hdr;
u32 qp_handle;
@@ -663,6 +675,7 @@ union pvrdma_cmd_resp {
struct pvrdma_cmd_create_cq_resp create_cq_resp;
struct pvrdma_cmd_resize_cq_resp resize_cq_resp;
struct pvrdma_cmd_create_qp_resp create_qp_resp;
+ struct pvrdma_cmd_create_qp_resp_v2 create_qp_resp_v2;
struct pvrdma_cmd_query_qp_resp query_qp_resp;
struct pvrdma_cmd_destroy_qp_resp destroy_qp_resp;
struct pvrdma_cmd_create_srq_resp create_srq_resp;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
index f3a3d22ee8d7..b039f1f00e05 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
@@ -126,7 +126,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return ERR_PTR(-EINVAL);
}
- umem = ib_umem_get(udata, start, length, access_flags, 0);
+ umem = ib_umem_get(pd->device, start, length, access_flags);
if (IS_ERR(umem)) {
dev_warn(&dev->pdev->dev,
"could not get umem for mem region\n");
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index bca6a58a442e..9de1281f9a3b 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -52,6 +52,9 @@
#include "pvrdma.h"
+static void __pvrdma_destroy_qp(struct pvrdma_dev *dev,
+ struct pvrdma_qp *qp);
+
static inline void get_cqs(struct pvrdma_qp *qp, struct pvrdma_cq **send_cq,
struct pvrdma_cq **recv_cq)
{
@@ -195,7 +198,9 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
union pvrdma_cmd_resp rsp;
struct pvrdma_cmd_create_qp *cmd = &req.create_qp;
struct pvrdma_cmd_create_qp_resp *resp = &rsp.create_qp_resp;
+ struct pvrdma_cmd_create_qp_resp_v2 *resp_v2 = &rsp.create_qp_resp_v2;
struct pvrdma_create_qp ucmd;
+ struct pvrdma_create_qp_resp qp_resp = {};
unsigned long flags;
int ret;
bool is_srq = !!init_attr->srq;
@@ -260,10 +265,20 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
goto err_qp;
}
+ /* Does userspace support returning both the qpn and the qp handle? */
+ if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION &&
+ udata->outlen < sizeof(qp_resp)) {
+ dev_warn(&dev->pdev->dev,
+ "create queuepair not supported\n");
+ ret = -EOPNOTSUPP;
+ goto err_qp;
+ }
+
if (!is_srq) {
/* set qp->sq.wqe_cnt, shift, buf_size.. */
- qp->rumem = ib_umem_get(udata, ucmd.rbuf_addr,
- ucmd.rbuf_size, 0, 0);
+ qp->rumem =
+ ib_umem_get(pd->device, ucmd.rbuf_addr,
+ ucmd.rbuf_size, 0);
if (IS_ERR(qp->rumem)) {
ret = PTR_ERR(qp->rumem);
goto err_qp;
@@ -274,8 +289,8 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
qp->srq = to_vsrq(init_attr->srq);
}
- qp->sumem = ib_umem_get(udata, ucmd.sbuf_addr,
- ucmd.sbuf_size, 0, 0);
+ qp->sumem = ib_umem_get(pd->device, ucmd.sbuf_addr,
+ ucmd.sbuf_size, 0);
if (IS_ERR(qp->sumem)) {
if (!is_srq)
ib_umem_release(qp->rumem);
@@ -379,13 +394,33 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
}
/* max_send_wr/_recv_wr/_send_sge/_recv_sge/_inline_data */
- qp->qp_handle = resp->qpn;
qp->port = init_attr->port_num;
- qp->ibqp.qp_num = resp->qpn;
+
+ if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION) {
+ qp->ibqp.qp_num = resp_v2->qpn;
+ qp->qp_handle = resp_v2->qp_handle;
+ } else {
+ qp->ibqp.qp_num = resp->qpn;
+ qp->qp_handle = resp->qpn;
+ }
+
spin_lock_irqsave(&dev->qp_tbl_lock, flags);
dev->qp_tbl[qp->qp_handle % dev->dsr->caps.max_qp] = qp;
spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);
+ if (udata) {
+ qp_resp.qpn = qp->ibqp.qp_num;
+ qp_resp.qp_handle = qp->qp_handle;
+
+ if (ib_copy_to_udata(udata, &qp_resp,
+ min(udata->outlen, sizeof(qp_resp)))) {
+ dev_warn(&dev->pdev->dev,
+ "failed to copy back udata\n");
+ __pvrdma_destroy_qp(dev, qp);
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
return &qp->ibqp;
err_pdir:
@@ -400,27 +435,15 @@ err_qp:
return ERR_PTR(ret);
}
-static void pvrdma_free_qp(struct pvrdma_qp *qp)
+static void _pvrdma_free_qp(struct pvrdma_qp *qp)
{
+ unsigned long flags;
struct pvrdma_dev *dev = to_vdev(qp->ibqp.device);
- struct pvrdma_cq *scq;
- struct pvrdma_cq *rcq;
- unsigned long flags, scq_flags, rcq_flags;
-
- /* In case cq is polling */
- get_cqs(qp, &scq, &rcq);
- pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags);
-
- _pvrdma_flush_cqe(qp, scq);
- if (scq != rcq)
- _pvrdma_flush_cqe(qp, rcq);
spin_lock_irqsave(&dev->qp_tbl_lock, flags);
dev->qp_tbl[qp->qp_handle] = NULL;
spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);
- pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);
-
if (refcount_dec_and_test(&qp->refcnt))
complete(&qp->free);
wait_for_completion(&qp->free);
@@ -435,34 +458,71 @@ static void pvrdma_free_qp(struct pvrdma_qp *qp)
atomic_dec(&dev->num_qps);
}
-/**
- * pvrdma_destroy_qp - destroy a queue pair
- * @qp: the queue pair to destroy
- * @udata: user data or null for kernel object
- *
- * @return: 0 on success.
- */
-int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
+static void pvrdma_free_qp(struct pvrdma_qp *qp)
+{
+ struct pvrdma_cq *scq;
+ struct pvrdma_cq *rcq;
+ unsigned long scq_flags, rcq_flags;
+
+ /* In case cq is polling */
+ get_cqs(qp, &scq, &rcq);
+ pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags);
+
+ _pvrdma_flush_cqe(qp, scq);
+ if (scq != rcq)
+ _pvrdma_flush_cqe(qp, rcq);
+
+ /*
+ * We're now unlocking the CQs before clearing out the qp handle; this
+ * should still be safe. We have destroyed the backend QP and flushed
+ * the CQEs, so there should be no other completions for this QP.
+ */
+ pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);
+
+ _pvrdma_free_qp(qp);
+}
+
+static inline void _pvrdma_destroy_qp_work(struct pvrdma_dev *dev,
+ u32 qp_handle)
{
- struct pvrdma_qp *vqp = to_vqp(qp);
union pvrdma_cmd_req req;
struct pvrdma_cmd_destroy_qp *cmd = &req.destroy_qp;
int ret;
memset(cmd, 0, sizeof(*cmd));
cmd->hdr.cmd = PVRDMA_CMD_DESTROY_QP;
- cmd->qp_handle = vqp->qp_handle;
+ cmd->qp_handle = qp_handle;
- ret = pvrdma_cmd_post(to_vdev(qp->device), &req, NULL, 0);
+ ret = pvrdma_cmd_post(dev, &req, NULL, 0);
if (ret < 0)
- dev_warn(&to_vdev(qp->device)->pdev->dev,
+ dev_warn(&dev->pdev->dev,
"destroy queuepair failed, error: %d\n", ret);
+}
+
+/**
+ * pvrdma_destroy_qp - destroy a queue pair
+ * @qp: the queue pair to destroy
+ * @udata: user data or null for kernel object
+ *
+ * @return: always 0.
+ */
+int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
+{
+ struct pvrdma_qp *vqp = to_vqp(qp);
+
+ _pvrdma_destroy_qp_work(to_vdev(qp->device), vqp->qp_handle);
pvrdma_free_qp(vqp);
return 0;
}
+static void __pvrdma_destroy_qp(struct pvrdma_dev *dev,
+ struct pvrdma_qp *qp)
+{
+ _pvrdma_destroy_qp_work(dev, qp->qp_handle);
+ _pvrdma_free_qp(qp);
+}
+
/**
* pvrdma_modify_qp - modify queue pair attributes
* @ibqp: the queue pair
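The create-path changes above follow the usual pattern for growing a uverbs response: the new qpn/qp_handle reply is only used when the negotiated device version is at least PVRDMA_QPHANDLE_VERSION and userspace supplied a large enough response buffer, and the copy back to userspace is bounded by udata->outlen so older userspace keeps working. If that copy fails after the QP has been installed in qp_tbl, the new __pvrdma_destroy_qp() helper tears the object down without going through the uverbs destroy entry point. A condensed sketch of the copy-back step (field names as in the patch; struct pvrdma_create_qp_resp lives in the pvrdma uapi header):

    #include <rdma/ib_verbs.h>

    /* Sketch: bounded copy of the response so short/old user buffers still work. */
    static int copy_create_qp_resp(struct ib_udata *udata, u32 qpn, u32 qp_handle)
    {
            struct pvrdma_create_qp_resp resp = {
                    .qpn = qpn,
                    .qp_handle = qp_handle,
            };

            /* Copy at most what userspace asked for. */
            return ib_copy_to_udata(udata, &resp,
                                    min(udata->outlen, sizeof(resp)));
    }
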
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
index 6cac0c88cf39..d330decfb80a 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
@@ -146,7 +146,7 @@ int pvrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
goto err_srq;
}
- srq->umem = ib_umem_get(udata, ucmd.buf_addr, ucmd.buf_size, 0, 0);
+ srq->umem = ib_umem_get(ibsrq->device, ucmd.buf_addr, ucmd.buf_size, 0);
if (IS_ERR(srq->umem)) {
ret = PTR_ERR(srq->umem);
goto err_srq;
@@ -230,8 +230,6 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq)
pvrdma_page_dir_cleanup(dev, &srq->pdir);
- kfree(srq);
-
atomic_dec(&dev->num_srqs);
}