Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- drivers/infiniband/Kconfig | 3
-rw-r--r-- drivers/infiniband/core/Makefile | 7
-rw-r--r-- drivers/infiniband/core/addr.c | 67
-rw-r--r-- drivers/infiniband/core/agent.c | 4
-rw-r--r-- drivers/infiniband/core/cache.c | 205
-rw-r--r-- drivers/infiniband/core/cgroup.c | 62
-rw-r--r-- drivers/infiniband/core/cm.c | 117
-rw-r--r-- drivers/infiniband/core/cma.c | 338
-rw-r--r-- drivers/infiniband/core/cma_configfs.c | 42
-rw-r--r-- drivers/infiniband/core/core_priv.h | 158
-rw-r--r-- drivers/infiniband/core/cq.c | 16
-rw-r--r-- drivers/infiniband/core/device.c | 152
-rw-r--r-- drivers/infiniband/core/fmr_pool.c | 49
-rw-r--r-- drivers/infiniband/core/iwpm_util.c | 6
-rw-r--r-- drivers/infiniband/core/mad.c | 84
-rw-r--r-- drivers/infiniband/core/mad_rmpp.c | 16
-rw-r--r-- drivers/infiniband/core/multicast.c | 27
-rw-r--r-- drivers/infiniband/core/netlink.c | 7
-rw-r--r-- drivers/infiniband/core/rdma_core.c | 627
-rw-r--r-- drivers/infiniband/core/rdma_core.h | 78
-rw-r--r-- drivers/infiniband/core/roce_gid_mgmt.c | 39
-rw-r--r-- drivers/infiniband/core/sa_query.c | 889
-rw-r--r-- drivers/infiniband/core/security.c | 709
-rw-r--r-- drivers/infiniband/core/sysfs.c | 8
-rw-r--r-- drivers/infiniband/core/ucm.c | 43
-rw-r--r-- drivers/infiniband/core/ucma.c | 24
-rw-r--r-- drivers/infiniband/core/umem.c | 25
-rw-r--r-- drivers/infiniband/core/umem_odp.c | 169
-rw-r--r-- drivers/infiniband/core/umem_rbtree.c | 21
-rw-r--r-- drivers/infiniband/core/user_mad.c | 52
-rw-r--r-- drivers/infiniband/core/uverbs.h | 70
-rw-r--r-- drivers/infiniband/core/uverbs_cmd.c | 1594
-rw-r--r-- drivers/infiniband/core/uverbs_main.c | 500
-rw-r--r-- drivers/infiniband/core/uverbs_marshall.c | 85
-rw-r--r-- drivers/infiniband/core/uverbs_std_types.c | 275
-rw-r--r-- drivers/infiniband/core/verbs.c | 198
-rw-r--r-- drivers/infiniband/hw/Makefile | 1
-rw-r--r-- drivers/infiniband/hw/bnxt_re/Kconfig | 9
-rw-r--r-- drivers/infiniband/hw/bnxt_re/Makefile | 6
-rw-r--r-- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 159
-rw-r--r-- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 3452
-rw-r--r-- drivers/infiniband/hw/bnxt_re/ib_verbs.h | 206
-rw-r--r-- drivers/infiniband/hw/bnxt_re/main.c | 1312
-rw-r--r-- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 2210
-rw-r--r-- drivers/infiniband/hw/bnxt_re/qplib_fp.h | 456
-rw-r--r-- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 708
-rw-r--r-- drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 210
-rw-r--r-- drivers/infiniband/hw/bnxt_re/qplib_res.c | 825
-rw-r--r-- drivers/infiniband/hw/bnxt_re/qplib_res.h | 227
-rw-r--r-- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 687
-rw-r--r-- drivers/infiniband/hw/bnxt_re/qplib_sp.h | 165
-rw-r--r-- drivers/infiniband/hw/bnxt_re/roce_hsi.h | 2821
-rw-r--r-- drivers/infiniband/hw/cxgb3/cxio_dbg.c | 35
-rw-r--r-- drivers/infiniband/hw/cxgb3/cxio_hal.c | 209
-rw-r--r-- drivers/infiniband/hw/cxgb3/cxio_hal.h | 7
-rw-r--r-- drivers/infiniband/hw/cxgb3/cxio_resource.c | 25
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch.c | 19
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_cm.c | 300
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_cm.h | 18
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_cq.c | 21
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_ev.c | 26
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_mem.c | 2
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_provider.c | 138
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_provider.h | 9
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_qp.c | 75
-rw-r--r-- drivers/infiniband/hw/cxgb4/cm.c | 507
-rw-r--r-- drivers/infiniband/hw/cxgb4/cq.c | 86
-rw-r--r-- drivers/infiniband/hw/cxgb4/device.c | 286
-rw-r--r-- drivers/infiniband/hw/cxgb4/ev.c | 39
-rw-r--r-- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 50
-rw-r--r-- drivers/infiniband/hw/cxgb4/mem.c | 50
-rw-r--r-- drivers/infiniband/hw/cxgb4/provider.c | 54
-rw-r--r-- drivers/infiniband/hw/cxgb4/qp.c | 109
-rw-r--r-- drivers/infiniband/hw/cxgb4/resource.c | 64
-rw-r--r-- drivers/infiniband/hw/cxgb4/t4.h | 24
-rw-r--r-- drivers/infiniband/hw/hfi1/Makefile | 2
-rw-r--r-- drivers/infiniband/hw/hfi1/affinity.c | 2
-rw-r--r-- drivers/infiniband/hw/hfi1/aspm.h | 15
-rw-r--r-- drivers/infiniband/hw/hfi1/chip.c | 753
-rw-r--r-- drivers/infiniband/hw/hfi1/chip.h | 30
-rw-r--r-- drivers/infiniband/hw/hfi1/chip_registers.h | 2
-rw-r--r-- drivers/infiniband/hw/hfi1/common.h | 19
-rw-r--r-- drivers/infiniband/hw/hfi1/debugfs.c | 277
-rw-r--r-- drivers/infiniband/hw/hfi1/debugfs.h | 62
-rw-r--r-- drivers/infiniband/hw/hfi1/device.c | 2
-rw-r--r-- drivers/infiniband/hw/hfi1/dma.c | 183
-rw-r--r-- drivers/infiniband/hw/hfi1/driver.c | 295
-rw-r--r-- drivers/infiniband/hw/hfi1/efivar.c | 26
-rw-r--r-- drivers/infiniband/hw/hfi1/file_ops.c | 447
-rw-r--r-- drivers/infiniband/hw/hfi1/firmware.c | 14
-rw-r--r-- drivers/infiniband/hw/hfi1/hfi.h | 230
-rw-r--r-- drivers/infiniband/hw/hfi1/init.c | 119
-rw-r--r-- drivers/infiniband/hw/hfi1/intr.c | 33
-rw-r--r-- drivers/infiniband/hw/hfi1/mad.c | 97
-rw-r--r-- drivers/infiniband/hw/hfi1/pcie.c | 50
-rw-r--r-- drivers/infiniband/hw/hfi1/pio.c | 19
-rw-r--r-- drivers/infiniband/hw/hfi1/pio.h | 34
-rw-r--r-- drivers/infiniband/hw/hfi1/qp.c | 200
-rw-r--r-- drivers/infiniband/hw/hfi1/qp.h | 25
-rw-r--r-- drivers/infiniband/hw/hfi1/rc.c | 365
-rw-r--r-- drivers/infiniband/hw/hfi1/ruc.c | 258
-rw-r--r-- drivers/infiniband/hw/hfi1/sdma.c | 45
-rw-r--r-- drivers/infiniband/hw/hfi1/sdma.h | 46
-rw-r--r-- drivers/infiniband/hw/hfi1/sysfs.c | 7
-rw-r--r-- drivers/infiniband/hw/hfi1/trace.c | 9
-rw-r--r-- drivers/infiniband/hw/hfi1/trace_ctxts.h | 17
-rw-r--r-- drivers/infiniband/hw/hfi1/trace_ibhdrs.h | 8
-rw-r--r-- drivers/infiniband/hw/hfi1/trace_misc.h | 48
-rw-r--r-- drivers/infiniband/hw/hfi1/trace_rc.h | 7
-rw-r--r-- drivers/infiniband/hw/hfi1/trace_tx.h | 77
-rw-r--r-- drivers/infiniband/hw/hfi1/uc.c | 30
-rw-r--r-- drivers/infiniband/hw/hfi1/ud.c | 91
-rw-r--r-- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 211
-rw-r--r-- drivers/infiniband/hw/hfi1/user_exp_rcv.h | 17
-rw-r--r-- drivers/infiniband/hw/hfi1/user_pages.c | 7
-rw-r--r-- drivers/infiniband/hw/hfi1/user_sdma.c | 218
-rw-r--r-- drivers/infiniband/hw/hfi1/user_sdma.h | 18
-rw-r--r-- drivers/infiniband/hw/hfi1/verbs.c | 272
-rw-r--r-- drivers/infiniband/hw/hfi1/verbs.h | 44
-rw-r--r-- drivers/infiniband/hw/hfi1/vnic.h | 183
-rw-r--r-- drivers/infiniband/hw/hfi1/vnic_main.c | 904
-rw-r--r-- drivers/infiniband/hw/hfi1/vnic_sdma.c | 323
-rw-r--r-- drivers/infiniband/hw/hns/hns_roce_ah.c | 62
-rw-r--r-- drivers/infiniband/hw/hns/hns_roce_cmd.c | 6
-rw-r--r-- drivers/infiniband/hw/hns/hns_roce_cq.c | 3
-rw-r--r-- drivers/infiniband/hw/hns/hns_roce_device.h | 5
-rw-r--r-- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 197
-rw-r--r-- drivers/infiniband/hw/hns/hns_roce_main.c | 13
-rw-r--r-- drivers/infiniband/hw/hns/hns_roce_mr.c | 22
-rw-r--r-- drivers/infiniband/hw/hns/hns_roce_qp.c | 5
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw.h | 1
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_cm.c | 13
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 151
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_main.c | 82
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_osdep.h | 1
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_puda.c | 5
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_type.h | 2
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_uk.c | 34
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_utils.c | 95
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 40
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_verbs.h | 2
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_virtchnl.c | 5
-rw-r--r-- drivers/infiniband/hw/mlx4/ah.c | 134
-rw-r--r-- drivers/infiniband/hw/mlx4/alias_GUID.c | 1
-rw-r--r-- drivers/infiniband/hw/mlx4/cm.c | 14
-rw-r--r-- drivers/infiniband/hw/mlx4/cq.c | 8
-rw-r--r-- drivers/infiniband/hw/mlx4/mad.c | 88
-rw-r--r-- drivers/infiniband/hw/mlx4/main.c | 72
-rw-r--r-- drivers/infiniband/hw/mlx4/mcg.c | 13
-rw-r--r-- drivers/infiniband/hw/mlx4/mlx4_ib.h | 9
-rw-r--r-- drivers/infiniband/hw/mlx4/mr.c | 12
-rw-r--r-- drivers/infiniband/hw/mlx4/qp.c | 214
-rw-r--r-- drivers/infiniband/hw/mlx4/srq.c | 10
-rw-r--r-- drivers/infiniband/hw/mlx4/sysfs.c | 1
-rw-r--r-- drivers/infiniband/hw/mlx5/Makefile | 2
-rw-r--r-- drivers/infiniband/hw/mlx5/ah.c | 74
-rw-r--r-- drivers/infiniband/hw/mlx5/cmd.c | 59
-rw-r--r-- drivers/infiniband/hw/mlx5/cmd.h (renamed from drivers/infiniband/hw/qedr/qedr_hsi.h) | 36
-rw-r--r-- drivers/infiniband/hw/mlx5/cq.c | 37
-rw-r--r-- drivers/infiniband/hw/mlx5/mad.c | 20
-rw-r--r-- drivers/infiniband/hw/mlx5/main.c | 1138
-rw-r--r-- drivers/infiniband/hw/mlx5/mem.c | 45
-rw-r--r-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 186
-rw-r--r-- drivers/infiniband/hw/mlx5/mr.c | 688
-rw-r--r-- drivers/infiniband/hw/mlx5/odp.c | 1073
-rw-r--r-- drivers/infiniband/hw/mlx5/qp.c | 728
-rw-r--r-- drivers/infiniband/hw/mlx5/srq.c | 13
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_av.c | 69
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_cmd.c | 12
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_dev.h | 4
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_mad.c | 17
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_main.c | 24
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_provider.c | 18
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_qp.c | 94
-rw-r--r-- drivers/infiniband/hw/nes/nes.c | 80
-rw-r--r-- drivers/infiniband/hw/nes/nes.h | 1
-rw-r--r-- drivers/infiniband/hw/nes/nes_cm.c | 29
-rw-r--r-- drivers/infiniband/hw/nes/nes_hw.c | 11
-rw-r--r-- drivers/infiniband/hw/nes/nes_mgt.c | 5
-rw-r--r-- drivers/infiniband/hw/nes/nes_verbs.c | 18
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma.h | 6
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 68
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_ah.h | 10
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 26
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 11
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 5
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_stats.c | 2
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 76
-rw-r--r-- drivers/infiniband/hw/qedr/main.c | 112
-rw-r--r-- drivers/infiniband/hw/qedr/qedr.h | 34
-rw-r--r-- drivers/infiniband/hw/qedr/qedr_cm.c | 274
-rw-r--r-- drivers/infiniband/hw/qedr/qedr_cm.h | 3
-rw-r--r-- drivers/infiniband/hw/qedr/verbs.c | 897
-rw-r--r-- drivers/infiniband/hw/qedr/verbs.h | 2
-rw-r--r-- drivers/infiniband/hw/qib/qib_common.h | 4
-rw-r--r-- drivers/infiniband/hw/qib/qib_dma.c | 169
-rw-r--r-- drivers/infiniband/hw/qib/qib_file_ops.c | 2
-rw-r--r-- drivers/infiniband/hw/qib/qib_fs.c | 2
-rw-r--r-- drivers/infiniband/hw/qib/qib_iba6120.c | 12
-rw-r--r-- drivers/infiniband/hw/qib/qib_iba7220.c | 7
-rw-r--r-- drivers/infiniband/hw/qib/qib_iba7322.c | 15
-rw-r--r-- drivers/infiniband/hw/qib/qib_init.c | 15
-rw-r--r-- drivers/infiniband/hw/qib/qib_keys.c | 5
-rw-r--r-- drivers/infiniband/hw/qib/qib_mad.c | 7
-rw-r--r-- drivers/infiniband/hw/qib/qib_pcie.c | 8
-rw-r--r-- drivers/infiniband/hw/qib/qib_qp.c | 152
-rw-r--r-- drivers/infiniband/hw/qib/qib_qsfp.c | 10
-rw-r--r-- drivers/infiniband/hw/qib/qib_qsfp.h | 1
-rw-r--r-- drivers/infiniband/hw/qib/qib_rc.c | 214
-rw-r--r-- drivers/infiniband/hw/qib/qib_ruc.c | 124
-rw-r--r-- drivers/infiniband/hw/qib/qib_uc.c | 21
-rw-r--r-- drivers/infiniband/hw/qib/qib_ud.c | 65
-rw-r--r-- drivers/infiniband/hw/qib/qib_user_pages.c | 1
-rw-r--r-- drivers/infiniband/hw/qib/qib_user_sdma.c | 6
-rw-r--r-- drivers/infiniband/hw/qib/qib_verbs.c | 136
-rw-r--r-- drivers/infiniband/hw/qib/qib_verbs.h | 18
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h | 1
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_common_util.h | 38
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_fwd.h | 3
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_ib_main.c | 6
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_ib_sysfs.c | 7
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 54
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_ib_verbs.h | 2
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_uiom.c | 3
-rw-r--r-- drivers/infiniband/hw/vmw_pvrdma/pvrdma.h | 19
-rw-r--r-- drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c | 2
-rw-r--r-- drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h | 8
-rw-r--r-- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 186
-rw-r--r-- drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c | 43
-rw-r--r-- drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 55
-rw-r--r-- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 34
-rw-r--r-- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 2
-rw-r--r-- drivers/infiniband/sw/rdmavt/Kconfig | 1
-rw-r--r-- drivers/infiniband/sw/rdmavt/Makefile | 4
-rw-r--r-- drivers/infiniband/sw/rdmavt/ah.c | 39
-rw-r--r-- drivers/infiniband/sw/rdmavt/ah.h | 6
-rw-r--r-- drivers/infiniband/sw/rdmavt/cq.c | 3
-rw-r--r-- drivers/infiniband/sw/rdmavt/dma.c | 198
-rw-r--r-- drivers/infiniband/sw/rdmavt/mad.c | 8
-rw-r--r-- drivers/infiniband/sw/rdmavt/mcast.c | 61
-rw-r--r-- drivers/infiniband/sw/rdmavt/mmap.c | 4
-rw-r--r-- drivers/infiniband/sw/rdmavt/mr.c | 122
-rw-r--r-- drivers/infiniband/sw/rdmavt/pd.c | 2
-rw-r--r-- drivers/infiniband/sw/rdmavt/qp.c | 329
-rw-r--r-- drivers/infiniband/sw/rdmavt/rc.c | 189
-rw-r--r-- drivers/infiniband/sw/rdmavt/trace.h | 4
-rw-r--r-- drivers/infiniband/sw/rdmavt/trace_cq.h | 127
-rw-r--r-- drivers/infiniband/sw/rdmavt/trace_rc.h (renamed from drivers/infiniband/sw/rdmavt/dma.h) | 68
-rw-r--r-- drivers/infiniband/sw/rdmavt/trace_tx.h | 34
-rw-r--r-- drivers/infiniband/sw/rdmavt/vt.c | 11
-rw-r--r-- drivers/infiniband/sw/rdmavt/vt.h | 1
-rw-r--r-- drivers/infiniband/sw/rxe/Kconfig | 4
-rw-r--r-- drivers/infiniband/sw/rxe/Makefile | 4
-rw-r--r-- drivers/infiniband/sw/rxe/rxe.c | 6
-rw-r--r-- drivers/infiniband/sw/rxe/rxe.h | 23
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_av.c | 32
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_comp.c | 105
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_cq.c | 4
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_dma.c | 183
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_hdr.h | 12
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_hw_counters.c | 78
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_hw_counters.h | 61
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_icrc.c | 6
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_loc.h | 39
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_mcast.c | 8
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_mmap.c | 4
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_mr.c | 30
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_net.c | 135
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_param.h | 1
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_pool.c | 14
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_pool.h | 8
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_qp.c | 42
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_recv.c | 9
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_req.c | 40
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_resp.c | 87
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_verbs.c | 70
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_verbs.h | 33
-rw-r--r-- drivers/infiniband/ulp/Makefile | 1
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib.h | 50
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 135
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 67
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_fs.c | 13
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 375
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_main.c | 576
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 130
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_netlink.c | 23
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 64
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 36
-rw-r--r-- drivers/infiniband/ulp/iser/iscsi_iser.c | 14
-rw-r--r-- drivers/infiniband/ulp/iser/iscsi_iser.h | 2
-rw-r--r-- drivers/infiniband/ulp/iser/iser_initiator.c | 8
-rw-r--r-- drivers/infiniband/ulp/iser/iser_verbs.c | 20
-rw-r--r-- drivers/infiniband/ulp/isert/ib_isert.c | 69
-rw-r--r-- drivers/infiniband/ulp/isert/ib_isert.h | 3
-rw-r--r-- drivers/infiniband/ulp/opa_vnic/Kconfig | 8
-rw-r--r-- drivers/infiniband/ulp/opa_vnic/Makefile | 7
-rw-r--r-- drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c | 475
-rw-r--r-- drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h | 489
-rw-r--r-- drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c | 187
-rw-r--r-- drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h | 329
-rw-r--r-- drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c | 387
-rw-r--r-- drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c | 1056
-rw-r--r-- drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c | 390
-rw-r--r-- drivers/infiniband/ulp/srp/ib_srp.c | 109
-rw-r--r-- drivers/infiniband/ulp/srp/ib_srp.h | 3
-rw-r--r-- drivers/infiniband/ulp/srpt/ib_srpt.c | 159
-rw-r--r-- drivers/infiniband/ulp/srpt/ib_srpt.h | 18
307 files changed, 33669 insertions, 11081 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 670917387eda..234fe01904e7 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -85,6 +85,7 @@ source "drivers/infiniband/ulp/srpt/Kconfig"
source "drivers/infiniband/ulp/iser/Kconfig"
source "drivers/infiniband/ulp/isert/Kconfig"
+source "drivers/infiniband/ulp/opa_vnic/Kconfig"
source "drivers/infiniband/sw/rdmavt/Kconfig"
source "drivers/infiniband/sw/rxe/Kconfig"
@@ -92,4 +93,6 @@ source "drivers/infiniband/hw/hfi1/Kconfig"
source "drivers/infiniband/hw/qedr/Kconfig"
+source "drivers/infiniband/hw/bnxt_re/Kconfig"
+
endif # INFINIBAND
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index edaae9f9853c..e3cdafff8ece 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -10,9 +10,11 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
device.o fmr_pool.o cache.o netlink.o \
roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
- multicast.o mad.o smi.o agent.o mad_rmpp.o
+ multicast.o mad.o smi.o agent.o mad_rmpp.o \
+ security.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o
+ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
ib_cm-y := cm.o
@@ -28,4 +30,5 @@ ib_umad-y := user_mad.o
ib_ucm-y := ucm.o
-ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o
+ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
+ rdma_core.o uverbs_std_types.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 0f58f46dbad7..01236cef7bfb 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -88,7 +88,7 @@ static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh)
return false;
ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
- nlmsg_len(nlh), ib_nl_addr_policy);
+ nlmsg_len(nlh), ib_nl_addr_policy, NULL);
if (ret)
return false;
@@ -179,8 +179,7 @@ static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
}
/* Construct the family header first */
- header = (struct rdma_ls_ip_resolve_header *)
- skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
+ header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
header->ifindex = dev_addr->bound_dev_if;
nla_put(skb, attrtype, size, daddr);
@@ -269,6 +268,7 @@ int rdma_translate_ip(const struct sockaddr *addr,
return ret;
ret = rdma_copy_addr(dev_addr, dev, NULL);
+ dev_addr->bound_dev_if = dev->ifindex;
if (vlan_id)
*vlan_id = rdma_vlan_dev_vlan_id(dev);
dev_put(dev);
@@ -281,6 +281,7 @@ int rdma_translate_ip(const struct sockaddr *addr,
&((const struct sockaddr_in6 *)addr)->sin6_addr,
dev, 1)) {
ret = rdma_copy_addr(dev_addr, dev, NULL);
+ dev_addr->bound_dev_if = dev->ifindex;
if (vlan_id)
*vlan_id = rdma_vlan_dev_vlan_id(dev);
break;
@@ -406,10 +407,10 @@ static int addr4_resolve(struct sockaddr_in *src_in,
fl4.saddr = src_ip;
fl4.flowi4_oif = addr->bound_dev_if;
rt = ip_route_output_key(addr->net, &fl4);
- if (IS_ERR(rt)) {
- ret = PTR_ERR(rt);
- goto out;
- }
+ ret = PTR_ERR_OR_ZERO(rt);
+ if (ret)
+ return ret;
+
src_in->sin_family = AF_INET;
src_in->sin_addr.s_addr = fl4.saddr;
@@ -424,8 +425,6 @@ static int addr4_resolve(struct sockaddr_in *src_in,
*prt = rt;
return 0;
-out:
- return ret;
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -444,17 +443,12 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
fl6.saddr = src_in->sin6_addr;
fl6.flowi6_oif = addr->bound_dev_if;
- dst = ip6_route_output(addr->net, NULL, &fl6);
- if ((ret = dst->error))
- goto put;
+ ret = ipv6_stub->ipv6_dst_lookup(addr->net, NULL, &dst, &fl6);
+ if (ret < 0)
+ return ret;
rt = (struct rt6_info *)dst;
- if (ipv6_addr_any(&fl6.saddr)) {
- ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
- &fl6.daddr, 0, &fl6.saddr);
- if (ret)
- goto put;
-
+ if (ipv6_addr_any(&src_in->sin6_addr)) {
src_in->sin6_family = AF_INET6;
src_in->sin6_addr = fl6.saddr;
}
@@ -471,9 +465,6 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
*pdst = dst;
return 0;
-put:
- dst_release(dst);
- return ret;
}
#else
static int addr6_resolve(struct sockaddr_in6 *src_in,
@@ -518,6 +509,11 @@ static int addr_resolve(struct sockaddr *src_in,
struct dst_entry *dst;
int ret;
+ if (!addr->net) {
+ pr_warn_ratelimited("%s: missing namespace\n", __func__);
+ return -EINVAL;
+ }
+
if (src_in->sa_family == AF_INET) {
struct rtable *rt = NULL;
const struct sockaddr_in *dst_in4 =
@@ -531,8 +527,12 @@ static int addr_resolve(struct sockaddr *src_in,
if (resolve_neigh)
ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq);
- ndev = rt->dst.dev;
- dev_hold(ndev);
+ if (addr->bound_dev_if) {
+ ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
+ } else {
+ ndev = rt->dst.dev;
+ dev_hold(ndev);
+ }
ip_rt_put(rt);
} else {
@@ -548,14 +548,27 @@ static int addr_resolve(struct sockaddr *src_in,
if (resolve_neigh)
ret = addr_resolve_neigh(dst, dst_in, addr, seq);
- ndev = dst->dev;
- dev_hold(ndev);
+ if (addr->bound_dev_if) {
+ ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
+ } else {
+ ndev = dst->dev;
+ dev_hold(ndev);
+ }
dst_release(dst);
}
- addr->bound_dev_if = ndev->ifindex;
- addr->net = dev_net(ndev);
+ if (ndev->flags & IFF_LOOPBACK) {
+ ret = rdma_translate_ip(dst_in, addr, NULL);
+ /*
+ * Put the loopback device and get the translated
+ * device instead.
+ */
+ dev_put(ndev);
+ ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
+ } else {
+ addr->bound_dev_if = ndev->ifindex;
+ }
dev_put(ndev);
return ret;
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 11dacd97a667..324ef85a13b6 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -137,13 +137,13 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh *
err2:
ib_free_send_mad(send_buf);
err1:
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
}
static void agent_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_send_wc)
{
- ib_destroy_ah(mad_send_wc->send_buf->ah);
+ rdma_destroy_ah(mad_send_wc->send_buf->ah);
ib_free_send_mad(mad_send_wc->send_buf);
}
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index ae04826e82fc..efc94304dee3 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -53,6 +53,7 @@ struct ib_update_work {
struct work_struct work;
struct ib_device *device;
u8 port_num;
+ bool enforce_security;
};
union ib_gid zgid;
@@ -314,14 +315,13 @@ static void make_default_gid(struct net_device *dev, union ib_gid *gid)
int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
union ib_gid *gid, struct ib_gid_attr *attr)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
int ix;
int ret = 0;
struct net_device *idev;
int empty;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
if (!memcmp(gid, &zgid, sizeof(*gid)))
return -EINVAL;
@@ -369,11 +369,10 @@ out_unlock:
int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
union ib_gid *gid, struct ib_gid_attr *attr)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
int ix;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
mutex_lock(&table->lock);
write_lock_irq(&table->rwlock);
@@ -399,12 +398,11 @@ out_unlock:
int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
struct net_device *ndev)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
int ix;
bool deleted = false;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
mutex_lock(&table->lock);
write_lock_irq(&table->rwlock);
@@ -428,10 +426,9 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
union ib_gid *gid, struct ib_gid_attr *attr)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
if (index < 0 || index >= table->sz)
return -EINVAL;
@@ -455,14 +452,13 @@ static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
unsigned long mask,
u8 *port, u16 *index)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
u8 p;
int local_index;
unsigned long flags;
for (p = 0; p < ib_dev->phys_port_cnt; p++) {
- table = ports_table[p];
+ table = ib_dev->cache.ports[p].gid;
read_lock_irqsave(&table->rwlock, flags);
local_index = find_gid(table, gid, val, false, mask, NULL);
if (local_index >= 0) {
@@ -503,18 +499,16 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
u16 *index)
{
int local_index;
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
unsigned long mask = GID_ATTR_FIND_MASK_GID |
GID_ATTR_FIND_MASK_GID_TYPE;
struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
unsigned long flags;
- if (port < rdma_start_port(ib_dev) ||
- port > rdma_end_port(ib_dev))
+ if (!rdma_is_port_valid(ib_dev, port))
return -ENOENT;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
if (ndev)
mask |= GID_ATTR_FIND_MASK_NETDEV;
@@ -562,21 +556,17 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
void *context,
u16 *index)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
unsigned int i;
unsigned long flags;
bool found = false;
- if (!ports_table)
- return -EOPNOTSUPP;
- if (port < rdma_start_port(ib_dev) ||
- port > rdma_end_port(ib_dev) ||
+ if (!rdma_is_port_valid(ib_dev, port) ||
!rdma_protocol_roce(ib_dev, port))
return -EPROTONOSUPPORT;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
read_lock_irqsave(&table->rwlock, flags);
for (i = 0; i < table->sz; i++) {
@@ -668,14 +658,13 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
unsigned long gid_type_mask,
enum ib_cache_gid_default_mode mode)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
union ib_gid gid;
struct ib_gid_attr gid_attr;
struct ib_gid_attr zattr_type = zattr;
struct ib_gid_table *table;
unsigned int gid_type;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
make_default_gid(ndev, &gid);
memset(&gid_attr, 0, sizeof(gid_attr));
@@ -766,71 +755,64 @@ static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
static int _gid_table_setup_one(struct ib_device *ib_dev)
{
u8 port;
- struct ib_gid_table **table;
+ struct ib_gid_table *table;
int err = 0;
- table = kcalloc(ib_dev->phys_port_cnt, sizeof(*table), GFP_KERNEL);
- if (!table)
- return -ENOMEM;
-
for (port = 0; port < ib_dev->phys_port_cnt; port++) {
u8 rdma_port = port + rdma_start_port(ib_dev);
- table[port] =
+ table =
alloc_gid_table(
ib_dev->port_immutable[rdma_port].gid_tbl_len);
- if (!table[port]) {
+ if (!table) {
err = -ENOMEM;
goto rollback_table_setup;
}
err = gid_table_reserve_default(ib_dev,
port + rdma_start_port(ib_dev),
- table[port]);
+ table);
if (err)
goto rollback_table_setup;
+ ib_dev->cache.ports[port].gid = table;
}
- ib_dev->cache.gid_cache = table;
return 0;
rollback_table_setup:
for (port = 0; port < ib_dev->phys_port_cnt; port++) {
+ table = ib_dev->cache.ports[port].gid;
+
cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
- table[port]);
- release_gid_table(table[port]);
+ table);
+ release_gid_table(table);
}
- kfree(table);
return err;
}
static void gid_table_release_one(struct ib_device *ib_dev)
{
- struct ib_gid_table **table = ib_dev->cache.gid_cache;
+ struct ib_gid_table *table;
u8 port;
- if (!table)
- return;
-
- for (port = 0; port < ib_dev->phys_port_cnt; port++)
- release_gid_table(table[port]);
-
- kfree(table);
- ib_dev->cache.gid_cache = NULL;
+ for (port = 0; port < ib_dev->phys_port_cnt; port++) {
+ table = ib_dev->cache.ports[port].gid;
+ release_gid_table(table);
+ ib_dev->cache.ports[port].gid = NULL;
+ }
}
static void gid_table_cleanup_one(struct ib_device *ib_dev)
{
- struct ib_gid_table **table = ib_dev->cache.gid_cache;
+ struct ib_gid_table *table;
u8 port;
- if (!table)
- return;
-
- for (port = 0; port < ib_dev->phys_port_cnt; port++)
+ for (port = 0; port < ib_dev->phys_port_cnt; port++) {
+ table = ib_dev->cache.ports[port].gid;
cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
- table[port]);
+ table);
+ }
}
static int gid_table_setup_one(struct ib_device *ib_dev)
@@ -860,12 +842,12 @@ int ib_get_cached_gid(struct ib_device *device,
{
int res;
unsigned long flags;
- struct ib_gid_table **ports_table = device->cache.gid_cache;
- struct ib_gid_table *table = ports_table[port_num - rdma_start_port(device)];
+ struct ib_gid_table *table;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
+ table = device->cache.ports[port_num - rdma_start_port(device)].gid;
read_lock_irqsave(&table->rwlock, flags);
res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
read_unlock_irqrestore(&table->rwlock, flags);
@@ -912,12 +894,12 @@ int ib_get_cached_pkey(struct ib_device *device,
unsigned long flags;
int ret = 0;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
- cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];
+ cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
if (index < 0 || index >= cache->table_len)
ret = -EINVAL;
@@ -930,6 +912,26 @@ int ib_get_cached_pkey(struct ib_device *device,
}
EXPORT_SYMBOL(ib_get_cached_pkey);
+int ib_get_cached_subnet_prefix(struct ib_device *device,
+ u8 port_num,
+ u64 *sn_pfx)
+{
+ unsigned long flags;
+ int p;
+
+ if (port_num < rdma_start_port(device) ||
+ port_num > rdma_end_port(device))
+ return -EINVAL;
+
+ p = port_num - rdma_start_port(device);
+ read_lock_irqsave(&device->cache.lock, flags);
+ *sn_pfx = device->cache.ports[p].subnet_prefix;
+ read_unlock_irqrestore(&device->cache.lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_get_cached_subnet_prefix);
+
int ib_find_cached_pkey(struct ib_device *device,
u8 port_num,
u16 pkey,
@@ -941,12 +943,12 @@ int ib_find_cached_pkey(struct ib_device *device,
int ret = -ENOENT;
int partial_ix = -1;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
- cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];
+ cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
*index = -1;
@@ -981,12 +983,12 @@ int ib_find_exact_cached_pkey(struct ib_device *device,
int i;
int ret = -ENOENT;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
- cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];
+ cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
*index = -1;
@@ -1010,19 +1012,39 @@ int ib_get_cached_lmc(struct ib_device *device,
unsigned long flags;
int ret = 0;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
- *lmc = device->cache.lmc_cache[port_num - rdma_start_port(device)];
+ *lmc = device->cache.ports[port_num - rdma_start_port(device)].lmc;
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_get_cached_lmc);
+int ib_get_cached_port_state(struct ib_device *device,
+ u8 port_num,
+ enum ib_port_state *port_state)
+{
+ unsigned long flags;
+ int ret = 0;
+
+ if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ return -EINVAL;
+
+ read_lock_irqsave(&device->cache.lock, flags);
+ *port_state = device->cache.ports[port_num
+ - rdma_start_port(device)].port_state;
+ read_unlock_irqrestore(&device->cache.lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_get_cached_port_state);
+
static void ib_cache_update(struct ib_device *device,
- u8 port)
+ u8 port,
+ bool enforce_security)
{
struct ib_port_attr *tprops = NULL;
struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
@@ -1033,14 +1055,13 @@ static void ib_cache_update(struct ib_device *device,
int i;
int ret;
struct ib_gid_table *table;
- struct ib_gid_table **ports_table = device->cache.gid_cache;
bool use_roce_gid_table =
rdma_cap_roce_gid_table(device, port);
- if (port < rdma_start_port(device) || port > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port))
return;
- table = ports_table[port - rdma_start_port(device)];
+ table = device->cache.ports[port - rdma_start_port(device)].gid;
tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
if (!tprops)
@@ -1092,9 +1113,10 @@ static void ib_cache_update(struct ib_device *device,
write_lock_irq(&device->cache.lock);
- old_pkey_cache = device->cache.pkey_cache[port - rdma_start_port(device)];
+ old_pkey_cache = device->cache.ports[port -
+ rdma_start_port(device)].pkey;
- device->cache.pkey_cache[port - rdma_start_port(device)] = pkey_cache;
+ device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache;
if (!use_roce_gid_table) {
write_lock(&table->rwlock);
for (i = 0; i < gid_cache->table_len; i++) {
@@ -1104,10 +1126,19 @@ static void ib_cache_update(struct ib_device *device,
write_unlock(&table->rwlock);
}
- device->cache.lmc_cache[port - rdma_start_port(device)] = tprops->lmc;
+ device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc;
+ device->cache.ports[port - rdma_start_port(device)].port_state =
+ tprops->state;
+ device->cache.ports[port - rdma_start_port(device)].subnet_prefix =
+ tprops->subnet_prefix;
write_unlock_irq(&device->cache.lock);
+ if (enforce_security)
+ ib_security_cache_change(device,
+ port,
+ tprops->subnet_prefix);
+
kfree(gid_cache);
kfree(old_pkey_cache);
kfree(tprops);
@@ -1124,7 +1155,9 @@ static void ib_cache_task(struct work_struct *_work)
struct ib_update_work *work =
container_of(_work, struct ib_update_work, work);
- ib_cache_update(work->device, work->port_num);
+ ib_cache_update(work->device,
+ work->port_num,
+ work->enforce_security);
kfree(work);
}
@@ -1145,6 +1178,12 @@ static void ib_cache_event(struct ib_event_handler *handler,
INIT_WORK(&work->work, ib_cache_task);
work->device = event->device;
work->port_num = event->element.port_num;
+ if (event->event == IB_EVENT_PKEY_CHANGE ||
+ event->event == IB_EVENT_GID_CHANGE)
+ work->enforce_security = true;
+ else
+ work->enforce_security = false;
+
queue_work(ib_wq, &work->work);
}
}
@@ -1157,25 +1196,20 @@ int ib_cache_setup_one(struct ib_device *device)
rwlock_init(&device->cache.lock);
- device->cache.pkey_cache =
- kzalloc(sizeof *device->cache.pkey_cache *
+ device->cache.ports =
+ kzalloc(sizeof(*device->cache.ports) *
(rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL);
- device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache *
- (rdma_end_port(device) -
- rdma_start_port(device) + 1),
- GFP_KERNEL);
- if (!device->cache.pkey_cache ||
- !device->cache.lmc_cache) {
+ if (!device->cache.ports) {
err = -ENOMEM;
- goto free;
+ goto out;
}
err = gid_table_setup_one(device);
if (err)
- goto free;
+ goto out;
for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
- ib_cache_update(device, p + rdma_start_port(device));
+ ib_cache_update(device, p + rdma_start_port(device), true);
INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
device, ib_cache_event);
@@ -1187,9 +1221,7 @@ int ib_cache_setup_one(struct ib_device *device)
err:
gid_table_cleanup_one(device);
-free:
- kfree(device->cache.pkey_cache);
- kfree(device->cache.lmc_cache);
+out:
return err;
}
@@ -1203,14 +1235,11 @@ void ib_cache_release_one(struct ib_device *device)
* all the device's resources when the cache could no
* longer be accessed.
*/
- if (device->cache.pkey_cache)
- for (p = 0;
- p <= rdma_end_port(device) - rdma_start_port(device); ++p)
- kfree(device->cache.pkey_cache[p]);
+ for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
+ kfree(device->cache.ports[p].pkey);
gid_table_release_one(device);
- kfree(device->cache.pkey_cache);
- kfree(device->cache.lmc_cache);
+ kfree(device->cache.ports);
}
void ib_cache_cleanup_one(struct ib_device *device)
diff --git a/drivers/infiniband/core/cgroup.c b/drivers/infiniband/core/cgroup.c
new file mode 100644
index 000000000000..126ac5f99db7
--- /dev/null
+++ b/drivers/infiniband/core/cgroup.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include "core_priv.h"
+
+/**
+ * ib_device_register_rdmacg - register with rdma cgroup.
+ * @device: device to register to participate in resource
+ * accounting by rdma cgroup.
+ *
+ * Register with the rdma cgroup. Should be called before
+ * exposing rdma device to user space applications to avoid
+ * resource accounting leak.
+ * Returns 0 on success or otherwise failure code.
+ */
+int ib_device_register_rdmacg(struct ib_device *device)
+{
+ device->cg_device.name = device->name;
+ return rdmacg_register_device(&device->cg_device);
+}
+
+/**
+ * ib_device_unregister_rdmacg - unregister with rdma cgroup.
+ * @device: device to unregister.
+ *
+ * Unregister with the rdma cgroup. Should be called after
+ * all the resources are deallocated, and after a stage when any
+ * other resource allocation by user application cannot be done
+ * for this device to avoid any leak in accounting.
+ */
+void ib_device_unregister_rdmacg(struct ib_device *device)
+{
+ rdmacg_unregister_device(&device->cg_device);
+}
+
+int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index)
+{
+ return rdmacg_try_charge(&cg_obj->cg, &device->cg_device,
+ resource_index);
+}
+EXPORT_SYMBOL(ib_rdmacg_try_charge);
+
+void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index)
+{
+ rdmacg_uncharge(cg_obj->cg, &device->cg_device,
+ resource_index);
+}
+EXPORT_SYMBOL(ib_rdmacg_uncharge);
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index cf1edfa1cbac..2b4d613a3474 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -228,7 +228,7 @@ struct cm_device {
struct cm_av {
struct cm_port *port;
union ib_gid dgid;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
u16 pkey_index;
u8 timeout;
};
@@ -241,7 +241,7 @@ struct cm_work {
__be32 local_id; /* Established / timewait */
__be32 remote_id;
struct ib_cm_event cm_event;
- struct ib_sa_path_rec path[0];
+ struct sa_path_rec path[0];
};
struct cm_timewait_info {
@@ -343,7 +343,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
ret = -ENODEV;
goto out;
}
- ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr);
+ ah = rdma_create_ah(mad_agent->qp->pd, &av->ah_attr);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
goto out;
@@ -355,7 +355,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
GFP_ATOMIC,
IB_MGMT_BASE_VERSION);
if (IS_ERR(m)) {
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
ret = PTR_ERR(m);
goto out;
}
@@ -390,7 +390,7 @@ static int cm_alloc_response_msg(struct cm_port *port,
GFP_ATOMIC,
IB_MGMT_BASE_VERSION);
if (IS_ERR(m)) {
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
return PTR_ERR(m);
}
m->ah = ah;
@@ -400,7 +400,7 @@ static int cm_alloc_response_msg(struct cm_port *port,
static void cm_free_msg(struct ib_mad_send_buf *msg)
{
- ib_destroy_ah(msg->ah);
+ rdma_destroy_ah(msg->ah);
if (msg->context[0])
cm_deref_id(msg->context[0]);
ib_free_send_mad(msg);
@@ -440,7 +440,7 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
grh, &av->ah_attr);
}
-static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av,
+static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
struct cm_id_private *cm_id_priv)
{
struct cm_device *cm_dev;
@@ -453,7 +453,8 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av,
read_lock_irqsave(&cm.device_lock, flags);
list_for_each_entry(cm_dev, &cm.device_list, list) {
if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
- path->gid_type, ndev, &p, NULL)) {
+ sa_conv_pathrec_to_gid_type(path),
+ ndev, &p, NULL)) {
port = cm_dev->port[p-1];
break;
}
@@ -1172,8 +1173,8 @@ static void cm_format_req(struct cm_req_msg *req_msg,
struct cm_id_private *cm_id_priv,
struct ib_cm_req_param *param)
{
- struct ib_sa_path_rec *pri_path = param->primary_path;
- struct ib_sa_path_rec *alt_path = param->alternate_path;
+ struct sa_path_rec *pri_path = param->primary_path;
+ struct sa_path_rec *alt_path = param->alternate_path;
cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));
@@ -1202,8 +1203,10 @@ static void cm_format_req(struct cm_req_msg *req_msg,
}
if (pri_path->hop_limit <= 1) {
- req_msg->primary_local_lid = pri_path->slid;
- req_msg->primary_remote_lid = pri_path->dlid;
+ req_msg->primary_local_lid =
+ htons(ntohl(sa_path_get_slid(pri_path)));
+ req_msg->primary_remote_lid =
+ htons(ntohl(sa_path_get_dlid(pri_path)));
} else {
/* Work-around until there's a way to obtain remote LID info */
req_msg->primary_local_lid = IB_LID_PERMISSIVE;
@@ -1223,8 +1226,10 @@ static void cm_format_req(struct cm_req_msg *req_msg,
if (alt_path) {
if (alt_path->hop_limit <= 1) {
- req_msg->alt_local_lid = alt_path->slid;
- req_msg->alt_remote_lid = alt_path->dlid;
+ req_msg->alt_local_lid =
+ htons(ntohl(sa_path_get_slid(alt_path)));
+ req_msg->alt_remote_lid =
+ htons(ntohl(sa_path_get_dlid(alt_path)));
} else {
req_msg->alt_local_lid = IB_LID_PERMISSIVE;
req_msg->alt_remote_lid = IB_LID_PERMISSIVE;
@@ -1401,14 +1406,15 @@ static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid,
}
static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
- struct ib_sa_path_rec *primary_path,
- struct ib_sa_path_rec *alt_path)
+ struct sa_path_rec *primary_path,
+ struct sa_path_rec *alt_path)
{
- memset(primary_path, 0, sizeof *primary_path);
primary_path->dgid = req_msg->primary_local_gid;
primary_path->sgid = req_msg->primary_remote_gid;
- primary_path->dlid = req_msg->primary_local_lid;
- primary_path->slid = req_msg->primary_remote_lid;
+ sa_path_set_dlid(primary_path,
+ htonl(ntohs(req_msg->primary_local_lid)));
+ sa_path_set_slid(primary_path,
+ htonl(ntohs(req_msg->primary_remote_lid)));
primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
primary_path->hop_limit = req_msg->primary_hop_limit;
primary_path->traffic_class = req_msg->primary_traffic_class;
@@ -1426,11 +1432,12 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
primary_path->service_id = req_msg->service_id;
if (req_msg->alt_local_lid) {
- memset(alt_path, 0, sizeof *alt_path);
alt_path->dgid = req_msg->alt_local_gid;
alt_path->sgid = req_msg->alt_remote_gid;
- alt_path->dlid = req_msg->alt_local_lid;
- alt_path->slid = req_msg->alt_remote_lid;
+ sa_path_set_dlid(alt_path,
+ htonl(ntohs(req_msg->alt_local_lid)));
+ sa_path_set_slid(alt_path,
+ htonl(ntohs(req_msg->alt_remote_lid)));
alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
alt_path->hop_limit = req_msg->alt_hop_limit;
alt_path->traffic_class = req_msg->alt_traffic_class;
@@ -1722,6 +1729,7 @@ static int cm_req_handler(struct cm_work *work)
struct cm_req_msg *req_msg;
union ib_gid gid;
struct ib_gid_attr gid_attr;
+ const struct ib_global_route *grh;
int ret;
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1758,21 +1766,34 @@ static int cm_req_handler(struct cm_work *work)
cm_id_priv->id.service_mask = ~cpu_to_be64(0);
cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
- cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
- memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
- work->path[0].hop_limit = cm_id_priv->av.ah_attr.grh.hop_limit;
+ memset(&work->path[0], 0, sizeof(work->path[0]));
+ memset(&work->path[1], 0, sizeof(work->path[1]));
+ grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr);
ret = ib_get_cached_gid(work->port->cm_dev->ib_device,
work->port->port_num,
- cm_id_priv->av.ah_attr.grh.sgid_index,
+ grh->sgid_index,
&gid, &gid_attr);
if (!ret) {
if (gid_attr.ndev) {
- work->path[0].ifindex = gid_attr.ndev->ifindex;
- work->path[0].net = dev_net(gid_attr.ndev);
+ work->path[0].rec_type =
+ sa_conv_gid_to_pathrec_type(gid_attr.gid_type);
+ sa_path_set_ifindex(&work->path[0],
+ gid_attr.ndev->ifindex);
+ sa_path_set_ndev(&work->path[0],
+ dev_net(gid_attr.ndev));
dev_put(gid_attr.ndev);
+ } else {
+ work->path[0].rec_type = SA_PATH_REC_TYPE_IB;
}
- work->path[0].gid_type = gid_attr.gid_type;
+ if (req_msg->alt_local_lid)
+ work->path[1].rec_type = work->path[0].rec_type;
+ cm_format_paths_from_req(req_msg, &work->path[0],
+ &work->path[1]);
+ if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
+ sa_path_set_dmac(&work->path[0],
+ cm_id_priv->av.ah_attr.roce.dmac);
+ work->path[0].hop_limit = grh->hop_limit;
ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
cm_id_priv);
}
@@ -1782,11 +1803,18 @@ static int cm_req_handler(struct cm_work *work)
&work->path[0].sgid,
&gid_attr);
if (!err && gid_attr.ndev) {
- work->path[0].ifindex = gid_attr.ndev->ifindex;
- work->path[0].net = dev_net(gid_attr.ndev);
+ work->path[0].rec_type =
+ sa_conv_gid_to_pathrec_type(gid_attr.gid_type);
+ sa_path_set_ifindex(&work->path[0],
+ gid_attr.ndev->ifindex);
+ sa_path_set_ndev(&work->path[0],
+ dev_net(gid_attr.ndev));
dev_put(gid_attr.ndev);
+ } else {
+ work->path[0].rec_type = SA_PATH_REC_TYPE_IB;
}
- work->path[0].gid_type = gid_attr.gid_type;
+ if (req_msg->alt_local_lid)
+ work->path[1].rec_type = work->path[0].rec_type;
ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
&work->path[0].sgid, sizeof work->path[0].sgid,
NULL, 0);
@@ -2811,7 +2839,7 @@ out:
static void cm_format_lap(struct cm_lap_msg *lap_msg,
struct cm_id_private *cm_id_priv,
- struct ib_sa_path_rec *alternate_path,
+ struct sa_path_rec *alternate_path,
const void *private_data,
u8 private_data_len)
{
@@ -2822,8 +2850,10 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg,
cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
/* todo: need remote CM response timeout */
cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
- lap_msg->alt_local_lid = alternate_path->slid;
- lap_msg->alt_remote_lid = alternate_path->dlid;
+ lap_msg->alt_local_lid =
+ htons(ntohl(sa_path_get_slid(alternate_path)));
+ lap_msg->alt_remote_lid =
+ htons(ntohl(sa_path_get_dlid(alternate_path)));
lap_msg->alt_local_gid = alternate_path->sgid;
lap_msg->alt_remote_gid = alternate_path->dgid;
cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
@@ -2841,7 +2871,7 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg,
}
int ib_send_cm_lap(struct ib_cm_id *cm_id,
- struct ib_sa_path_rec *alternate_path,
+ struct sa_path_rec *alternate_path,
const void *private_data,
u8 private_data_len)
{
@@ -2895,14 +2925,15 @@ out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
EXPORT_SYMBOL(ib_send_cm_lap);
static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
- struct ib_sa_path_rec *path,
+ struct sa_path_rec *path,
struct cm_lap_msg *lap_msg)
{
memset(path, 0, sizeof *path);
+ path->rec_type = SA_PATH_REC_TYPE_IB;
path->dgid = lap_msg->alt_local_gid;
path->sgid = lap_msg->alt_remote_gid;
- path->dlid = lap_msg->alt_local_lid;
- path->slid = lap_msg->alt_remote_lid;
+ sa_path_set_dlid(path, htonl(ntohs(lap_msg->alt_local_lid)));
+ sa_path_set_slid(path, htonl(ntohs(lap_msg->alt_remote_lid)));
path->flow_label = cm_lap_get_flow_label(lap_msg);
path->hop_limit = lap_msg->alt_hop_limit;
path->traffic_class = cm_lap_get_traffic_class(lap_msg);
@@ -3409,6 +3440,8 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg,
if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
goto discard;
+ pr_debug_ratelimited("CM: failed sending MAD in state %d. (%s)\n",
+ state, ib_wc_status_msg(wc_status));
switch (state) {
case IB_CM_REQ_SENT:
case IB_CM_MRA_REQ_RCVD:
@@ -3706,7 +3739,7 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent,
atomic_long_inc(&port->counter_group[CM_RECV].
counter[attr_id - CM_ATTR_ID_OFFSET]);
- work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths,
+ work = kmalloc(sizeof(*work) + sizeof(struct sa_path_rec) * paths,
GFP_KERNEL);
if (!work) {
ib_free_recv_mad(mad_recv_wc);
@@ -3798,7 +3831,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
cm_id_priv->responder_resources;
qp_attr->min_rnr_timer = 0;
}
- if (cm_id_priv->alt_av.ah_attr.dlid) {
+ if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) {
*qp_attr_mask |= IB_QP_ALT_PATH;
qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
@@ -3852,7 +3885,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
default:
break;
}
- if (cm_id_priv->alt_av.ah_attr.dlid) {
+ if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) {
*qp_attr_mask |= IB_QP_PATH_MIG_STATE;
qp_attr->path_mig_state = IB_MIG_REARM;
}
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 3e70a9c5d79d..0eb393237ba2 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -198,6 +198,7 @@ struct cma_device {
atomic_t refcount;
struct list_head id_list;
enum ib_gid_type *default_gid_type;
+ u8 *default_roce_tos;
};
struct rdma_bind_list {
@@ -269,8 +270,7 @@ struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
int cma_get_default_gid_type(struct cma_device *cma_dev,
unsigned int port)
{
- if (port < rdma_start_port(cma_dev->device) ||
- port > rdma_end_port(cma_dev->device))
+ if (!rdma_is_port_valid(cma_dev->device, port))
return -EINVAL;
return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
@@ -282,8 +282,7 @@ int cma_set_default_gid_type(struct cma_device *cma_dev,
{
unsigned long supported_gids;
- if (port < rdma_start_port(cma_dev->device) ||
- port > rdma_end_port(cma_dev->device))
+ if (!rdma_is_port_valid(cma_dev->device, port))
return -EINVAL;
supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
@@ -297,6 +296,25 @@ int cma_set_default_gid_type(struct cma_device *cma_dev,
return 0;
}
+int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port)
+{
+ if (!rdma_is_port_valid(cma_dev->device, port))
+ return -EINVAL;
+
+ return cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)];
+}
+
+int cma_set_default_roce_tos(struct cma_device *cma_dev, unsigned int port,
+ u8 default_roce_tos)
+{
+ if (!rdma_is_port_valid(cma_dev->device, port))
+ return -EINVAL;
+
+ cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)] =
+ default_roce_tos;
+
+ return 0;
+}
struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
{
return cma_dev->device;
@@ -343,6 +361,7 @@ struct rdma_id_private {
u32 options;
u8 srq;
u8 tos;
+ bool tos_set;
u8 reuseaddr;
u8 afonly;
enum ib_gid_type gid_type;
@@ -604,22 +623,11 @@ static inline int cma_validate_port(struct ib_device *device, u8 port,
if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
return ret;
- if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
+ if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port))
ndev = dev_get_by_index(&init_net, bound_if_index);
- if (ndev && ndev->flags & IFF_LOOPBACK) {
- pr_info("detected loopback device\n");
- dev_put(ndev);
-
- if (!device->get_netdev)
- return -EOPNOTSUPP;
-
- ndev = device->get_netdev(device, port);
- if (!ndev)
- return -ENODEV;
- }
- } else {
+ else
gid_type = IB_GID_TYPE_IB;
- }
+
ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
ndev, NULL);
@@ -709,6 +717,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
union ib_gid gid, sgid, *dgid;
u16 pkey, index;
u8 p;
+ enum ib_port_state port_state;
int i;
cma_dev = NULL;
@@ -724,6 +733,8 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
continue;
+ if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
+ continue;
for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
&gid, NULL);
i++) {
@@ -735,7 +746,8 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
}
if (!cma_dev && (gid.global.subnet_prefix ==
- dgid->global.subnet_prefix)) {
+ dgid->global.subnet_prefix) &&
+ port_state == IB_PORT_ACTIVE) {
cma_dev = cur_dev;
sgid = gid;
id_priv->id.port_num = p;
@@ -778,6 +790,7 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
id_priv->id.event_handler = event_handler;
id_priv->id.ps = ps;
id_priv->id.qp_type = qp_type;
+ id_priv->tos_set = false;
spin_lock_init(&id_priv->lock);
mutex_init(&id_priv->qp_mutex);
init_completion(&id_priv->comp);
@@ -905,7 +918,8 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
goto out;
ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
- qp_attr.ah_attr.grh.sgid_index, &sgid, NULL);
+ rdma_ah_read_grh(&qp_attr.ah_attr)->sgid_index,
+ &sgid, NULL);
if (ret)
goto out;
@@ -1019,6 +1033,8 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
} else
ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
qp_attr_mask);
+ qp_attr->port_num = id_priv->id.port_num;
+ *qp_attr_mask |= IB_QP_PORT;
} else
ret = -ENOSYS;
@@ -1103,7 +1119,7 @@ static inline int cma_any_port(struct sockaddr *addr)
static void cma_save_ib_info(struct sockaddr *src_addr,
struct sockaddr *dst_addr,
struct rdma_cm_id *listen_id,
- struct ib_sa_path_rec *path)
+ struct sa_path_rec *path)
{
struct sockaddr_ib *listen_ib, *ib;
@@ -1249,7 +1265,7 @@ static int cma_save_req_info(const struct ib_cm_event *ib_event,
memcpy(&req->local_gid, &req_param->primary_path->sgid,
sizeof(req->local_gid));
req->has_gid = true;
- req->service_id = req_param->primary_path->service_id;
+ req->service_id = req_param->primary_path->service_id;
req->pkey = be16_to_cpu(req_param->primary_path->pkey);
if (req->pkey != req_param->bth_pkey)
pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n"
@@ -1689,6 +1705,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
return 0;
reject:
+ pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply. status %d\n", ret);
cma_modify_qp_err(id_priv);
ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
NULL, 0, NULL, 0);
@@ -1730,6 +1747,9 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
event.status = -ETIMEDOUT;
break;
case IB_CM_REP_RECEIVED:
+ if (cma_comp(id_priv, RDMA_CM_CONNECT) &&
+ (id_priv->id.qp_type != IB_QPT_UD))
+ ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
if (id_priv->id.qp) {
event.status = cma_rep_recv(id_priv);
event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
@@ -1760,6 +1780,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
/* ignore event */
goto out;
case IB_CM_REJ_RECEIVED:
+ pr_debug_ratelimited("RDMA CM: REJECTED: %s\n", rdma_reject_msg(&id_priv->id,
+ ib_event->param.rej_rcvd.reason));
cma_modify_qp_err(id_priv);
event.status = ib_event->param.rej_rcvd.reason;
event.event = RDMA_CM_EVENT_REJECTED;
@@ -1794,8 +1816,9 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
struct rdma_cm_id *id;
struct rdma_route *rt;
const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
+ struct sa_path_rec *path = ib_event->param.req_rcvd.primary_path;
const __be64 service_id =
- ib_event->param.req_rcvd.primary_path->service_id;
+ ib_event->param.req_rcvd.primary_path->service_id;
int ret;
id = rdma_create_id(listen_id->route.addr.dev_addr.net,
@@ -1817,7 +1840,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
if (!rt->path_rec)
goto err;
- rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
+ rt->path_rec[0] = *path;
if (rt->num_paths == 2)
rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
@@ -2266,10 +2289,11 @@ void rdma_set_service_type(struct rdma_cm_id *id, int tos)
id_priv = container_of(id, struct rdma_id_private, id);
id_priv->tos = (u8) tos;
+ id_priv->tos_set = true;
}
EXPORT_SYMBOL(rdma_set_service_type);
-static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
+static void cma_query_handler(int status, struct sa_path_rec *path_rec,
void *context)
{
struct cma_work *work = context;
@@ -2285,6 +2309,8 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
work->new_state = RDMA_CM_ADDR_RESOLVED;
work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
work->event.status = status;
+ pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n",
+ status);
}
queue_work(cma_wq, &work->work);
@@ -2294,18 +2320,24 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
struct cma_work *work)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
- struct ib_sa_path_rec path_rec;
+ struct sa_path_rec path_rec;
ib_sa_comp_mask comp_mask;
struct sockaddr_in6 *sin6;
struct sockaddr_ib *sib;
memset(&path_rec, 0, sizeof path_rec);
+
+ if (rdma_cap_opa_ah(id_priv->id.device, id_priv->id.port_num))
+ path_rec.rec_type = SA_PATH_REC_TYPE_OPA;
+ else
+ path_rec.rec_type = SA_PATH_REC_TYPE_IB;
rdma_addr_get_sgid(dev_addr, &path_rec.sgid);
rdma_addr_get_dgid(dev_addr, &path_rec.dgid);
path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
path_rec.numb_path = 1;
path_rec.reversible = 1;
- path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
+ path_rec.service_id = rdma_get_service_id(&id_priv->id,
+ cma_dst_addr(id_priv));
comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
@@ -2419,7 +2451,7 @@ err1:
}
int rdma_set_ib_paths(struct rdma_cm_id *id,
- struct ib_sa_path_rec *path_rec, int num_paths)
+ struct sa_path_rec *path_rec, int num_paths)
{
struct rdma_id_private *id_priv;
int ret;
@@ -2467,14 +2499,12 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos)
struct net_device *dev;
prio = rt_tos2priority(tos);
- dev = ndev->priv_flags & IFF_802_1Q_VLAN ?
- vlan_dev_real_dev(ndev) : ndev;
-
+ dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev;
if (dev->num_tc)
return netdev_get_prio_tc_map(dev, prio);
#if IS_ENABLED(CONFIG_VLAN_8021Q)
- if (ndev->priv_flags & IFF_802_1Q_VLAN)
+ if (is_vlan_dev(ndev))
return (vlan_dev_get_egress_qos_mask(ndev, prio) &
VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
#endif
@@ -2500,6 +2530,10 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
struct cma_work *work;
int ret;
struct net_device *ndev = NULL;
+ enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+ u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
+ rdma_start_port(id_priv->cma_dev->device)];
+ u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
work = kzalloc(sizeof *work, GFP_KERNEL);
@@ -2526,36 +2560,22 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
goto err2;
}
- if (ndev->flags & IFF_LOOPBACK) {
- dev_put(ndev);
- if (!id_priv->id.device->get_netdev) {
- ret = -EOPNOTSUPP;
- goto err2;
- }
-
- ndev = id_priv->id.device->get_netdev(id_priv->id.device,
- id_priv->id.port_num);
- if (!ndev) {
- ret = -ENODEV;
- goto err2;
- }
- }
-
- route->path_rec->net = &init_net;
- route->path_rec->ifindex = ndev->ifindex;
supported_gids = roce_gid_type_mask_support(id_priv->id.device,
id_priv->id.port_num);
- route->path_rec->gid_type =
- cma_route_gid_type(addr->dev_addr.network,
- supported_gids,
- id_priv->gid_type);
+ gid_type = cma_route_gid_type(addr->dev_addr.network,
+ supported_gids,
+ id_priv->gid_type);
+ route->path_rec->rec_type =
+ sa_conv_gid_to_pathrec_type(gid_type);
+ sa_path_set_ndev(route->path_rec, &init_net);
+ sa_path_set_ifindex(route->path_rec, ndev->ifindex);
}
if (!ndev) {
ret = -ENODEV;
goto err2;
}
- memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN);
+ sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
&route->path_rec->sgid);
@@ -2563,8 +2583,10 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
&route->path_rec->dgid);
/* Use the hint from IP Stack to select GID Type */
- if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network))
- route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network);
+ if (gid_type < ib_network_to_gid_type(addr->dev_addr.network))
+ gid_type = ib_network_to_gid_type(addr->dev_addr.network);
+ route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
+
if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB)
/* TODO: get the hoplimit from the inet/inet6 device */
route->path_rec->hop_limit = addr->dev_addr.hoplimit;
@@ -2573,7 +2595,8 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
route->path_rec->reversible = 1;
route->path_rec->pkey = cpu_to_be16(0xffff);
route->path_rec->mtu_selector = IB_SA_EQ;
- route->path_rec->sl = iboe_tos_to_sl(ndev, id_priv->tos);
+ route->path_rec->sl = iboe_tos_to_sl(ndev, tos);
+ route->path_rec->traffic_class = tos;
route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
route->path_rec->rate_selector = IB_SA_EQ;
route->path_rec->rate = iboe_get_rate(ndev);
@@ -2652,8 +2675,8 @@ static void cma_set_loopback(struct sockaddr *addr)
static int cma_bind_loopback(struct rdma_id_private *id_priv)
{
struct cma_device *cma_dev, *cur_dev;
- struct ib_port_attr port_attr;
union ib_gid gid;
+ enum ib_port_state port_state;
u16 pkey;
int ret;
u8 p;
@@ -2669,8 +2692,8 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
cma_dev = cur_dev;
for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
- if (!ib_query_port(cur_dev->device, p, &port_attr) &&
- port_attr.state == IB_PORT_ACTIVE) {
+ if (!ib_get_cached_port_state(cur_dev->device, p, &port_state) &&
+ port_state == IB_PORT_ACTIVE) {
cma_dev = cur_dev;
goto port_found;
}
@@ -2720,8 +2743,14 @@ static void addr_handler(int status, struct sockaddr *src_addr,
goto out;
memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
- if (!status && !id_priv->cma_dev)
+ if (!status && !id_priv->cma_dev) {
status = cma_acquire_dev(id_priv, NULL);
+ if (status)
+ pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n",
+ status);
+ } else {
+ pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status);
+ }
if (status) {
if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
@@ -2833,20 +2862,26 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
+ memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
if (id_priv->state == RDMA_CM_IDLE) {
ret = cma_bind_addr(id, src_addr, dst_addr);
- if (ret)
+ if (ret) {
+ memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
return ret;
+ }
}
- if (cma_family(id_priv) != dst_addr->sa_family)
+ if (cma_family(id_priv) != dst_addr->sa_family) {
+ memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
return -EINVAL;
+ }
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
+ memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
return -EINVAL;
+ }
atomic_inc(&id_priv->refcount);
- memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
if (cma_any_addr(dst_addr)) {
ret = cma_resolve_loopback(id_priv);
} else {
@@ -2962,6 +2997,43 @@ err:
return ret == -ENOSPC ? -EADDRNOTAVAIL : ret;
}
+static int cma_port_is_unique(struct rdma_bind_list *bind_list,
+ struct rdma_id_private *id_priv)
+{
+ struct rdma_id_private *cur_id;
+ struct sockaddr *daddr = cma_dst_addr(id_priv);
+ struct sockaddr *saddr = cma_src_addr(id_priv);
+ __be16 dport = cma_port(daddr);
+
+ hlist_for_each_entry(cur_id, &bind_list->owners, node) {
+ struct sockaddr *cur_daddr = cma_dst_addr(cur_id);
+ struct sockaddr *cur_saddr = cma_src_addr(cur_id);
+ __be16 cur_dport = cma_port(cur_daddr);
+
+ if (id_priv == cur_id)
+ continue;
+
+ /* different dest port -> unique */
+ if (!cma_any_port(cur_daddr) &&
+ (dport != cur_dport))
+ continue;
+
+ /* different src address -> unique */
+ if (!cma_any_addr(saddr) &&
+ !cma_any_addr(cur_saddr) &&
+ cma_addr_cmp(saddr, cur_saddr))
+ continue;
+
+ /* different dst address -> unique */
+ if (!cma_any_addr(cur_daddr) &&
+ cma_addr_cmp(daddr, cur_daddr))
+ continue;
+
+ return -EADDRNOTAVAIL;
+ }
+ return 0;
+}
+
static int cma_alloc_any_port(enum rdma_port_space ps,
struct rdma_id_private *id_priv)
{
@@ -2974,9 +3046,19 @@ static int cma_alloc_any_port(enum rdma_port_space ps,
remaining = (high - low) + 1;
rover = prandom_u32() % remaining + low;
retry:
- if (last_used_port != rover &&
- !cma_ps_find(net, ps, (unsigned short)rover)) {
- int ret = cma_alloc_port(ps, id_priv, rover);
+ if (last_used_port != rover) {
+ struct rdma_bind_list *bind_list;
+ int ret;
+
+ bind_list = cma_ps_find(net, ps, (unsigned short)rover);
+
+ if (!bind_list) {
+ ret = cma_alloc_port(ps, id_priv, rover);
+ } else {
+ ret = cma_port_is_unique(bind_list, id_priv);
+ if (!ret)
+ cma_bind_port(bind_list, id_priv);
+ }
/*
* Remember previously used port number in order to avoid
* re-using same port immediately after it is closed.
@@ -3205,6 +3287,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
{
struct rdma_id_private *id_priv;
int ret;
+ struct sockaddr *daddr;
if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 &&
addr->sa_family != AF_IB)
@@ -3244,6 +3327,9 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
if (ret)
goto err2;
+ daddr = cma_dst_addr(id_priv);
+ daddr->sa_family = addr->sa_family;
+
return 0;
err2:
if (id_priv->cma_dev)
@@ -3308,10 +3394,13 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
if (rep->status != IB_SIDR_SUCCESS) {
event.event = RDMA_CM_EVENT_UNREACHABLE;
event.status = ib_event->param.sidr_rep_rcvd.status;
+ pr_debug_ratelimited("RDMA CM: UNREACHABLE: bad SIDR reply. status %d\n",
+ event.status);
break;
}
ret = cma_set_qkey(id_priv, rep->qkey);
if (ret) {
+ pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to set qkey. status %d\n", ret);
event.event = RDMA_CM_EVENT_ADDR_ERROR;
event.status = ret;
break;
@@ -3583,6 +3672,9 @@ static int cma_accept_iw(struct rdma_id_private *id_priv,
struct iw_cm_conn_param iw_param;
int ret;
+ if (!conn_param)
+ return -EINVAL;
+
ret = cma_modify_qp_rtr(id_priv, conn_param);
if (ret)
return ret;
@@ -3760,10 +3852,17 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
if (!status)
status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
+ else
+ pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n",
+ status);
mutex_lock(&id_priv->qp_mutex);
- if (!status && id_priv->id.qp)
+ if (!status && id_priv->id.qp) {
status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
be16_to_cpu(multicast->rec.mlid));
+ if (status)
+ pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to attach QP. status %d\n",
+ status);
+ }
mutex_unlock(&id_priv->qp_mutex);
memset(&event, 0, sizeof event);
@@ -3833,63 +3932,10 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
}
}
-static void cma_query_sa_classport_info_cb(int status,
- struct ib_class_port_info *rec,
- void *context)
-{
- struct class_port_info_context *cb_ctx = context;
-
- WARN_ON(!context);
-
- if (status || !rec) {
- pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n",
- cb_ctx->device->name, cb_ctx->port_num, status);
- goto out;
- }
-
- memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info));
-
-out:
- complete(&cb_ctx->done);
-}
-
-static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num,
- struct ib_class_port_info *class_port_info)
-{
- struct class_port_info_context *cb_ctx;
- int ret;
-
- cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL);
- if (!cb_ctx)
- return -ENOMEM;
-
- cb_ctx->device = device;
- cb_ctx->class_port_info = class_port_info;
- cb_ctx->port_num = port_num;
- init_completion(&cb_ctx->done);
-
- ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num,
- CMA_QUERY_CLASSPORT_INFO_TIMEOUT,
- GFP_KERNEL, cma_query_sa_classport_info_cb,
- cb_ctx, &cb_ctx->sa_query);
- if (ret < 0) {
- pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n",
- device->name, port_num, ret);
- goto out;
- }
-
- wait_for_completion(&cb_ctx->done);
-
-out:
- kfree(cb_ctx);
- return ret;
-}
-
static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
struct cma_multicast *mc)
{
struct ib_sa_mcmember_rec rec;
- struct ib_class_port_info class_port_info;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
ib_sa_comp_mask comp_mask;
int ret;
@@ -3910,21 +3956,14 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
rec.join_state = mc->join_state;
- if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) {
- ret = cma_query_sa_classport_info(id_priv->id.device,
- id_priv->id.port_num,
- &class_port_info);
-
- if (ret)
- return ret;
-
- if (!(ib_get_cpi_capmask2(&class_port_info) &
- IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) {
- pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
- "RDMA CM: SM doesn't support Send Only Full Member option\n",
- id_priv->id.device->name, id_priv->id.port_num);
- return -EOPNOTSUPP;
- }
+ if ((rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) &&
+ (!ib_sa_sendonly_fullmem_support(&sa_client,
+ id_priv->id.device,
+ id_priv->id.port_num))) {
+ pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
+ "RDMA CM: SM doesn't support Send Only Full Member option\n",
+ id_priv->id.device->name, id_priv->id.port_num);
+ return -EOPNOTSUPP;
}
comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
@@ -4229,15 +4268,21 @@ static void cma_add_one(struct ib_device *device)
cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
sizeof(*cma_dev->default_gid_type),
GFP_KERNEL);
- if (!cma_dev->default_gid_type) {
- kfree(cma_dev);
- return;
- }
+ if (!cma_dev->default_gid_type)
+ goto free_cma_dev;
+
+ cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
+ sizeof(*cma_dev->default_roce_tos),
+ GFP_KERNEL);
+ if (!cma_dev->default_roce_tos)
+ goto free_gid_type;
+
for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
supported_gids = roce_gid_type_mask_support(device, i);
WARN_ON(!supported_gids);
cma_dev->default_gid_type[i - rdma_start_port(device)] =
find_first_bit(&supported_gids, BITS_PER_LONG);
+ cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0;
}
init_completion(&cma_dev->comp);
@@ -4250,6 +4295,16 @@ static void cma_add_one(struct ib_device *device)
list_for_each_entry(id_priv, &listen_any_list, list)
cma_listen_on_dev(id_priv, cma_dev);
mutex_unlock(&lock);
+
+ return;
+
+free_gid_type:
+ kfree(cma_dev->default_gid_type);
+
+free_cma_dev:
+ kfree(cma_dev);
+
+ return;
}
static int cma_remove_id_dev(struct rdma_id_private *id_priv)
@@ -4318,6 +4373,7 @@ static void cma_remove_one(struct ib_device *device, void *client_data)
mutex_unlock(&lock);
cma_process_remove(cma_dev);
+ kfree(cma_dev->default_roce_tos);
kfree(cma_dev->default_gid_type);
kfree(cma_dev);
}
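
For context, a minimal sketch of how a kernel ULP opts into an explicit type of service; with this change that sets tos_set, so route resolution uses the given value instead of the per-port default_roce_tos fallback in cma_resolve_iboe_route(). The function name example_connect, the handler argument, and the ToS value 0x10 are illustrative only, not part of the patch:

#include <net/net_namespace.h>
#include <rdma/rdma_cm.h>

static int example_connect(struct sockaddr *src, struct sockaddr *dst,
			   rdma_cm_event_handler handler, void *ctx)
{
	struct rdma_cm_id *id;
	int ret;

	id = rdma_create_id(&init_net, handler, ctx, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id))
		return PTR_ERR(id);

	/* Explicit ToS: tos_set becomes true, so this value is used for
	 * both the SL mapping and traffic_class instead of the configfs
	 * default_roce_tos. */
	rdma_set_service_type(id, 0x10);

	ret = rdma_resolve_addr(id, src, dst, 2000);
	if (ret)
		rdma_destroy_id(id);
	return ret;
}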
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 41573df1d9fc..54076a3e8007 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -139,8 +139,50 @@ static ssize_t default_roce_mode_store(struct config_item *item,
CONFIGFS_ATTR(, default_roce_mode);
+static ssize_t default_roce_tos_show(struct config_item *item, char *buf)
+{
+ struct cma_device *cma_dev;
+ struct cma_dev_port_group *group;
+ ssize_t ret;
+ u8 tos;
+
+ ret = cma_configfs_params_get(item, &cma_dev, &group);
+ if (ret)
+ return ret;
+
+ tos = cma_get_default_roce_tos(cma_dev, group->port_num);
+ cma_configfs_params_put(cma_dev);
+
+ return sprintf(buf, "%u\n", tos);
+}
+
+static ssize_t default_roce_tos_store(struct config_item *item,
+ const char *buf, size_t count)
+{
+ struct cma_device *cma_dev;
+ struct cma_dev_port_group *group;
+ ssize_t ret;
+ u8 tos;
+
+ ret = kstrtou8(buf, 0, &tos);
+ if (ret)
+ return ret;
+
+ ret = cma_configfs_params_get(item, &cma_dev, &group);
+ if (ret)
+ return ret;
+
+ ret = cma_set_default_roce_tos(cma_dev, group->port_num, tos);
+ cma_configfs_params_put(cma_dev);
+
+ return ret ? ret : strnlen(buf, count);
+}
+
+CONFIGFS_ATTR(, default_roce_tos);
+
static struct configfs_attribute *cma_configfs_attributes[] = {
&attr_default_roce_mode,
+ &attr_default_roce_tos,
NULL,
};
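
The new attribute sits next to default_roce_mode in the rdma_cm configfs tree. A small userspace sketch of setting it; the path below (mlx5_0, port 1), the mount point /sys/kernel/config, and the ToS value are assumptions mirroring the existing default_roce_mode layout, and the device directory is expected to have been created beforehand:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Illustrative path; adjust device and port for the local system. */
	const char *path =
		"/sys/kernel/config/rdma_cm/mlx5_0/ports/1/default_roce_tos";
	const char *tos = "104\n";	/* e.g. DSCP 26 shifted into the ToS byte */
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, tos, strlen(tos)) != (ssize_t)strlen(tos))
		perror("write");
	close(fd);
	return 0;
}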
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index d29372624f3a..11ae67514e13 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -35,8 +35,19 @@
#include <linux/list.h>
#include <linux/spinlock.h>
+#include <linux/cgroup_rdma.h>
#include <rdma/ib_verbs.h>
+#include <rdma/ib_mad.h>
+#include "mad_priv.h"
+
+struct pkey_index_qp_list {
+ struct list_head pkey_index_list;
+ u16 pkey_index;
+ /* Lock to hold while iterating the qp_list. */
+ spinlock_t qp_list_lock;
+ struct list_head qp_list;
+};
#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS)
int cma_configfs_init(void);
@@ -62,6 +73,9 @@ int cma_get_default_gid_type(struct cma_device *cma_dev,
int cma_set_default_gid_type(struct cma_device *cma_dev,
unsigned int port,
enum ib_gid_type default_gid_type);
+int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port);
+int cma_set_default_roce_tos(struct cma_device *cma_dev, unsigned int port,
+ u8 default_roce_tos);
struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev);
int ib_device_register_sysfs(struct ib_device *device,
@@ -121,6 +135,35 @@ int ib_cache_setup_one(struct ib_device *device);
void ib_cache_cleanup_one(struct ib_device *device);
void ib_cache_release_one(struct ib_device *device);
+#ifdef CONFIG_CGROUP_RDMA
+int ib_device_register_rdmacg(struct ib_device *device);
+void ib_device_unregister_rdmacg(struct ib_device *device);
+
+int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index);
+
+void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index);
+#else
+static inline int ib_device_register_rdmacg(struct ib_device *device)
+{ return 0; }
+
+static inline void ib_device_unregister_rdmacg(struct ib_device *device)
+{ }
+
+static inline int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index)
+{ return 0; }
+
+static inline void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index)
+{ }
+#endif
+
static inline bool rdma_is_upper_dev_rcu(struct net_device *dev,
struct net_device *upper)
{
@@ -136,6 +179,16 @@ void ib_mad_cleanup(void);
int ib_sa_init(void);
void ib_sa_cleanup(void);
+int ibnl_init(void);
+void ibnl_cleanup(void);
+
+/**
+ * Check if there are any listeners to the netlink group
+ * @group: the netlink group ID
+ * Returns 0 if the group has listeners, or a negative value if it has none.
+ */
+int ibnl_chk_listeners(unsigned int group);
+
int ib_nl_handle_resolve_resp(struct sk_buff *skb,
struct netlink_callback *cb);
int ib_nl_handle_set_timeout(struct sk_buff *skb,
@@ -143,4 +196,109 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb,
int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
struct netlink_callback *cb);
+int ib_get_cached_subnet_prefix(struct ib_device *device,
+ u8 port_num,
+ u64 *sn_pfx);
+
+#ifdef CONFIG_SECURITY_INFINIBAND
+int ib_security_pkey_access(struct ib_device *dev,
+ u8 port_num,
+ u16 pkey_index,
+ void *sec);
+
+void ib_security_destroy_port_pkey_list(struct ib_device *device);
+
+void ib_security_cache_change(struct ib_device *device,
+ u8 port_num,
+ u64 subnet_prefix);
+
+int ib_security_modify_qp(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_udata *udata);
+
+int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev);
+void ib_destroy_qp_security_begin(struct ib_qp_security *sec);
+void ib_destroy_qp_security_abort(struct ib_qp_security *sec);
+void ib_destroy_qp_security_end(struct ib_qp_security *sec);
+int ib_open_shared_qp_security(struct ib_qp *qp, struct ib_device *dev);
+void ib_close_shared_qp_security(struct ib_qp_security *sec);
+int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
+ enum ib_qp_type qp_type);
+void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent);
+int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index);
+#else
+static inline int ib_security_pkey_access(struct ib_device *dev,
+ u8 port_num,
+ u16 pkey_index,
+ void *sec)
+{
+ return 0;
+}
+
+static inline void ib_security_destroy_port_pkey_list(struct ib_device *device)
+{
+}
+
+static inline void ib_security_cache_change(struct ib_device *device,
+ u8 port_num,
+ u64 subnet_prefix)
+{
+}
+
+static inline int ib_security_modify_qp(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_udata *udata)
+{
+ return qp->device->modify_qp(qp->real_qp,
+ qp_attr,
+ qp_attr_mask,
+ udata);
+}
+
+static inline int ib_create_qp_security(struct ib_qp *qp,
+ struct ib_device *dev)
+{
+ return 0;
+}
+
+static inline void ib_destroy_qp_security_begin(struct ib_qp_security *sec)
+{
+}
+
+static inline void ib_destroy_qp_security_abort(struct ib_qp_security *sec)
+{
+}
+
+static inline void ib_destroy_qp_security_end(struct ib_qp_security *sec)
+{
+}
+
+static inline int ib_open_shared_qp_security(struct ib_qp *qp,
+ struct ib_device *dev)
+{
+ return 0;
+}
+
+static inline void ib_close_shared_qp_security(struct ib_qp_security *sec)
+{
+}
+
+static inline int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
+ enum ib_qp_type qp_type)
+{
+ return 0;
+}
+
+static inline void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent)
+{
+}
+
+static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map,
+ u16 pkey_index)
+{
+ return 0;
+}
+#endif
#endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index a754fc727de5..f2ae75fa3128 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -29,7 +29,13 @@ static int __ib_process_cq(struct ib_cq *cq, int budget)
{
int i, n, completed = 0;
- while ((n = ib_poll_cq(cq, IB_POLL_BATCH, cq->wc)) > 0) {
+ /*
+ * budget might be (-1) if the caller does not
+ * want to bound this call, thus we need unsigned
+ * minimum here.
+ */
+ while ((n = ib_poll_cq(cq, min_t(u32, IB_POLL_BATCH,
+ budget - completed), cq->wc)) > 0) {
for (i = 0; i < n; i++) {
struct ib_wc *wc = &cq->wc[i];
@@ -58,8 +64,8 @@ static int __ib_process_cq(struct ib_cq *cq, int budget)
* %IB_POLL_DIRECT CQ. It does not offload CQ processing to a different
* context and does not ask for completion interrupts from the HCA.
*
- * Note: for compatibility reasons -1 can be passed in %budget for unlimited
- * polling. Do not use this feature in new code, it will be removed soon.
+ * Note: do not pass -1 as %budget unless it is guaranteed that the number
+ * of completions that will be processed is small.
*/
int ib_process_cq_direct(struct ib_cq *cq, int budget)
{
@@ -120,7 +126,7 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
*
* This is the proper interface to allocate a CQ for in-kernel users. A
* CQ allocated with this interface will automatically be polled from the
- * specified context. The ULP needs must use wr->wr_cqe instead of wr->wr_id
+ * specified context. The ULP must use wr->wr_cqe instead of wr->wr_id
* to use this CQ abstraction.
*/
struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
@@ -196,7 +202,7 @@ void ib_free_cq(struct ib_cq *cq)
irq_poll_disable(&cq->iop);
break;
case IB_POLL_WORKQUEUE:
- flush_work(&cq->work);
+ cancel_work_sync(&cq->work);
break;
default:
WARN_ON_ONCE(1);
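
As the updated comment notes, consumers of ib_alloc_cq() identify completions through wr->wr_cqe rather than wr->wr_id. A minimal sketch of that pattern; example_ctx, example_done and example_post are illustrative, and the CQ itself would come from ib_alloc_cq(dev, NULL, nr_cqe, 0, IB_POLL_WORKQUEUE) or one of the other polling contexts:

#include <linux/slab.h>
#include <rdma/ib_verbs.h>

struct example_ctx {
	struct ib_cqe cqe;	/* embedded; recovered via container_of() */
	/* per-request state would live here */
};

static void example_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct example_ctx *ctx =
		container_of(wc->wr_cqe, struct example_ctx, cqe);

	if (wc->status != IB_WC_SUCCESS)
		pr_err("send failed: %s\n", ib_wc_status_msg(wc->status));
	kfree(ctx);
}

static int example_post(struct ib_qp *qp, struct ib_sge *sge)
{
	struct ib_send_wr wr = {}, *bad_wr;
	struct example_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	int ret;

	if (!ctx)
		return -ENOMEM;
	ctx->cqe.done = example_done;
	wr.wr_cqe = &ctx->cqe;		/* instead of wr.wr_id */
	wr.sg_list = sge;
	wr.num_sge = 1;
	wr.opcode = IB_WR_SEND;
	wr.send_flags = IB_SEND_SIGNALED;

	ret = ib_post_send(qp, &wr, &bad_wr);
	if (ret)
		kfree(ctx);
	return ret;
}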
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 571974cd3919..a5dfab6adf49 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -39,6 +39,8 @@
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
+#include <linux/security.h>
+#include <linux/notifier.h>
#include <rdma/rdma_netlink.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
@@ -82,6 +84,14 @@ static LIST_HEAD(client_list);
static DEFINE_MUTEX(device_mutex);
static DECLARE_RWSEM(lists_rwsem);
+static int ib_security_change(struct notifier_block *nb, unsigned long event,
+ void *lsm_data);
+static void ib_policy_change_task(struct work_struct *work);
+static DECLARE_WORK(ib_policy_change_work, ib_policy_change_task);
+
+static struct notifier_block ibdev_lsm_nb = {
+ .notifier_call = ib_security_change,
+};
static int ib_device_check_mandatory(struct ib_device *device)
{
@@ -172,8 +182,16 @@ static void ib_device_release(struct device *device)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
- ib_cache_release_one(dev);
- kfree(dev->port_immutable);
+ WARN_ON(dev->reg_state == IB_DEV_REGISTERED);
+ if (dev->reg_state == IB_DEV_UNREGISTERED) {
+ /*
+ * In IB_DEV_UNINITIALIZED state, cache or port table
+ * is not even created. Free cache and port table only when
+ * device reaches UNREGISTERED state.
+ */
+ ib_cache_release_one(dev);
+ kfree(dev->port_immutable);
+ }
kfree(dev);
}
@@ -317,6 +335,65 @@ void ib_get_device_fw_str(struct ib_device *dev, char *str, size_t str_len)
}
EXPORT_SYMBOL(ib_get_device_fw_str);
+static int setup_port_pkey_list(struct ib_device *device)
+{
+ int i;
+
+ /**
+ * device->port_pkey_list is indexed directly by the port number;
+ * therefore it is declared as a 1-based array with potential empty
+ * slots at the beginning.
+ */
+ device->port_pkey_list = kcalloc(rdma_end_port(device) + 1,
+ sizeof(*device->port_pkey_list),
+ GFP_KERNEL);
+
+ if (!device->port_pkey_list)
+ return -ENOMEM;
+
+ for (i = 0; i < (rdma_end_port(device) + 1); i++) {
+ spin_lock_init(&device->port_pkey_list[i].list_lock);
+ INIT_LIST_HEAD(&device->port_pkey_list[i].pkey_list);
+ }
+
+ return 0;
+}
+
+static void ib_policy_change_task(struct work_struct *work)
+{
+ struct ib_device *dev;
+
+ down_read(&lists_rwsem);
+ list_for_each_entry(dev, &device_list, core_list) {
+ int i;
+
+ for (i = rdma_start_port(dev); i <= rdma_end_port(dev); i++) {
+ u64 sp;
+ int ret = ib_get_cached_subnet_prefix(dev,
+ i,
+ &sp);
+
+ WARN_ONCE(ret,
+ "ib_get_cached_subnet_prefix err: %d, this should never happen here\n",
+ ret);
+ if (!ret)
+ ib_security_cache_change(dev, i, sp);
+ }
+ }
+ up_read(&lists_rwsem);
+}
+
+static int ib_security_change(struct notifier_block *nb, unsigned long event,
+ void *lsm_data)
+{
+ if (event != LSM_POLICY_CHANGE)
+ return NOTIFY_DONE;
+
+ schedule_work(&ib_policy_change_work);
+
+ return NOTIFY_OK;
+}
+
/**
* ib_register_device - Register an IB device with IB core
* @device:Device to register
@@ -333,6 +410,29 @@ int ib_register_device(struct ib_device *device,
int ret;
struct ib_client *client;
struct ib_udata uhw = {.outlen = 0, .inlen = 0};
+ struct device *parent = device->dev.parent;
+
+ WARN_ON_ONCE(!parent);
+ WARN_ON_ONCE(device->dma_device);
+ if (device->dev.dma_ops) {
+ /*
+ * The caller provided custom DMA operations. Copy the
+ * DMA-related fields that are used by e.g. dma_alloc_coherent()
+ * into device->dev.
+ */
+ device->dma_device = &device->dev;
+ if (!device->dev.dma_mask)
+ device->dev.dma_mask = parent->dma_mask;
+ if (!device->dev.coherent_dma_mask)
+ device->dev.coherent_dma_mask =
+ parent->coherent_dma_mask;
+ } else {
+ /*
+ * The caller did not provide custom DMA operations. Use the
+ * DMA mapping operations of the parent device.
+ */
+ device->dma_device = parent;
+ }
mutex_lock(&device_mutex);
@@ -354,26 +454,36 @@ int ib_register_device(struct ib_device *device,
goto out;
}
+ ret = setup_port_pkey_list(device);
+ if (ret) {
+ pr_warn("Couldn't create per port_pkey_list\n");
+ goto out;
+ }
+
ret = ib_cache_setup_one(device);
if (ret) {
pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n");
- goto out;
+ goto port_cleanup;
+ }
+
+ ret = ib_device_register_rdmacg(device);
+ if (ret) {
+ pr_warn("Couldn't register device with rdma cgroup\n");
+ goto cache_cleanup;
}
memset(&device->attrs, 0, sizeof(device->attrs));
ret = device->query_device(device, &device->attrs, &uhw);
if (ret) {
pr_warn("Couldn't query the device attributes\n");
- ib_cache_cleanup_one(device);
- goto out;
+ goto cache_cleanup;
}
ret = ib_device_register_sysfs(device, port_callback);
if (ret) {
pr_warn("Couldn't register device %s with driver model\n",
device->name);
- ib_cache_cleanup_one(device);
- goto out;
+ goto cache_cleanup;
}
device->reg_state = IB_DEV_REGISTERED;
@@ -385,6 +495,14 @@ int ib_register_device(struct ib_device *device,
down_write(&lists_rwsem);
list_add_tail(&device->core_list, &device_list);
up_write(&lists_rwsem);
+ mutex_unlock(&device_mutex);
+ return 0;
+
+cache_cleanup:
+ ib_cache_cleanup_one(device);
+ ib_cache_release_one(device);
+port_cleanup:
+ kfree(device->port_immutable);
out:
mutex_unlock(&device_mutex);
return ret;
@@ -421,9 +539,13 @@ void ib_unregister_device(struct ib_device *device)
mutex_unlock(&device_mutex);
+ ib_device_unregister_rdmacg(device);
ib_device_unregister_sysfs(device);
ib_cache_cleanup_one(device);
+ ib_security_destroy_port_pkey_list(device);
+ kfree(device->port_pkey_list);
+
down_write(&lists_rwsem);
spin_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
@@ -659,7 +781,7 @@ int ib_query_port(struct ib_device *device,
union ib_gid gid;
int err;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
memset(port_attr, 0, sizeof(*port_attr));
@@ -825,7 +947,7 @@ int ib_modify_port(struct ib_device *device,
if (!device->modify_port)
return -ENOSYS;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
return device->modify_port(device, port_num, port_modify_mask,
@@ -996,8 +1118,7 @@ static int __init ib_core_init(void)
return -ENOMEM;
ib_comp_wq = alloc_workqueue("ib-comp-wq",
- WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM,
- WQ_UNBOUND_MAX_ACTIVE);
+ WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
if (!ib_comp_wq) {
ret = -ENOMEM;
goto err;
@@ -1039,10 +1160,18 @@ static int __init ib_core_init(void)
goto err_sa;
}
+ ret = register_lsm_notifier(&ibdev_lsm_nb);
+ if (ret) {
+ pr_warn("Couldn't register LSM notifier. ret %d\n", ret);
+ goto err_ibnl_clients;
+ }
+
ib_cache_setup();
return 0;
+err_ibnl_clients:
+ ib_remove_ibnl_clients();
err_sa:
ib_sa_cleanup();
err_mad:
@@ -1062,6 +1191,7 @@ err:
static void __exit ib_core_cleanup(void)
{
+ unregister_lsm_notifier(&ibdev_lsm_nb);
ib_cache_cleanup();
ib_remove_ibnl_clients();
ib_sa_cleanup();
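
The dma_device selection in ib_register_device() assumes providers set dev.parent before registration. A hedged sketch of the expected provider side; mydrv_dev, the single-port setup and the omitted verbs wiring are placeholders, not a real driver:

#include <linux/pci.h>
#include <rdma/ib_verbs.h>

struct mydrv_dev {
	struct ib_device ibdev;
};

static int mydrv_register(struct mydrv_dev *mdev, struct pci_dev *pdev)
{
	struct ib_device *ibdev = &mdev->ibdev;

	/* DMA for ib_dma_* mappings comes from the PCI function. */
	ibdev->dev.parent = &pdev->dev;
	/*
	 * dev.dma_ops is left NULL, so ib_register_device() takes the
	 * "else" branch above and points dma_device at the parent.
	 * Remaining ib_device setup (name, verbs ops, port count) is
	 * omitted from this sketch.
	 */
	return ib_register_device(ibdev, NULL);
}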
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index cdfad5f26212..84d2615b5d4b 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -96,7 +96,8 @@ struct ib_fmr_pool {
void * arg);
void *flush_arg;
- struct task_struct *thread;
+ struct kthread_worker *worker;
+ struct kthread_work work;
atomic_t req_ser;
atomic_t flush_ser;
@@ -174,29 +175,19 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
spin_unlock_irq(&pool->pool_lock);
}
-static int ib_fmr_cleanup_thread(void *pool_ptr)
+static void ib_fmr_cleanup_func(struct kthread_work *work)
{
- struct ib_fmr_pool *pool = pool_ptr;
+ struct ib_fmr_pool *pool = container_of(work, struct ib_fmr_pool, work);
- do {
- if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) {
- ib_fmr_batch_release(pool);
-
- atomic_inc(&pool->flush_ser);
- wake_up_interruptible(&pool->force_wait);
-
- if (pool->flush_function)
- pool->flush_function(pool, pool->flush_arg);
- }
+ ib_fmr_batch_release(pool);
+ atomic_inc(&pool->flush_ser);
+ wake_up_interruptible(&pool->force_wait);
- set_current_state(TASK_INTERRUPTIBLE);
- if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 &&
- !kthread_should_stop())
- schedule();
- __set_current_state(TASK_RUNNING);
- } while (!kthread_should_stop());
+ if (pool->flush_function)
+ pool->flush_function(pool, pool->flush_arg);
- return 0;
+ if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0)
+ kthread_queue_work(pool->worker, &pool->work);
}
/**
@@ -265,15 +256,13 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
atomic_set(&pool->flush_ser, 0);
init_waitqueue_head(&pool->force_wait);
- pool->thread = kthread_run(ib_fmr_cleanup_thread,
- pool,
- "ib_fmr(%s)",
- device->name);
- if (IS_ERR(pool->thread)) {
- pr_warn(PFX "couldn't start cleanup thread\n");
- ret = PTR_ERR(pool->thread);
+ pool->worker = kthread_create_worker(0, "ib_fmr(%s)", device->name);
+ if (IS_ERR(pool->worker)) {
+ pr_warn(PFX "couldn't start cleanup kthread worker\n");
+ ret = PTR_ERR(pool->worker);
goto out_free_pool;
}
+ kthread_init_work(&pool->work, ib_fmr_cleanup_func);
{
struct ib_pool_fmr *fmr;
@@ -338,7 +327,7 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
LIST_HEAD(fmr_list);
int i;
- kthread_stop(pool->thread);
+ kthread_destroy_worker(pool->worker);
ib_fmr_batch_release(pool);
i = 0;
@@ -388,7 +377,7 @@ int ib_flush_fmr_pool(struct ib_fmr_pool *pool)
spin_unlock_irq(&pool->pool_lock);
serial = atomic_inc_return(&pool->req_ser);
- wake_up_process(pool->thread);
+ kthread_queue_work(pool->worker, &pool->work);
if (wait_event_interruptible(pool->force_wait,
atomic_read(&pool->flush_ser) - serial >= 0))
@@ -502,7 +491,7 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
list_add_tail(&fmr->list, &pool->dirty_list);
if (++pool->dirty_len >= pool->dirty_watermark) {
atomic_inc(&pool->req_ser);
- wake_up_process(pool->thread);
+ kthread_queue_work(pool->worker, &pool->work);
}
}
}
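
The FMR pool now follows the generic kthread_worker pattern instead of hand-rolling a wake_up_process() loop around a dedicated thread. Reduced to its essentials, and with myobj, my_flush and the worker name purely illustrative:

#include <linux/kthread.h>
#include <linux/printk.h>

struct myobj {
	struct kthread_worker *worker;
	struct kthread_work work;
};

static void my_flush(struct kthread_work *work)
{
	struct myobj *obj = container_of(work, struct myobj, work);

	pr_debug("deferred flush for %p\n", obj);
	/* the actual flush work would run here */
}

static int myobj_init(struct myobj *obj)
{
	obj->worker = kthread_create_worker(0, "my_flush_worker");
	if (IS_ERR(obj->worker))
		return PTR_ERR(obj->worker);
	kthread_init_work(&obj->work, my_flush);
	return 0;
}

static void myobj_kick(struct myobj *obj)
{
	/* replaces wake_up_process(); safe to call while already queued */
	kthread_queue_work(obj->worker, &obj->work);
}

static void myobj_fini(struct myobj *obj)
{
	kthread_destroy_worker(obj->worker);	/* flushes pending work */
}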
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index 3ef51a96bbf1..f13870e69ccd 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -472,12 +472,14 @@ int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max,
int ret;
const char *err_str = "";
- ret = nlmsg_validate(cb->nlh, nlh_len, policy_max-1, nlmsg_policy);
+ ret = nlmsg_validate(cb->nlh, nlh_len, policy_max - 1, nlmsg_policy,
+ NULL);
if (ret) {
err_str = "Invalid attribute";
goto parse_nlmsg_error;
}
- ret = nlmsg_parse(cb->nlh, nlh_len, nltb, policy_max-1, nlmsg_policy);
+ ret = nlmsg_parse(cb->nlh, nlh_len, nltb, policy_max - 1,
+ nlmsg_policy, NULL);
if (ret) {
err_str = "Unable to parse the nlmsg";
goto parse_nlmsg_error;
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index a009f7132c73..f8f53bb90837 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -40,9 +40,11 @@
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/module.h>
+#include <linux/security.h>
#include <rdma/ib_cache.h>
#include "mad_priv.h"
+#include "core_priv.h"
#include "mad_rmpp.h"
#include "smi.h"
#include "opa_smi.h"
@@ -316,7 +318,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
/* Validate device and port */
port_priv = ib_get_mad_port(device, port_num);
if (!port_priv) {
- dev_notice(&device->dev, "ib_register_mad_agent: Invalid port\n");
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: Invalid port %d\n",
+ port_num);
ret = ERR_PTR(-ENODEV);
goto error1;
}
@@ -367,6 +371,12 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
atomic_set(&mad_agent_priv->refcount, 1);
init_completion(&mad_agent_priv->comp);
+ ret2 = ib_mad_agent_security_setup(&mad_agent_priv->agent, qp_type);
+ if (ret2) {
+ ret = ERR_PTR(ret2);
+ goto error4;
+ }
+
spin_lock_irqsave(&port_priv->reg_lock, flags);
mad_agent_priv->agent.hi_tid = ++ib_mad_client_id;
@@ -384,7 +394,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
if (method) {
if (method_in_use(&method,
mad_reg_req))
- goto error4;
+ goto error5;
}
}
ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv,
@@ -400,14 +410,14 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
if (is_vendor_method_in_use(
vendor_class,
mad_reg_req))
- goto error4;
+ goto error5;
}
}
ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv);
}
if (ret2) {
ret = ERR_PTR(ret2);
- goto error4;
+ goto error5;
}
}
@@ -416,9 +426,10 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
spin_unlock_irqrestore(&port_priv->reg_lock, flags);
return &mad_agent_priv->agent;
-
-error4:
+error5:
spin_unlock_irqrestore(&port_priv->reg_lock, flags);
+ ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
+error4:
kfree(reg_req);
error3:
kfree(mad_agent_priv);
@@ -489,6 +500,7 @@ struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
struct ib_mad_agent *ret;
struct ib_mad_snoop_private *mad_snoop_priv;
int qpn;
+ int err;
/* Validate parameters */
if ((is_snooping_sends(mad_snoop_flags) && !snoop_handler) ||
@@ -523,17 +535,25 @@ struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
mad_snoop_priv->agent.port_num = port_num;
mad_snoop_priv->mad_snoop_flags = mad_snoop_flags;
init_completion(&mad_snoop_priv->comp);
+
+ err = ib_mad_agent_security_setup(&mad_snoop_priv->agent, qp_type);
+ if (err) {
+ ret = ERR_PTR(err);
+ goto error2;
+ }
+
mad_snoop_priv->snoop_index = register_snoop_agent(
&port_priv->qp_info[qpn],
mad_snoop_priv);
if (mad_snoop_priv->snoop_index < 0) {
ret = ERR_PTR(mad_snoop_priv->snoop_index);
- goto error2;
+ goto error3;
}
atomic_set(&mad_snoop_priv->refcount, 1);
return &mad_snoop_priv->agent;
-
+error3:
+ ib_mad_agent_security_cleanup(&mad_snoop_priv->agent);
error2:
kfree(mad_snoop_priv);
error1:
@@ -579,6 +599,8 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
deref_mad_agent(mad_agent_priv);
wait_for_completion(&mad_agent_priv->comp);
+ ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
+
kfree(mad_agent_priv->reg_req);
kfree(mad_agent_priv);
}
@@ -597,13 +619,15 @@ static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
deref_snoop_agent(mad_snoop_priv);
wait_for_completion(&mad_snoop_priv->comp);
+ ib_mad_agent_security_cleanup(&mad_snoop_priv->agent);
+
kfree(mad_snoop_priv);
}
/*
* ib_unregister_mad_agent - Unregisters a client from using MAD services
*/
-int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
+void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
{
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_snoop_private *mad_snoop_priv;
@@ -620,7 +644,6 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
agent);
unregister_mad_snoop(mad_snoop_priv);
}
- return 0;
}
EXPORT_SYMBOL(ib_unregister_mad_agent);
@@ -1214,12 +1237,16 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
/* Walk list of send WRs and post each on send list */
for (; send_buf; send_buf = next_send_buf) {
-
mad_send_wr = container_of(send_buf,
struct ib_mad_send_wr_private,
send_buf);
mad_agent_priv = mad_send_wr->mad_agent_priv;
+ ret = ib_mad_enforce_security(mad_agent_priv,
+ mad_send_wr->send_wr.pkey_index);
+ if (ret)
+ goto error;
+
if (!send_buf->mad_agent->send_handler ||
(send_buf->timeout_ms &&
!send_buf->mad_agent->recv_handler)) {
@@ -1832,12 +1859,13 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_
const struct ib_mad_send_wr_private *wr,
const struct ib_mad_recv_wc *rwc )
{
- struct ib_ah_attr attr;
+ struct rdma_ah_attr attr;
u8 send_resp, rcv_resp;
union ib_gid sgid;
struct ib_device *device = mad_agent_priv->agent.device;
u8 port_num = mad_agent_priv->agent.port_num;
u8 lmc;
+ bool has_grh;
send_resp = ib_response_mad((struct ib_mad_hdr *)wr->send_buf.mad);
rcv_resp = ib_response_mad(&rwc->recv_buf.mad->mad_hdr);
@@ -1846,36 +1874,40 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_
/* both requests, or both responses. GIDs different */
return 0;
- if (ib_query_ah(wr->send_buf.ah, &attr))
+ if (rdma_query_ah(wr->send_buf.ah, &attr))
/* Assume not equal, to avoid false positives. */
return 0;
- if (!!(attr.ah_flags & IB_AH_GRH) !=
- !!(rwc->wc->wc_flags & IB_WC_GRH))
+ has_grh = !!(rdma_ah_get_ah_flags(&attr) & IB_AH_GRH);
+ if (has_grh != !!(rwc->wc->wc_flags & IB_WC_GRH))
/* one has GID, other does not. Assume different */
return 0;
if (!send_resp && rcv_resp) {
/* is request/response. */
- if (!(attr.ah_flags & IB_AH_GRH)) {
+ if (!has_grh) {
if (ib_get_cached_lmc(device, port_num, &lmc))
return 0;
- return (!lmc || !((attr.src_path_bits ^
+ return (!lmc || !((rdma_ah_get_path_bits(&attr) ^
rwc->wc->dlid_path_bits) &
((1 << lmc) - 1)));
} else {
+ const struct ib_global_route *grh =
+ rdma_ah_read_grh(&attr);
+
if (ib_get_cached_gid(device, port_num,
- attr.grh.sgid_index, &sgid, NULL))
+ grh->sgid_index, &sgid, NULL))
return 0;
return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
16);
}
}
- if (!(attr.ah_flags & IB_AH_GRH))
- return attr.dlid == rwc->wc->slid;
+ if (!has_grh)
+ return rdma_ah_get_dlid(&attr) == rwc->wc->slid;
else
- return !memcmp(attr.grh.dgid.raw, rwc->recv_buf.grh->sgid.raw,
+ return !memcmp(rdma_ah_read_grh(&attr)->dgid.raw,
+ rwc->recv_buf.grh->sgid.raw,
16);
}
@@ -1940,6 +1972,14 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
struct ib_mad_send_wr_private *mad_send_wr;
struct ib_mad_send_wc mad_send_wc;
unsigned long flags;
+ int ret;
+
+ ret = ib_mad_enforce_security(mad_agent_priv,
+ mad_recv_wc->wc->pkey_index);
+ if (ret) {
+ ib_free_recv_mad(mad_recv_wc);
+ deref_mad_agent(mad_agent_priv);
+ }
INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
@@ -1997,6 +2037,8 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
mad_recv_wc);
deref_mad_agent(mad_agent_priv);
}
+
+ return;
}
static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv,
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index 382941b46e43..0d3cca0a8890 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -81,7 +81,7 @@ static void destroy_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
{
deref_rmpp_recv(rmpp_recv);
wait_for_completion(&rmpp_recv->comp);
- ib_destroy_ah(rmpp_recv->ah);
+ rdma_destroy_ah(rmpp_recv->ah);
kfree(rmpp_recv);
}
@@ -171,7 +171,7 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
hdr_len, 0, GFP_KERNEL,
IB_MGMT_BASE_VERSION);
if (IS_ERR(msg))
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
else {
msg->ah = ah;
msg->context[0] = ah;
@@ -201,7 +201,7 @@ static void ack_ds_ack(struct ib_mad_agent_private *agent,
ret = ib_post_send_mad(msg, NULL);
if (ret) {
- ib_destroy_ah(msg->ah);
+ rdma_destroy_ah(msg->ah);
ib_free_send_mad(msg);
}
}
@@ -209,7 +209,7 @@ static void ack_ds_ack(struct ib_mad_agent_private *agent,
void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc)
{
if (mad_send_wc->send_buf->context[0] == mad_send_wc->send_buf->ah)
- ib_destroy_ah(mad_send_wc->send_buf->ah);
+ rdma_destroy_ah(mad_send_wc->send_buf->ah);
ib_free_send_mad(mad_send_wc->send_buf);
}
@@ -237,7 +237,7 @@ static void nack_recv(struct ib_mad_agent_private *agent,
ret = ib_post_send_mad(msg, NULL);
if (ret) {
- ib_destroy_ah(msg->ah);
+ rdma_destroy_ah(msg->ah);
ib_free_send_mad(msg);
}
}
@@ -852,7 +852,7 @@ static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr)
struct ib_mad_agent_private *agent = mad_send_wr->mad_agent_priv;
struct ib_mad_hdr *mad_hdr = mad_send_wr->send_buf.mad;
struct mad_rmpp_recv *rmpp_recv;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
unsigned long flags;
int newwin = 1;
@@ -867,10 +867,10 @@ static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr)
(rmpp_recv->method & IB_MGMT_METHOD_RESP))
continue;
- if (ib_query_ah(mad_send_wr->send_buf.ah, &ah_attr))
+ if (rdma_query_ah(mad_send_wr->send_buf.ah, &ah_attr))
continue;
- if (rmpp_recv->slid == ah_attr.dlid) {
+ if (rmpp_recv->slid == rdma_ah_get_dlid(&ah_attr)) {
newwin = rmpp_recv->repwin;
break;
}
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 322cb67b07a9..45f2f095f793 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -720,7 +720,7 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
struct ib_sa_mcmember_rec *rec,
struct net_device *ndev,
enum ib_gid_type gid_type,
- struct ib_ah_attr *ah_attr)
+ struct rdma_ah_attr *ah_attr)
{
int ret;
u16 gid_index;
@@ -743,19 +743,18 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
return ret;
memset(ah_attr, 0, sizeof *ah_attr);
- ah_attr->dlid = be16_to_cpu(rec->mlid);
- ah_attr->sl = rec->sl;
- ah_attr->port_num = port_num;
- ah_attr->static_rate = rec->rate;
-
- ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->grh.dgid = rec->mgid;
-
- ah_attr->grh.sgid_index = (u8) gid_index;
- ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
- ah_attr->grh.hop_limit = rec->hop_limit;
- ah_attr->grh.traffic_class = rec->traffic_class;
-
+ ah_attr->type = rdma_ah_find_type(device, port_num);
+
+ rdma_ah_set_dlid(ah_attr, be16_to_cpu(rec->mlid));
+ rdma_ah_set_sl(ah_attr, rec->sl);
+ rdma_ah_set_port_num(ah_attr, port_num);
+ rdma_ah_set_static_rate(ah_attr, rec->rate);
+
+ rdma_ah_set_grh(ah_attr, &rec->mgid,
+ be32_to_cpu(rec->flow_label),
+ (u8)gid_index,
+ rec->hop_limit,
+ rec->traffic_class);
return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_mcmember);
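
ib_init_ah_from_mcmember() now goes through the rdma_ah_* accessors rather than writing struct fields directly. A minimal sketch of the same accessor style for a plain LID-routed address handle; example_make_ah and its parameters are illustrative only:

#include <rdma/ib_verbs.h>

static struct ib_ah *example_make_ah(struct ib_pd *pd, u8 port_num,
				     u16 dlid, u8 sl)
{
	struct rdma_ah_attr attr = {};

	attr.type = rdma_ah_find_type(pd->device, port_num);
	rdma_ah_set_port_num(&attr, port_num);
	rdma_ah_set_dlid(&attr, dlid);
	rdma_ah_set_sl(&attr, sl);
	/* No rdma_ah_set_grh(): LID routed, so IB_AH_GRH stays clear. */

	return rdma_create_ah(pd, &attr);
}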
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index 10469b0088b5..94931c474d41 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -37,6 +37,7 @@
#include <net/net_namespace.h>
#include <net/sock.h>
#include <rdma/rdma_netlink.h>
+#include "core_priv.h"
struct ibnl_client {
struct list_head list;
@@ -55,7 +56,6 @@ int ibnl_chk_listeners(unsigned int group)
return -1;
return 0;
}
-EXPORT_SYMBOL(ibnl_chk_listeners);
int ibnl_add_client(int index, int nops,
const struct ibnl_client_cbs cb_table[])
@@ -146,7 +146,8 @@ nla_put_failure:
}
EXPORT_SYMBOL(ibnl_put_attr);
-static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct ibnl_client *client;
int type = nlh->nlmsg_type;
@@ -209,7 +210,7 @@ static void ibnl_rcv_reply_skb(struct sk_buff *skb)
if (nlh->nlmsg_flags & NLM_F_REQUEST)
return;
- ibnl_rcv_msg(skb, nlh);
+ ibnl_rcv_msg(skb, nlh, NULL);
msglen = NLMSG_ALIGN(nlh->nlmsg_len);
if (msglen > skb->len)
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
new file mode 100644
index 000000000000..41c31a2bf093
--- /dev/null
+++ b/drivers/infiniband/core/rdma_core.c
@@ -0,0 +1,627 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/file.h>
+#include <linux/anon_inodes.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/uverbs_types.h>
+#include <linux/rcupdate.h>
+#include "uverbs.h"
+#include "core_priv.h"
+#include "rdma_core.h"
+
+void uverbs_uobject_get(struct ib_uobject *uobject)
+{
+ kref_get(&uobject->ref);
+}
+
+static void uverbs_uobject_free(struct kref *ref)
+{
+ struct ib_uobject *uobj =
+ container_of(ref, struct ib_uobject, ref);
+
+ if (uobj->type->type_class->needs_kfree_rcu)
+ kfree_rcu(uobj, rcu);
+ else
+ kfree(uobj);
+}
+
+void uverbs_uobject_put(struct ib_uobject *uobject)
+{
+ kref_put(&uobject->ref, uverbs_uobject_free);
+}
+
+static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
+{
+ /*
+ * When a shared access is required, we use a positive counter. Each
+ * shared access request checks that the value != -1 and increments it.
+ * Exclusive access is required for operations like write or destroy.
+ * In exclusive access mode, we check that the counter is zero (nobody
+ * claimed this object) and we set it to -1. Releasing a shared access
+ * lock is done simply by decreasing the counter. As for exclusive
+ * access locks, since only a single one of them is allowed
+ * concurrently, setting the counter to zero is enough for releasing
+ * this lock.
+ */
+ if (!exclusive)
+ return __atomic_add_unless(&uobj->usecnt, 1, -1) == -1 ?
+ -EBUSY : 0;
+
+ /* lock is either WRITE or DESTROY - should be exclusive */
+ return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
+}
+
+static struct ib_uobject *alloc_uobj(struct ib_ucontext *context,
+ const struct uverbs_obj_type *type)
+{
+ struct ib_uobject *uobj = kzalloc(type->obj_size, GFP_KERNEL);
+
+ if (!uobj)
+ return ERR_PTR(-ENOMEM);
+ /*
+ * user_handle should be filled by the handler.
+ * The object is added to the list in the commit stage.
+ */
+ uobj->context = context;
+ uobj->type = type;
+ atomic_set(&uobj->usecnt, 0);
+ kref_init(&uobj->ref);
+
+ return uobj;
+}
+
+static int idr_add_uobj(struct ib_uobject *uobj)
+{
+ int ret;
+
+ idr_preload(GFP_KERNEL);
+ spin_lock(&uobj->context->ufile->idr_lock);
+
+ /*
+ * We start with allocating an idr pointing to NULL. This represents an
+ * object which isn't initialized yet. We'll replace it later on with
+ * the real object once we commit.
+ */
+ ret = idr_alloc(&uobj->context->ufile->idr, NULL, 0,
+ min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT);
+ if (ret >= 0)
+ uobj->id = ret;
+
+ spin_unlock(&uobj->context->ufile->idr_lock);
+ idr_preload_end();
+
+ return ret < 0 ? ret : 0;
+}
+
+/*
+ * This only removes the uobject from the IDR; uverbs_uobject_put() is still
+ * required.
+ */
+static void uverbs_idr_remove_uobj(struct ib_uobject *uobj)
+{
+ spin_lock(&uobj->context->ufile->idr_lock);
+ idr_remove(&uobj->context->ufile->idr, uobj->id);
+ spin_unlock(&uobj->context->ufile->idr_lock);
+}
+
+/* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
+static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext,
+ int id, bool exclusive)
+{
+ struct ib_uobject *uobj;
+
+ rcu_read_lock();
+ /* object won't be released as we're protected in rcu */
+ uobj = idr_find(&ucontext->ufile->idr, id);
+ if (!uobj) {
+ uobj = ERR_PTR(-ENOENT);
+ goto free;
+ }
+
+ uverbs_uobject_get(uobj);
+free:
+ rcu_read_unlock();
+ return uobj;
+}
+
+static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext,
+ int id, bool exclusive)
+{
+ struct file *f;
+ struct ib_uobject *uobject;
+ const struct uverbs_obj_fd_type *fd_type =
+ container_of(type, struct uverbs_obj_fd_type, type);
+
+ if (exclusive)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ f = fget(id);
+ if (!f)
+ return ERR_PTR(-EBADF);
+
+ uobject = f->private_data;
+ /*
+ * fget(id) ensures we are not currently running uverbs_close_fd,
+ * and the caller is expected to ensure that uverbs_close_fd is never
+ * done while a call to lookup is possible.
+ */
+ if (f->f_op != fd_type->fops) {
+ fput(f);
+ return ERR_PTR(-EBADF);
+ }
+
+ uverbs_uobject_get(uobject);
+ return uobject;
+}
+
+struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext,
+ int id, bool exclusive)
+{
+ struct ib_uobject *uobj;
+ int ret;
+
+ uobj = type->type_class->lookup_get(type, ucontext, id, exclusive);
+ if (IS_ERR(uobj))
+ return uobj;
+
+ if (uobj->type != type) {
+ ret = -EINVAL;
+ goto free;
+ }
+
+ ret = uverbs_try_lock_object(uobj, exclusive);
+ if (ret) {
+ WARN(ucontext->cleanup_reason,
+ "ib_uverbs: Trying to lookup_get while cleanup context\n");
+ goto free;
+ }
+
+ return uobj;
+free:
+ uobj->type->type_class->lookup_put(uobj, exclusive);
+ uverbs_uobject_put(uobj);
+ return ERR_PTR(ret);
+}
+
+static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext)
+{
+ int ret;
+ struct ib_uobject *uobj;
+
+ uobj = alloc_uobj(ucontext, type);
+ if (IS_ERR(uobj))
+ return uobj;
+
+ ret = idr_add_uobj(uobj);
+ if (ret)
+ goto uobj_put;
+
+ ret = ib_rdmacg_try_charge(&uobj->cg_obj, ucontext->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret)
+ goto idr_remove;
+
+ return uobj;
+
+idr_remove:
+ uverbs_idr_remove_uobj(uobj);
+uobj_put:
+ uverbs_uobject_put(uobj);
+ return ERR_PTR(ret);
+}
+
+static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext)
+{
+ const struct uverbs_obj_fd_type *fd_type =
+ container_of(type, struct uverbs_obj_fd_type, type);
+ int new_fd;
+ struct ib_uobject *uobj;
+ struct ib_uobject_file *uobj_file;
+ struct file *filp;
+
+ new_fd = get_unused_fd_flags(O_CLOEXEC);
+ if (new_fd < 0)
+ return ERR_PTR(new_fd);
+
+ uobj = alloc_uobj(ucontext, type);
+ if (IS_ERR(uobj)) {
+ put_unused_fd(new_fd);
+ return uobj;
+ }
+
+ uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
+ filp = anon_inode_getfile(fd_type->name,
+ fd_type->fops,
+ uobj_file,
+ fd_type->flags);
+ if (IS_ERR(filp)) {
+ put_unused_fd(new_fd);
+ uverbs_uobject_put(uobj);
+ return (void *)filp;
+ }
+
+ uobj_file->uobj.id = new_fd;
+ uobj_file->uobj.object = filp;
+ uobj_file->ufile = ucontext->ufile;
+ INIT_LIST_HEAD(&uobj->list);
+ kref_get(&uobj_file->ufile->ref);
+
+ return uobj;
+}
+
+struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext)
+{
+ return type->type_class->alloc_begin(type, ucontext);
+}
+
+static void uverbs_uobject_add(struct ib_uobject *uobject)
+{
+ mutex_lock(&uobject->context->uobjects_lock);
+ list_add(&uobject->list, &uobject->context->uobjects);
+ mutex_unlock(&uobject->context->uobjects_lock);
+}
+
+static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ const struct uverbs_obj_idr_type *idr_type =
+ container_of(uobj->type, struct uverbs_obj_idr_type,
+ type);
+ int ret = idr_type->destroy_object(uobj, why);
+
+ /*
+ * We can only fail gracefully if the user requested to destroy the
+ * object. In the rest of the cases, just remove whatever you can.
+ */
+ if (why == RDMA_REMOVE_DESTROY && ret)
+ return ret;
+
+ ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ uverbs_idr_remove_uobj(uobj);
+
+ return ret;
+}
+
+static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
+{
+ struct ib_uobject_file *uobj_file =
+ container_of(uobj, struct ib_uobject_file, uobj);
+ struct file *filp = uobj->object;
+ int id = uobj_file->uobj.id;
+
+ /* Unsuccessful NEW */
+ fput(filp);
+ put_unused_fd(id);
+}
+
+static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ const struct uverbs_obj_fd_type *fd_type =
+ container_of(uobj->type, struct uverbs_obj_fd_type, type);
+ struct ib_uobject_file *uobj_file =
+ container_of(uobj, struct ib_uobject_file, uobj);
+ int ret = fd_type->context_closed(uobj_file, why);
+
+ if (why == RDMA_REMOVE_DESTROY && ret)
+ return ret;
+
+ if (why == RDMA_REMOVE_DURING_CLEANUP) {
+ alloc_abort_fd_uobject(uobj);
+ return ret;
+ }
+
+ uobj_file->uobj.context = NULL;
+ return ret;
+}
+
+static void lockdep_check(struct ib_uobject *uobj, bool exclusive)
+{
+#ifdef CONFIG_LOCKDEP
+ if (exclusive)
+ WARN_ON(atomic_read(&uobj->usecnt) > 0);
+ else
+ WARN_ON(atomic_read(&uobj->usecnt) == -1);
+#endif
+}
+
+static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ int ret;
+ struct ib_ucontext *ucontext = uobj->context;
+
+ ret = uobj->type->type_class->remove_commit(uobj, why);
+ if (ret && why == RDMA_REMOVE_DESTROY) {
+ /* We couldn't remove the object, so just unlock the uobject */
+ atomic_set(&uobj->usecnt, 0);
+ uobj->type->type_class->lookup_put(uobj, true);
+ } else {
+ mutex_lock(&ucontext->uobjects_lock);
+ list_del(&uobj->list);
+ mutex_unlock(&ucontext->uobjects_lock);
+ /* put the ref we took when we created the object */
+ uverbs_uobject_put(uobj);
+ }
+
+ return ret;
+}
+
+/* This is called only for user requested DESTROY reasons */
+int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj)
+{
+ int ret;
+ struct ib_ucontext *ucontext = uobj->context;
+
+ /* put the ref count we took at lookup_get */
+ uverbs_uobject_put(uobj);
+ /* Cleanup is running. Calling this should have been impossible */
+ if (!down_read_trylock(&ucontext->cleanup_rwsem)) {
+ WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
+ return 0;
+ }
+ lockdep_check(uobj, true);
+ ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY);
+
+ up_read(&ucontext->cleanup_rwsem);
+ return ret;
+}
+
+static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
+{
+ uverbs_uobject_add(uobj);
+ spin_lock(&uobj->context->ufile->idr_lock);
+ /*
+ * We already allocated this IDR with a NULL object, so
+ * this shouldn't fail.
+ */
+ WARN_ON(idr_replace(&uobj->context->ufile->idr,
+ uobj, uobj->id));
+ spin_unlock(&uobj->context->ufile->idr_lock);
+}
+
+static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
+{
+ struct ib_uobject_file *uobj_file =
+ container_of(uobj, struct ib_uobject_file, uobj);
+
+ uverbs_uobject_add(&uobj_file->uobj);
+ fd_install(uobj_file->uobj.id, uobj->object);
+ /* This shouldn't be used anymore. Use the file object instead */
+ uobj_file->uobj.id = 0;
+ /* Get another reference as we export this to the fops */
+ uverbs_uobject_get(&uobj_file->uobj);
+}
+
+int rdma_alloc_commit_uobject(struct ib_uobject *uobj)
+{
+ /* Cleanup is running. Calling this should have been impossible */
+ if (!down_read_trylock(&uobj->context->cleanup_rwsem)) {
+ int ret;
+
+ WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n");
+ ret = uobj->type->type_class->remove_commit(uobj,
+ RDMA_REMOVE_DURING_CLEANUP);
+ if (ret)
+ pr_warn("ib_uverbs: cleanup of idr object %d failed\n",
+ uobj->id);
+ return ret;
+ }
+
+ uobj->type->type_class->alloc_commit(uobj);
+ up_read(&uobj->context->cleanup_rwsem);
+
+ return 0;
+}
+
+static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
+{
+ uverbs_idr_remove_uobj(uobj);
+ ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ uverbs_uobject_put(uobj);
+}
+
+void rdma_alloc_abort_uobject(struct ib_uobject *uobj)
+{
+ uobj->type->type_class->alloc_abort(uobj);
+}
+
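+/*
+ * Illustrative sketch (not part of this patch): a handler that creates a
+ * new uobject is expected to pair these calls roughly as follows, where
+ * some_type and create_hw_object() are hypothetical placeholders:
+ *
+ *	uobj = rdma_alloc_begin_uobject(&some_type, ucontext);
+ *	if (IS_ERR(uobj))
+ *		return PTR_ERR(uobj);
+ *	uobj->object = create_hw_object();
+ *	if (IS_ERR(uobj->object)) {
+ *		rdma_alloc_abort_uobject(uobj);
+ *		return PTR_ERR(uobj->object);
+ *	}
+ *	return rdma_alloc_commit_uobject(uobj);
+ */
+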
+static void lookup_put_idr_uobject(struct ib_uobject *uobj, bool exclusive)
+{
+}
+
+static void lookup_put_fd_uobject(struct ib_uobject *uobj, bool exclusive)
+{
+ struct file *filp = uobj->object;
+
+ WARN_ON(exclusive);
+	/* This indirectly calls uverbs_close_fd() and frees the object */
+ fput(filp);
+}
+
+void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive)
+{
+ lockdep_check(uobj, exclusive);
+ uobj->type->type_class->lookup_put(uobj, exclusive);
+ /*
+ * In order to unlock an object, either decrease its usecnt for
+ * read access or zero it in case of exclusive access. See
+ * uverbs_try_lock_object for locking schema information.
+ */
+ if (!exclusive)
+ atomic_dec(&uobj->usecnt);
+ else
+ atomic_set(&uobj->usecnt, 0);
+
+ uverbs_uobject_put(uobj);
+}
+
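+/*
+ * Illustrative sketch (not part of this patch): a handler that operates on
+ * an existing uobject is expected to bracket its work with the lookup
+ * calls, assuming a matching rdma_lookup_get_uobject() entry point taking
+ * (type, ucontext, id, exclusive):
+ *
+ *	uobj = rdma_lookup_get_uobject(&some_type, ucontext, id, true);
+ *	if (IS_ERR(uobj))
+ *		return PTR_ERR(uobj);
+ *	... modify uobj->object under the exclusive lock ...
+ *	rdma_lookup_put_uobject(uobj, true);
+ */
+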
+const struct uverbs_obj_type_class uverbs_idr_class = {
+ .alloc_begin = alloc_begin_idr_uobject,
+ .lookup_get = lookup_get_idr_uobject,
+ .alloc_commit = alloc_commit_idr_uobject,
+ .alloc_abort = alloc_abort_idr_uobject,
+ .lookup_put = lookup_put_idr_uobject,
+ .remove_commit = remove_commit_idr_uobject,
+ /*
+ * When we destroy an object, we first just lock it for WRITE and
+ * actually DESTROY it in the finalize stage. So, the problematic
+ * scenario is when we just started the finalize stage of the
+ * destruction (nothing was executed yet). Now, the other thread
+ * fetched the object for READ access, but it didn't lock it yet.
+ * The DESTROY thread continues and starts destroying the object.
+	 * When the other thread continues - without the RCU, it would
+	 * access freed memory. However, the rcu_read_lock delays the free
+	 * until the READ side's rcu_read_lock critical section ends. Since the
+ * exclusive lock of the object is still taken by the DESTROY flow, the
+ * READ operation will get -EBUSY and it'll just bail out.
+ */
+ .needs_kfree_rcu = true,
+};
+
+static void _uverbs_close_fd(struct ib_uobject_file *uobj_file)
+{
+ struct ib_ucontext *ucontext;
+ struct ib_uverbs_file *ufile = uobj_file->ufile;
+ int ret;
+
+ mutex_lock(&uobj_file->ufile->cleanup_mutex);
+
+	/* The uobject was either already cleaned up or is being cleaned up right now anyway */
+ if (!uobj_file->uobj.context ||
+ !down_read_trylock(&uobj_file->uobj.context->cleanup_rwsem))
+ goto unlock;
+
+ ucontext = uobj_file->uobj.context;
+ ret = _rdma_remove_commit_uobject(&uobj_file->uobj, RDMA_REMOVE_CLOSE);
+ up_read(&ucontext->cleanup_rwsem);
+ if (ret)
+ pr_warn("uverbs: unable to clean up uobject file in uverbs_close_fd.\n");
+unlock:
+ mutex_unlock(&ufile->cleanup_mutex);
+}
+
+void uverbs_close_fd(struct file *f)
+{
+ struct ib_uobject_file *uobj_file = f->private_data;
+ struct kref *uverbs_file_ref = &uobj_file->ufile->ref;
+
+ _uverbs_close_fd(uobj_file);
+ uverbs_uobject_put(&uobj_file->uobj);
+ kref_put(uverbs_file_ref, ib_uverbs_release_file);
+}
+
+void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed)
+{
+ enum rdma_remove_reason reason = device_removed ?
+ RDMA_REMOVE_DRIVER_REMOVE : RDMA_REMOVE_CLOSE;
+ unsigned int cur_order = 0;
+
+ ucontext->cleanup_reason = reason;
+ /*
+	 * Waits for all remove_commit and alloc_commit to finish. Logically, we
+ * want to hold this forever as the context is going to be destroyed,
+ * but we'll release it since it causes a "held lock freed" BUG message.
+ */
+ down_write(&ucontext->cleanup_rwsem);
+
+ while (!list_empty(&ucontext->uobjects)) {
+ struct ib_uobject *obj, *next_obj;
+ unsigned int next_order = UINT_MAX;
+
+ /*
+ * This shouldn't run while executing other commands on this
+ * context. Thus, the only thing we should take care of is
+ * releasing a FD while traversing this list. The FD could be
+ * closed and released from the _release fop of this FD.
+ * In order to mitigate this, we add a lock.
+ * We take and release the lock per order traversal in order
+		 * to give other threads (which might still use the FDs) a chance
+ * to run.
+ */
+ mutex_lock(&ucontext->uobjects_lock);
+ list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects,
+ list) {
+ if (obj->type->destroy_order == cur_order) {
+ int ret;
+
+ /*
+ * if we hit this WARN_ON, that means we are
+ * racing with a lookup_get.
+ */
+ WARN_ON(uverbs_try_lock_object(obj, true));
+ ret = obj->type->type_class->remove_commit(obj,
+ reason);
+ list_del(&obj->list);
+ if (ret)
+ pr_warn("ib_uverbs: failed to remove uobject id %d order %u\n",
+ obj->id, cur_order);
+ /* put the ref we took when we created the object */
+ uverbs_uobject_put(obj);
+ } else {
+ next_order = min(next_order,
+ obj->type->destroy_order);
+ }
+ }
+ mutex_unlock(&ucontext->uobjects_lock);
+ cur_order = next_order;
+ }
+ up_write(&ucontext->cleanup_rwsem);
+}
+
+void uverbs_initialize_ucontext(struct ib_ucontext *ucontext)
+{
+ ucontext->cleanup_reason = 0;
+ mutex_init(&ucontext->uobjects_lock);
+ INIT_LIST_HEAD(&ucontext->uobjects);
+ init_rwsem(&ucontext->cleanup_rwsem);
+}
+
+const struct uverbs_obj_type_class uverbs_fd_class = {
+ .alloc_begin = alloc_begin_fd_uobject,
+ .lookup_get = lookup_get_fd_uobject,
+ .alloc_commit = alloc_commit_fd_uobject,
+ .alloc_abort = alloc_abort_fd_uobject,
+ .lookup_put = lookup_put_fd_uobject,
+ .remove_commit = remove_commit_fd_uobject,
+ .needs_kfree_rcu = false,
+};
+
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
new file mode 100644
index 000000000000..1b82e7ff7fe8
--- /dev/null
+++ b/drivers/infiniband/core/rdma_core.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RDMA_CORE_H
+#define RDMA_CORE_H
+
+#include <linux/idr.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/ib_verbs.h>
+#include <linux/mutex.h>
+
+/*
+ * These functions initialize the context and clean up its uobjects.
+ * The context has a list of objects which is protected by a mutex
+ * on the context. initialize_ucontext should be called when we create
+ * a context.
+ * cleanup_ucontext removes all uobjects from the context and puts them.
+ */
+void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed);
+void uverbs_initialize_ucontext(struct ib_ucontext *ucontext);
+
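+/*
+ * Illustrative sketch (not part of this patch): the uverbs file code is
+ * expected to wrap the ucontext lifetime roughly as follows:
+ *
+ *	uverbs_initialize_ucontext(ucontext);
+ *	... handlers allocate, look up and remove uobjects ...
+ *	uverbs_cleanup_ucontext(ucontext, device_removed);
+ */
+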
+/*
+ * uverbs_uobject_get is called in order to increase the reference count on
+ * an uobject. This is useful when a handler wants to keep the uobject's memory
+ * alive, regardless of whether this uobject is still alive in the context's
+ * objects repository. Objects are put via uverbs_uobject_put.
+ */
+void uverbs_uobject_get(struct ib_uobject *uobject);
+
+/*
+ * In order to indicate we no longer need this uobject, uverbs_uobject_put
+ * is called. When the reference count is decreased, the uobject is freed.
+ * For example, this is used when attaching a completion channel to a CQ.
+ */
+void uverbs_uobject_put(struct ib_uobject *uobject);
+
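+/*
+ * Illustrative sketch (not part of this patch): a caller that needs the
+ * uobject's memory to outlive its presence in the context pairs the calls:
+ *
+ *	uverbs_uobject_get(uobj);
+ *	... uobj may meanwhile be removed from the context ...
+ *	uverbs_uobject_put(uobj);
+ */
+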
+/* Indicate this fd is no longer used by this consumer, but its memory isn't
+ * necessarily released yet. When the last reference is put, we release the
+ * memory. After this call is executed, calling uverbs_uobject_get isn't
+ * allowed.
+ * This must be called from the release file_operations of the file!
+ */
+void uverbs_close_fd(struct file *f);
+
+#endif /* RDMA_CORE_H */
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 0621f4455732..94a9eefb3cfc 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -42,6 +42,8 @@
#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>
+static struct workqueue_struct *gid_cache_wq;
+
enum gid_op_type {
GID_DEL = 0,
GID_ADD
@@ -144,7 +146,6 @@ static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_de
static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
struct net_device *real_dev;
int res;
@@ -152,11 +153,11 @@ static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
return 0;
rcu_read_lock();
- real_dev = rdma_vlan_dev_real_dev(event_ndev);
+ real_dev = rdma_vlan_dev_real_dev(cookie);
if (!real_dev)
- real_dev = event_ndev;
+ real_dev = cookie;
- res = ((rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) &&
+ res = ((rdma_is_upper_dev_rcu(rdma_ndev, cookie) &&
(is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) &
REQUIRED_BOND_STATES)) ||
real_dev == rdma_ndev);
@@ -192,17 +193,16 @@ static int pass_all_filter(struct ib_device *ib_dev, u8 port,
static int upper_device_filter(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
int res;
if (!rdma_ndev)
return 0;
- if (rdma_ndev == event_ndev)
+ if (rdma_ndev == cookie)
return 1;
rcu_read_lock();
- res = rdma_is_upper_dev_rcu(rdma_ndev, event_ndev);
+ res = rdma_is_upper_dev_rcu(rdma_ndev, cookie);
rcu_read_unlock();
return res;
@@ -379,18 +379,14 @@ static void _add_netdev_ips(struct ib_device *ib_dev, u8 port,
static void add_netdev_ips(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
-
- enum_netdev_default_gids(ib_dev, port, event_ndev, rdma_ndev);
- _add_netdev_ips(ib_dev, port, event_ndev);
+ enum_netdev_default_gids(ib_dev, port, cookie, rdma_ndev);
+ _add_netdev_ips(ib_dev, port, cookie);
}
static void del_netdev_ips(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
-
- ib_cache_gid_del_all_netdev_gids(ib_dev, port, event_ndev);
+ ib_cache_gid_del_all_netdev_gids(ib_dev, port, cookie);
}
static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
@@ -460,7 +456,7 @@ static void handle_netdev_upper(struct ib_device *ib_dev, u8 port,
u8 port,
struct net_device *ndev))
{
- struct net_device *ndev = (struct net_device *)cookie;
+ struct net_device *ndev = cookie;
struct upper_list *upper_iter;
struct upper_list *upper_temp;
LIST_HEAD(upper_list);
@@ -519,9 +515,7 @@ static void del_netdev_default_ips_join(struct ib_device *ib_dev, u8 port,
static void del_netdev_default_ips(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
-
- bond_delete_netdev_default_gids(ib_dev, port, event_ndev, rdma_ndev);
+ bond_delete_netdev_default_gids(ib_dev, port, cookie, rdma_ndev);
}
/* The following functions operate on all IB devices. netdevice_event and
@@ -568,7 +562,7 @@ static int netdevice_queue_work(struct netdev_event_work_cmd *cmds,
}
INIT_WORK(&ndev_work->work, netdevice_event_work_handler);
- queue_work(ib_wq, &ndev_work->work);
+ queue_work(gid_cache_wq, &ndev_work->work);
return NOTIFY_DONE;
}
@@ -701,7 +695,7 @@ static int addr_event(struct notifier_block *this, unsigned long event,
dev_hold(ndev);
work->gid_attr.ndev = ndev;
- queue_work(ib_wq, &work->work);
+ queue_work(gid_cache_wq, &work->work);
return NOTIFY_DONE;
}
@@ -748,6 +742,10 @@ static struct notifier_block nb_inet6addr = {
int __init roce_gid_mgmt_init(void)
{
+ gid_cache_wq = alloc_ordered_workqueue("gid-cache-wq", 0);
+ if (!gid_cache_wq)
+ return -ENOMEM;
+
register_inetaddr_notifier(&nb_inetaddr);
if (IS_ENABLED(CONFIG_IPV6))
register_inet6addr_notifier(&nb_inet6addr);
@@ -772,4 +770,5 @@ void __exit roce_gid_mgmt_cleanup(void)
* ib-core is removed, all physical devices have been removed,
* so no issue with remaining hardware contexts.
*/
+ destroy_workqueue(gid_cache_wq);
}
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 81b742ca1639..70fa4cabe48e 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -56,6 +56,8 @@
#define IB_SA_LOCAL_SVC_TIMEOUT_MIN 100
#define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT 2000
#define IB_SA_LOCAL_SVC_TIMEOUT_MAX 200000
+#define IB_SA_CPI_MAX_RETRY_CNT 3
+#define IB_SA_CPI_RETRY_WAIT		1000	/* msecs */
static int sa_local_svc_timeout_ms = IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT;
struct ib_sa_sm_ah {
@@ -65,9 +67,23 @@ struct ib_sa_sm_ah {
u8 src_path_mask;
};
+enum rdma_class_port_info_type {
+ RDMA_CLASS_PORT_INFO_IB,
+ RDMA_CLASS_PORT_INFO_OPA
+};
+
+struct rdma_class_port_info {
+ enum rdma_class_port_info_type type;
+ union {
+ struct ib_class_port_info ib;
+ struct opa_class_port_info opa;
+ };
+};
+
struct ib_sa_classport_cache {
bool valid;
- struct ib_class_port_info data;
+ int retry_cnt;
+ struct rdma_class_port_info data;
};
struct ib_sa_port {
@@ -75,6 +91,7 @@ struct ib_sa_port {
struct ib_sa_sm_ah *sm_ah;
struct work_struct update_task;
struct ib_sa_classport_cache classport_info;
+ struct delayed_work ib_cpi_work;
spinlock_t classport_lock; /* protects class port info set */
spinlock_t ah_lock;
u8 port_num;
@@ -103,6 +120,7 @@ struct ib_sa_query {
#define IB_SA_ENABLE_LOCAL_SERVICE 0x00000001
#define IB_SA_CANCEL 0x00000002
+#define IB_SA_QUERY_OPA 0x00000004
struct ib_sa_service_query {
void (*callback)(int, struct ib_sa_service_rec *, void *);
@@ -111,9 +129,10 @@ struct ib_sa_service_query {
};
struct ib_sa_path_query {
- void (*callback)(int, struct ib_sa_path_rec *, void *);
+ void (*callback)(int, struct sa_path_rec *, void *);
void *context;
struct ib_sa_query sa_query;
+ struct sa_path_rec *conv_pr;
};
struct ib_sa_guidinfo_query {
@@ -123,7 +142,7 @@ struct ib_sa_guidinfo_query {
};
struct ib_sa_classport_info_query {
- void (*callback)(int, struct ib_class_port_info *, void *);
+ void (*callback)(void *);
void *context;
struct ib_sa_query sa_query;
};
@@ -170,8 +189,8 @@ static DEFINE_SPINLOCK(tid_lock);
static u32 tid;
#define PATH_REC_FIELD(field) \
- .struct_offset_bytes = offsetof(struct ib_sa_path_rec, field), \
- .struct_size_bytes = sizeof ((struct ib_sa_path_rec *) 0)->field, \
+ .struct_offset_bytes = offsetof(struct sa_path_rec, field), \
+ .struct_size_bytes = sizeof((struct sa_path_rec *)0)->field, \
.field_name = "sa_path_rec:" #field
static const struct ib_field path_rec_table[] = {
@@ -187,15 +206,15 @@ static const struct ib_field path_rec_table[] = {
.offset_words = 6,
.offset_bits = 0,
.size_bits = 128 },
- { PATH_REC_FIELD(dlid),
+ { PATH_REC_FIELD(ib.dlid),
.offset_words = 10,
.offset_bits = 0,
.size_bits = 16 },
- { PATH_REC_FIELD(slid),
+ { PATH_REC_FIELD(ib.slid),
.offset_words = 10,
.offset_bits = 16,
.size_bits = 16 },
- { PATH_REC_FIELD(raw_traffic),
+ { PATH_REC_FIELD(ib.raw_traffic),
.offset_words = 11,
.offset_bits = 0,
.size_bits = 1 },
@@ -269,6 +288,136 @@ static const struct ib_field path_rec_table[] = {
.size_bits = 48 },
};
+#define OPA_PATH_REC_FIELD(field) \
+ .struct_offset_bytes = \
+ offsetof(struct sa_path_rec, field), \
+ .struct_size_bytes = \
+ sizeof((struct sa_path_rec *)0)->field, \
+ .field_name = "sa_path_rec:" #field
+
+static const struct ib_field opa_path_rec_table[] = {
+ { OPA_PATH_REC_FIELD(service_id),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 64 },
+ { OPA_PATH_REC_FIELD(dgid),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { OPA_PATH_REC_FIELD(sgid),
+ .offset_words = 6,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { OPA_PATH_REC_FIELD(opa.dlid),
+ .offset_words = 10,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_PATH_REC_FIELD(opa.slid),
+ .offset_words = 11,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_PATH_REC_FIELD(opa.raw_traffic),
+ .offset_words = 12,
+ .offset_bits = 0,
+ .size_bits = 1 },
+ { RESERVED,
+ .offset_words = 12,
+ .offset_bits = 1,
+ .size_bits = 3 },
+ { OPA_PATH_REC_FIELD(flow_label),
+ .offset_words = 12,
+ .offset_bits = 4,
+ .size_bits = 20 },
+ { OPA_PATH_REC_FIELD(hop_limit),
+ .offset_words = 12,
+ .offset_bits = 24,
+ .size_bits = 8 },
+ { OPA_PATH_REC_FIELD(traffic_class),
+ .offset_words = 13,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { OPA_PATH_REC_FIELD(reversible),
+ .offset_words = 13,
+ .offset_bits = 8,
+ .size_bits = 1 },
+ { OPA_PATH_REC_FIELD(numb_path),
+ .offset_words = 13,
+ .offset_bits = 9,
+ .size_bits = 7 },
+ { OPA_PATH_REC_FIELD(pkey),
+ .offset_words = 13,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { OPA_PATH_REC_FIELD(opa.l2_8B),
+ .offset_words = 14,
+ .offset_bits = 0,
+ .size_bits = 1 },
+ { OPA_PATH_REC_FIELD(opa.l2_10B),
+ .offset_words = 14,
+ .offset_bits = 1,
+ .size_bits = 1 },
+ { OPA_PATH_REC_FIELD(opa.l2_9B),
+ .offset_words = 14,
+ .offset_bits = 2,
+ .size_bits = 1 },
+ { OPA_PATH_REC_FIELD(opa.l2_16B),
+ .offset_words = 14,
+ .offset_bits = 3,
+ .size_bits = 1 },
+ { RESERVED,
+ .offset_words = 14,
+ .offset_bits = 4,
+ .size_bits = 2 },
+ { OPA_PATH_REC_FIELD(opa.qos_type),
+ .offset_words = 14,
+ .offset_bits = 6,
+ .size_bits = 2 },
+ { OPA_PATH_REC_FIELD(opa.qos_priority),
+ .offset_words = 14,
+ .offset_bits = 8,
+ .size_bits = 8 },
+ { RESERVED,
+ .offset_words = 14,
+ .offset_bits = 16,
+ .size_bits = 3 },
+ { OPA_PATH_REC_FIELD(sl),
+ .offset_words = 14,
+ .offset_bits = 19,
+ .size_bits = 5 },
+ { RESERVED,
+ .offset_words = 14,
+ .offset_bits = 24,
+ .size_bits = 8 },
+ { OPA_PATH_REC_FIELD(mtu_selector),
+ .offset_words = 15,
+ .offset_bits = 0,
+ .size_bits = 2 },
+ { OPA_PATH_REC_FIELD(mtu),
+ .offset_words = 15,
+ .offset_bits = 2,
+ .size_bits = 6 },
+ { OPA_PATH_REC_FIELD(rate_selector),
+ .offset_words = 15,
+ .offset_bits = 8,
+ .size_bits = 2 },
+ { OPA_PATH_REC_FIELD(rate),
+ .offset_words = 15,
+ .offset_bits = 10,
+ .size_bits = 6 },
+ { OPA_PATH_REC_FIELD(packet_life_time_selector),
+ .offset_words = 15,
+ .offset_bits = 16,
+ .size_bits = 2 },
+ { OPA_PATH_REC_FIELD(packet_life_time),
+ .offset_words = 15,
+ .offset_bits = 18,
+ .size_bits = 6 },
+ { OPA_PATH_REC_FIELD(preference),
+ .offset_words = 15,
+ .offset_bits = 24,
+ .size_bits = 8 },
+};
+
#define MCMEMBER_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field), \
.struct_size_bytes = sizeof ((struct ib_sa_mcmember_rec *) 0)->field, \
@@ -406,7 +555,7 @@ static const struct ib_field service_rec_table[] = {
.struct_size_bytes = sizeof((struct ib_class_port_info *)0)->field, \
.field_name = "ib_class_port_info:" #field
-static const struct ib_field classport_info_rec_table[] = {
+static const struct ib_field ib_classport_info_rec_table[] = {
{ CLASSPORTINFO_REC_FIELD(base_version),
.offset_words = 0,
.offset_bits = 0,
@@ -477,6 +626,88 @@ static const struct ib_field classport_info_rec_table[] = {
.size_bits = 32 },
};
+#define OPA_CLASSPORTINFO_REC_FIELD(field) \
+ .struct_offset_bytes =\
+ offsetof(struct opa_class_port_info, field), \
+ .struct_size_bytes = \
+ sizeof((struct opa_class_port_info *)0)->field, \
+ .field_name = "opa_class_port_info:" #field
+
+static const struct ib_field opa_classport_info_rec_table[] = {
+ { OPA_CLASSPORTINFO_REC_FIELD(base_version),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { OPA_CLASSPORTINFO_REC_FIELD(class_version),
+ .offset_words = 0,
+ .offset_bits = 8,
+ .size_bits = 8 },
+ { OPA_CLASSPORTINFO_REC_FIELD(cap_mask),
+ .offset_words = 0,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { OPA_CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_gid),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_tc_fl),
+ .offset_words = 6,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_lid),
+ .offset_words = 7,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_sl_qp),
+ .offset_words = 8,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_qkey),
+ .offset_words = 9,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_gid),
+ .offset_words = 10,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_tc_fl),
+ .offset_words = 14,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_lid),
+ .offset_words = 15,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_hl_qp),
+ .offset_words = 16,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_qkey),
+ .offset_words = 17,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_pkey),
+ .offset_words = 18,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_pkey),
+ .offset_words = 18,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_sl_rsvd),
+ .offset_words = 19,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { RESERVED,
+ .offset_words = 19,
+ .offset_bits = 8,
+ .size_bits = 24 },
+};
+
#define GUIDINFO_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \
.struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \
@@ -518,7 +749,7 @@ static inline int ib_sa_query_cancelled(struct ib_sa_query *query)
static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
struct ib_sa_query *query)
{
- struct ib_sa_path_rec *sa_rec = query->mad_buf->context[1];
+ struct sa_path_rec *sa_rec = query->mad_buf->context[1];
struct ib_sa_mad *mad = query->mad_buf->mad;
ib_sa_comp_mask comp_mask = mad->sa_hdr.comp_mask;
u16 val16;
@@ -528,8 +759,7 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
query->mad_buf->context[1] = NULL;
/* Construct the family header first */
- header = (struct rdma_ls_resolve_header *)
- skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
+ header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
memcpy(header->device_name, query->port->agent->device->name,
LS_DEVICE_NAME_MAX);
header->port_num = query->port->port_num;
@@ -808,7 +1038,7 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb,
return -EPERM;
ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
- nlmsg_len(nlh), ib_nl_policy);
+ nlmsg_len(nlh), ib_nl_policy, NULL);
attr = (const struct nlattr *)tb[LS_NLA_TYPE_TIMEOUT];
if (ret || !attr)
goto settimeout_out;
@@ -860,7 +1090,7 @@ static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
return 0;
ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
- nlmsg_len(nlh), ib_nl_policy);
+ nlmsg_len(nlh), ib_nl_policy, NULL);
if (ret)
return 0;
@@ -927,96 +1157,10 @@ static void free_sm_ah(struct kref *kref)
{
struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
- ib_destroy_ah(sm_ah->ah);
+ rdma_destroy_ah(sm_ah->ah);
kfree(sm_ah);
}
-static void update_sm_ah(struct work_struct *work)
-{
- struct ib_sa_port *port =
- container_of(work, struct ib_sa_port, update_task);
- struct ib_sa_sm_ah *new_ah;
- struct ib_port_attr port_attr;
- struct ib_ah_attr ah_attr;
-
- if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
- pr_warn("Couldn't query port\n");
- return;
- }
-
- new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL);
- if (!new_ah) {
- return;
- }
-
- kref_init(&new_ah->ref);
- new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
-
- new_ah->pkey_index = 0;
- if (ib_find_pkey(port->agent->device, port->port_num,
- IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
- pr_err("Couldn't find index for default PKey\n");
-
- memset(&ah_attr, 0, sizeof ah_attr);
- ah_attr.dlid = port_attr.sm_lid;
- ah_attr.sl = port_attr.sm_sl;
- ah_attr.port_num = port->port_num;
- if (port_attr.grh_required) {
- ah_attr.ah_flags = IB_AH_GRH;
- ah_attr.grh.dgid.global.subnet_prefix = cpu_to_be64(port_attr.subnet_prefix);
- ah_attr.grh.dgid.global.interface_id = cpu_to_be64(IB_SA_WELL_KNOWN_GUID);
- }
-
- new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
- if (IS_ERR(new_ah->ah)) {
- pr_warn("Couldn't create new SM AH\n");
- kfree(new_ah);
- return;
- }
-
- spin_lock_irq(&port->ah_lock);
- if (port->sm_ah)
- kref_put(&port->sm_ah->ref, free_sm_ah);
- port->sm_ah = new_ah;
- spin_unlock_irq(&port->ah_lock);
-
-}
-
-static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event)
-{
- if (event->event == IB_EVENT_PORT_ERR ||
- event->event == IB_EVENT_PORT_ACTIVE ||
- event->event == IB_EVENT_LID_CHANGE ||
- event->event == IB_EVENT_PKEY_CHANGE ||
- event->event == IB_EVENT_SM_CHANGE ||
- event->event == IB_EVENT_CLIENT_REREGISTER) {
- unsigned long flags;
- struct ib_sa_device *sa_dev =
- container_of(handler, typeof(*sa_dev), event_handler);
- struct ib_sa_port *port =
- &sa_dev->port[event->element.port_num - sa_dev->start_port];
-
- if (!rdma_cap_ib_sa(handler->device, port->port_num))
- return;
-
- spin_lock_irqsave(&port->ah_lock, flags);
- if (port->sm_ah)
- kref_put(&port->sm_ah->ref, free_sm_ah);
- port->sm_ah = NULL;
- spin_unlock_irqrestore(&port->ah_lock, flags);
-
- if (event->event == IB_EVENT_SM_CHANGE ||
- event->event == IB_EVENT_CLIENT_REREGISTER ||
- event->event == IB_EVENT_LID_CHANGE) {
- spin_lock_irqsave(&port->classport_lock, flags);
- port->classport_info.valid = false;
- spin_unlock_irqrestore(&port->classport_lock, flags);
- }
- queue_work(ib_wq, &sa_dev->port[event->element.port_num -
- sa_dev->start_port].update_task);
- }
-}
-
void ib_sa_register_client(struct ib_sa_client *client)
{
atomic_set(&client->users, 1);
@@ -1085,7 +1229,8 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
}
int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
- struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr)
+ struct sa_path_rec *rec,
+ struct rdma_ah_attr *ah_attr)
{
int ret;
u16 gid_index;
@@ -1093,21 +1238,26 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
struct net_device *ndev = NULL;
memset(ah_attr, 0, sizeof *ah_attr);
- ah_attr->dlid = be16_to_cpu(rec->dlid);
- ah_attr->sl = rec->sl;
- ah_attr->src_path_bits = be16_to_cpu(rec->slid) &
- get_src_path_mask(device, port_num);
- ah_attr->port_num = port_num;
- ah_attr->static_rate = rec->rate;
-
+ ah_attr->type = rdma_ah_find_type(device, port_num);
+
+ rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec)));
+ rdma_ah_set_sl(ah_attr, rec->sl);
+ rdma_ah_set_path_bits(ah_attr, be32_to_cpu(sa_path_get_slid(rec)) &
+ get_src_path_mask(device, port_num));
+ rdma_ah_set_port_num(ah_attr, port_num);
+ rdma_ah_set_static_rate(ah_attr, rec->rate);
use_roce = rdma_cap_eth_ah(device, port_num);
if (use_roce) {
struct net_device *idev;
struct net_device *resolved_dev;
- struct rdma_dev_addr dev_addr = {.bound_dev_if = rec->ifindex,
- .net = rec->net ? rec->net :
- &init_net};
+ struct rdma_dev_addr dev_addr = {
+ .bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ?
+ sa_path_get_ifindex(rec) : 0),
+ .net = sa_path_get_ndev(rec) ?
+ sa_path_get_ndev(rec) :
+ &init_net
+ };
union {
struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
@@ -1128,7 +1278,7 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
dev_addr.network == RDMA_NETWORK_IPV6) &&
- rec->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+ rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
return -EINVAL;
idev = device->get_netdev(device, port_num);
@@ -1159,28 +1309,31 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
}
if (rec->hop_limit > 0 || use_roce) {
- ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->grh.dgid = rec->dgid;
+ enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
- ret = ib_find_cached_gid_by_port(device, &rec->sgid,
- rec->gid_type, port_num, ndev,
- &gid_index);
+ ret = ib_find_cached_gid_by_port(device, &rec->sgid, type,
+ port_num, ndev, &gid_index);
if (ret) {
if (ndev)
dev_put(ndev);
return ret;
}
- ah_attr->grh.sgid_index = gid_index;
- ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
- ah_attr->grh.hop_limit = rec->hop_limit;
- ah_attr->grh.traffic_class = rec->traffic_class;
+ rdma_ah_set_grh(ah_attr, &rec->dgid,
+ be32_to_cpu(rec->flow_label),
+ gid_index, rec->hop_limit,
+ rec->traffic_class);
if (ndev)
dev_put(ndev);
}
- if (use_roce)
- memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);
+ if (use_roce) {
+ u8 *dmac = sa_path_get_dmac(rec);
+
+ if (!dmac)
+ return -EINVAL;
+ memcpy(ah_attr->roce.dmac, dmac, ETH_ALEN);
+ }
return 0;
}
@@ -1203,7 +1356,9 @@ static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
query->sm_ah->pkey_index,
0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
gfp_mask,
- IB_MGMT_BASE_VERSION);
+ ((query->flags & IB_SA_QUERY_OPA) ?
+ OPA_MGMT_BASE_VERSION :
+ IB_MGMT_BASE_VERSION));
if (IS_ERR(query->mad_buf)) {
kref_put(&query->sm_ah->ref, free_sm_ah);
return -ENOMEM;
@@ -1220,16 +1375,21 @@ static void free_mad(struct ib_sa_query *query)
kref_put(&query->sm_ah->ref, free_sm_ah);
}
-static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
+static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent)
{
+ struct ib_sa_mad *mad = query->mad_buf->mad;
unsigned long flags;
memset(mad, 0, sizeof *mad);
- mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
+ if (query->flags & IB_SA_QUERY_OPA) {
+ mad->mad_hdr.base_version = OPA_MGMT_BASE_VERSION;
+ mad->mad_hdr.class_version = OPA_SA_CLASS_VERSION;
+ } else {
+ mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
+ mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
+ }
mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
- mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
-
spin_lock_irqsave(&tid_lock, flags);
mad->mad_hdr.tid =
cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
@@ -1258,7 +1418,8 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
query->mad_buf->context[0] = query;
query->id = id;
- if (query->flags & IB_SA_ENABLE_LOCAL_SERVICE) {
+ if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) &&
+ (!(query->flags & IB_SA_QUERY_OPA))) {
if (!ibnl_chk_listeners(RDMA_NL_GROUP_LS)) {
if (!ib_nl_make_request(query, gfp_mask))
return id;
@@ -1281,18 +1442,75 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
return ret ? ret : id;
}
-void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec)
+void ib_sa_unpack_path(void *attribute, struct sa_path_rec *rec)
{
ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
}
EXPORT_SYMBOL(ib_sa_unpack_path);
-void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute)
+void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute)
{
ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute);
}
EXPORT_SYMBOL(ib_sa_pack_path);
+static bool ib_sa_opa_pathrecord_support(struct ib_sa_client *client,
+ struct ib_device *device,
+ u8 port_num)
+{
+ struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+ struct ib_sa_port *port;
+ unsigned long flags;
+ bool ret = false;
+
+ if (!sa_dev)
+ return ret;
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+ spin_lock_irqsave(&port->classport_lock, flags);
+ if (!port->classport_info.valid)
+ goto ret;
+
+ if (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_OPA)
+ ret = opa_get_cpi_capmask2(&port->classport_info.data.opa) &
+ OPA_CLASS_PORT_INFO_PR_SUPPORT;
+ret:
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+ return ret;
+}
+
+enum opa_pr_supported {
+ PR_NOT_SUPPORTED,
+ PR_OPA_SUPPORTED,
+ PR_IB_SUPPORTED
+};
+
+/**
+ * Check if the current PR query can be an OPA query.
+ * Returns PR_NOT_SUPPORTED if a path record query is not
+ * possible, PR_OPA_SUPPORTED if an OPA path record query
+ * is possible and PR_IB_SUPPORTED if an IB path record
+ * query is possible.
+ */
+static int opa_pr_query_possible(struct ib_sa_client *client,
+ struct ib_device *device,
+ u8 port_num,
+ struct sa_path_rec *rec)
+{
+ struct ib_port_attr port_attr;
+
+ if (ib_query_port(device, port_num, &port_attr))
+ return PR_NOT_SUPPORTED;
+
+ if (ib_sa_opa_pathrecord_support(client, device, port_num))
+ return PR_OPA_SUPPORTED;
+
+ if (port_attr.lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
+ return PR_NOT_SUPPORTED;
+ else
+ return PR_IB_SUPPORTED;
+}
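+
+/*
+ * Note (not part of this patch): when a caller passes an OPA path record but
+ * only PR_IB_SUPPORTED is reported, ib_sa_path_rec_get() below converts the
+ * record to IB format (conv_pr), issues an IB query, and converts the
+ * response back to OPA before invoking the caller's callback.
+ */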
+
static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)
@@ -1301,22 +1519,44 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
container_of(sa_query, struct ib_sa_path_query, sa_query);
if (mad) {
- struct ib_sa_path_rec rec;
-
- ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
- mad->data, &rec);
- rec.net = NULL;
- rec.ifindex = 0;
- rec.gid_type = IB_GID_TYPE_IB;
- eth_zero_addr(rec.dmac);
- query->callback(status, &rec, query->context);
+ struct sa_path_rec rec;
+
+ if (sa_query->flags & IB_SA_QUERY_OPA) {
+ ib_unpack(opa_path_rec_table,
+ ARRAY_SIZE(opa_path_rec_table),
+ mad->data, &rec);
+ rec.rec_type = SA_PATH_REC_TYPE_OPA;
+ query->callback(status, &rec, query->context);
+ } else {
+ ib_unpack(path_rec_table,
+ ARRAY_SIZE(path_rec_table),
+ mad->data, &rec);
+ rec.rec_type = SA_PATH_REC_TYPE_IB;
+ sa_path_set_ndev(&rec, NULL);
+ sa_path_set_ifindex(&rec, 0);
+ sa_path_set_dmac_zero(&rec);
+
+ if (query->conv_pr) {
+ struct sa_path_rec opa;
+
+ memset(&opa, 0, sizeof(struct sa_path_rec));
+ sa_convert_path_ib_to_opa(&opa, &rec);
+ query->callback(status, &opa, query->context);
+ } else {
+ query->callback(status, &rec, query->context);
+ }
+ }
} else
query->callback(status, NULL, query->context);
}
static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
{
- kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
+ struct ib_sa_path_query *query =
+ container_of(sa_query, struct ib_sa_path_query, sa_query);
+
+ kfree(query->conv_pr);
+ kfree(query);
}
/**
@@ -1346,11 +1586,11 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
*/
int ib_sa_path_rec_get(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
- struct ib_sa_path_rec *rec,
+ struct sa_path_rec *rec,
ib_sa_comp_mask comp_mask,
int timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
- struct ib_sa_path_rec *resp,
+ struct sa_path_rec *resp,
void *context),
void *context,
struct ib_sa_query **sa_query)
@@ -1360,11 +1600,16 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
struct ib_sa_port *port;
struct ib_mad_agent *agent;
struct ib_sa_mad *mad;
+ enum opa_pr_supported status;
int ret;
if (!sa_dev)
return -ENODEV;
+ if ((rec->rec_type != SA_PATH_REC_TYPE_IB) &&
+ (rec->rec_type != SA_PATH_REC_TYPE_OPA))
+ return -EINVAL;
+
port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
@@ -1373,9 +1618,26 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
return -ENOMEM;
query->sa_query.port = port;
+ if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
+ status = opa_pr_query_possible(client, device, port_num, rec);
+ if (status == PR_NOT_SUPPORTED) {
+ ret = -EINVAL;
+ goto err1;
+ } else if (status == PR_OPA_SUPPORTED) {
+ query->sa_query.flags |= IB_SA_QUERY_OPA;
+ } else {
+ query->conv_pr =
+ kmalloc(sizeof(*query->conv_pr), gfp_mask);
+ if (!query->conv_pr) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+ }
+ }
+
ret = alloc_mad(&query->sa_query, gfp_mask);
if (ret)
- goto err1;
+ goto err2;
ib_sa_client_get(client);
query->sa_query.client = client;
@@ -1383,7 +1645,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
query->sa_query.release = ib_sa_path_rec_release;
@@ -1391,24 +1653,36 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC);
mad->sa_hdr.comp_mask = comp_mask;
- ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data);
+ if (query->sa_query.flags & IB_SA_QUERY_OPA) {
+ ib_pack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table),
+ rec, mad->data);
+ } else if (query->conv_pr) {
+ sa_convert_path_opa_to_ib(query->conv_pr, rec);
+ ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
+ query->conv_pr, mad->data);
+ } else {
+ ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
+ rec, mad->data);
+ }
*sa_query = &query->sa_query;
query->sa_query.flags |= IB_SA_ENABLE_LOCAL_SERVICE;
- query->sa_query.mad_buf->context[1] = rec;
+ query->sa_query.mad_buf->context[1] = (query->conv_pr) ?
+ query->conv_pr : rec;
ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
if (ret < 0)
- goto err2;
+ goto err3;
return ret;
-err2:
+err3:
*sa_query = NULL;
ib_sa_client_put(query->sa_query.client);
free_mad(&query->sa_query);
-
+err2:
+ kfree(query->conv_pr);
err1:
kfree(query);
return ret;
@@ -1508,7 +1782,7 @@ int ib_sa_service_rec_query(struct ib_sa_client *client,
query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
query->sa_query.release = ib_sa_service_rec_release;
@@ -1600,7 +1874,7 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
query->sa_query.release = ib_sa_mcmember_rec_release;
@@ -1697,7 +1971,7 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL;
query->sa_query.release = ib_sa_guidinfo_rec_release;
@@ -1728,7 +2002,42 @@ err1:
}
EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
-/* Support get SA ClassPortInfo */
+bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client,
+ struct ib_device *device,
+ u8 port_num)
+{
+ struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+ struct ib_sa_port *port;
+ bool ret = false;
+ unsigned long flags;
+
+ if (!sa_dev)
+ return ret;
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+
+ spin_lock_irqsave(&port->classport_lock, flags);
+ if ((port->classport_info.valid) &&
+ (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_IB))
+ ret = ib_get_cpi_capmask2(&port->classport_info.data.ib)
+ & IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT;
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(ib_sa_sendonly_fullmem_support);
+
+struct ib_classport_info_context {
+ struct completion done;
+ struct ib_sa_query *sa_query;
+};
+
+static void ib_classportinfo_cb(void *context)
+{
+ struct ib_classport_info_context *cb_ctx = context;
+
+ complete(&cb_ctx->done);
+}
+
static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)
@@ -1736,91 +2045,91 @@ static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
unsigned long flags;
struct ib_sa_classport_info_query *query =
container_of(sa_query, struct ib_sa_classport_info_query, sa_query);
+ struct ib_sa_classport_cache *info = &sa_query->port->classport_info;
if (mad) {
- struct ib_class_port_info rec;
+ if (sa_query->flags & IB_SA_QUERY_OPA) {
+ struct opa_class_port_info rec;
- ib_unpack(classport_info_rec_table,
- ARRAY_SIZE(classport_info_rec_table),
- mad->data, &rec);
+ ib_unpack(opa_classport_info_rec_table,
+ ARRAY_SIZE(opa_classport_info_rec_table),
+ mad->data, &rec);
- spin_lock_irqsave(&sa_query->port->classport_lock, flags);
- if (!status && !sa_query->port->classport_info.valid) {
- memcpy(&sa_query->port->classport_info.data, &rec,
- sizeof(sa_query->port->classport_info.data));
+ spin_lock_irqsave(&sa_query->port->classport_lock,
+ flags);
+ if (!status && !info->valid) {
+ memcpy(&info->data.opa, &rec,
+ sizeof(info->data.opa));
- sa_query->port->classport_info.valid = true;
- }
- spin_unlock_irqrestore(&sa_query->port->classport_lock, flags);
+ info->valid = true;
+ info->data.type = RDMA_CLASS_PORT_INFO_OPA;
+ }
+ spin_unlock_irqrestore(&sa_query->port->classport_lock,
+ flags);
- query->callback(status, &rec, query->context);
- } else {
- query->callback(status, NULL, query->context);
+ } else {
+ struct ib_class_port_info rec;
+
+ ib_unpack(ib_classport_info_rec_table,
+ ARRAY_SIZE(ib_classport_info_rec_table),
+ mad->data, &rec);
+
+ spin_lock_irqsave(&sa_query->port->classport_lock,
+ flags);
+ if (!status && !info->valid) {
+ memcpy(&info->data.ib, &rec,
+ sizeof(info->data.ib));
+
+ info->valid = true;
+ info->data.type = RDMA_CLASS_PORT_INFO_IB;
+ }
+ spin_unlock_irqrestore(&sa_query->port->classport_lock,
+ flags);
+ }
}
+ query->callback(query->context);
}
-static void ib_sa_portclass_info_rec_release(struct ib_sa_query *sa_query)
+static void ib_sa_classport_info_rec_release(struct ib_sa_query *sa_query)
{
kfree(container_of(sa_query, struct ib_sa_classport_info_query,
sa_query));
}
-int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- int timeout_ms, gfp_t gfp_mask,
- void (*callback)(int status,
- struct ib_class_port_info *resp,
- void *context),
- void *context,
- struct ib_sa_query **sa_query)
+static int ib_sa_classport_info_rec_query(struct ib_sa_port *port,
+ int timeout_ms,
+ void (*callback)(void *context),
+ void *context,
+ struct ib_sa_query **sa_query)
{
- struct ib_sa_classport_info_query *query;
- struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
- struct ib_sa_port *port;
struct ib_mad_agent *agent;
+ struct ib_sa_classport_info_query *query;
struct ib_sa_mad *mad;
- struct ib_class_port_info cached_class_port_info;
+ gfp_t gfp_mask = GFP_KERNEL;
int ret;
- unsigned long flags;
- if (!sa_dev)
- return -ENODEV;
-
- port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
- /* Use cached ClassPortInfo attribute if valid instead of sending mad */
- spin_lock_irqsave(&port->classport_lock, flags);
- if (port->classport_info.valid && callback) {
- memcpy(&cached_class_port_info, &port->classport_info.data,
- sizeof(cached_class_port_info));
- spin_unlock_irqrestore(&port->classport_lock, flags);
- callback(0, &cached_class_port_info, context);
- return 0;
- }
- spin_unlock_irqrestore(&port->classport_lock, flags);
-
query = kzalloc(sizeof(*query), gfp_mask);
if (!query)
return -ENOMEM;
query->sa_query.port = port;
+ query->sa_query.flags |= rdma_cap_opa_ah(port->agent->device,
+ port->port_num) ?
+ IB_SA_QUERY_OPA : 0;
ret = alloc_mad(&query->sa_query, gfp_mask);
if (ret)
- goto err1;
+ goto err_free;
- ib_sa_client_get(client);
- query->sa_query.client = client;
- query->callback = callback;
- query->context = context;
+ query->callback = callback;
+ query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
- query->sa_query.callback = callback ? ib_sa_classport_info_rec_callback : NULL;
-
- query->sa_query.release = ib_sa_portclass_info_rec_release;
- /* support GET only */
+ query->sa_query.callback = ib_sa_classport_info_rec_callback;
+ query->sa_query.release = ib_sa_classport_info_rec_release;
mad->mad_hdr.method = IB_MGMT_METHOD_GET;
mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO);
mad->sa_hdr.comp_mask = 0;
@@ -1828,20 +2137,71 @@ int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
if (ret < 0)
- goto err2;
+ goto err_free_mad;
return ret;
-err2:
+err_free_mad:
*sa_query = NULL;
- ib_sa_client_put(query->sa_query.client);
free_mad(&query->sa_query);
-err1:
+err_free:
kfree(query);
return ret;
}
-EXPORT_SYMBOL(ib_sa_classport_info_rec_query);
+
+static void update_ib_cpi(struct work_struct *work)
+{
+ struct ib_sa_port *port =
+ container_of(work, struct ib_sa_port, ib_cpi_work.work);
+ struct ib_classport_info_context *cb_context;
+ unsigned long flags;
+ int ret;
+
+ /* If the classport info is valid, nothing
+ * to do here.
+ */
+ spin_lock_irqsave(&port->classport_lock, flags);
+ if (port->classport_info.valid) {
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+ return;
+ }
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+
+ cb_context = kmalloc(sizeof(*cb_context), GFP_KERNEL);
+ if (!cb_context)
+ goto err_nomem;
+
+ init_completion(&cb_context->done);
+
+ ret = ib_sa_classport_info_rec_query(port, 3000,
+ ib_classportinfo_cb, cb_context,
+ &cb_context->sa_query);
+ if (ret < 0)
+ goto free_cb_err;
+ wait_for_completion(&cb_context->done);
+free_cb_err:
+ kfree(cb_context);
+ spin_lock_irqsave(&port->classport_lock, flags);
+
+ /* If the classport info is still not valid, the query should have
+	 * failed for some reason. Retry issuing the query.
+ */
+ if (!port->classport_info.valid) {
+ port->classport_info.retry_cnt++;
+ if (port->classport_info.retry_cnt <=
+ IB_SA_CPI_MAX_RETRY_CNT) {
+ unsigned long delay =
+ msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
+
+ queue_delayed_work(ib_wq, &port->ib_cpi_work, delay);
+ }
+ }
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+
+err_nomem:
+ return;
+}
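+
+/*
+ * Note (not part of this patch): with IB_SA_CPI_MAX_RETRY_CNT == 3 and
+ * IB_SA_CPI_RETRY_WAIT == 1000 msecs, update_ib_cpi() above re-issues a
+ * failing ClassPortInfo query at most three more times, roughly one second
+ * apart, and then leaves the cache invalid until the next port event.
+ */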
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
@@ -1870,7 +2230,8 @@ static void send_handler(struct ib_mad_agent *agent,
spin_unlock_irqrestore(&idr_lock, flags);
free_mad(query);
- ib_sa_client_put(query->client);
+ if (query->client)
+ ib_sa_client_put(query->client);
query->release(query);
}
@@ -1897,6 +2258,102 @@ static void recv_handler(struct ib_mad_agent *mad_agent,
ib_free_recv_mad(mad_recv_wc);
}
+static void update_sm_ah(struct work_struct *work)
+{
+ struct ib_sa_port *port =
+ container_of(work, struct ib_sa_port, update_task);
+ struct ib_sa_sm_ah *new_ah;
+ struct ib_port_attr port_attr;
+ struct rdma_ah_attr ah_attr;
+
+ if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
+ pr_warn("Couldn't query port\n");
+ return;
+ }
+
+ new_ah = kmalloc(sizeof(*new_ah), GFP_KERNEL);
+ if (!new_ah)
+ return;
+
+ kref_init(&new_ah->ref);
+ new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
+
+ new_ah->pkey_index = 0;
+ if (ib_find_pkey(port->agent->device, port->port_num,
+ IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
+ pr_err("Couldn't find index for default PKey\n");
+
+ memset(&ah_attr, 0, sizeof(ah_attr));
+ ah_attr.type = rdma_ah_find_type(port->agent->device,
+ port->port_num);
+ rdma_ah_set_dlid(&ah_attr, port_attr.sm_lid);
+ rdma_ah_set_sl(&ah_attr, port_attr.sm_sl);
+ rdma_ah_set_port_num(&ah_attr, port->port_num);
+ if (port_attr.grh_required) {
+ rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH);
+
+ rdma_ah_set_subnet_prefix(&ah_attr,
+ cpu_to_be64(port_attr.subnet_prefix));
+ rdma_ah_set_interface_id(&ah_attr,
+ cpu_to_be64(IB_SA_WELL_KNOWN_GUID));
+ }
+
+ new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr);
+ if (IS_ERR(new_ah->ah)) {
+ pr_warn("Couldn't create new SM AH\n");
+ kfree(new_ah);
+ return;
+ }
+
+ spin_lock_irq(&port->ah_lock);
+ if (port->sm_ah)
+ kref_put(&port->sm_ah->ref, free_sm_ah);
+ port->sm_ah = new_ah;
+ spin_unlock_irq(&port->ah_lock);
+}
+
+static void ib_sa_event(struct ib_event_handler *handler,
+ struct ib_event *event)
+{
+ if (event->event == IB_EVENT_PORT_ERR ||
+ event->event == IB_EVENT_PORT_ACTIVE ||
+ event->event == IB_EVENT_LID_CHANGE ||
+ event->event == IB_EVENT_PKEY_CHANGE ||
+ event->event == IB_EVENT_SM_CHANGE ||
+ event->event == IB_EVENT_CLIENT_REREGISTER) {
+ unsigned long flags;
+ struct ib_sa_device *sa_dev =
+ container_of(handler, typeof(*sa_dev), event_handler);
+ u8 port_num = event->element.port_num - sa_dev->start_port;
+ struct ib_sa_port *port = &sa_dev->port[port_num];
+
+ if (!rdma_cap_ib_sa(handler->device, port->port_num))
+ return;
+
+ spin_lock_irqsave(&port->ah_lock, flags);
+ if (port->sm_ah)
+ kref_put(&port->sm_ah->ref, free_sm_ah);
+ port->sm_ah = NULL;
+ spin_unlock_irqrestore(&port->ah_lock, flags);
+
+ if (event->event == IB_EVENT_SM_CHANGE ||
+ event->event == IB_EVENT_CLIENT_REREGISTER ||
+ event->event == IB_EVENT_LID_CHANGE ||
+ event->event == IB_EVENT_PORT_ACTIVE) {
+ unsigned long delay =
+ msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
+
+ spin_lock_irqsave(&port->classport_lock, flags);
+ port->classport_info.valid = false;
+ port->classport_info.retry_cnt = 0;
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+ queue_delayed_work(ib_wq,
+ &port->ib_cpi_work, delay);
+ }
+ queue_work(ib_wq, &sa_dev->port[port_num].update_task);
+ }
+}
+
static void ib_sa_add_one(struct ib_device *device)
{
struct ib_sa_device *sa_dev;
@@ -1934,6 +2391,8 @@ static void ib_sa_add_one(struct ib_device *device)
goto err;
INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
+ INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work,
+ update_ib_cpi);
count++;
}
@@ -1980,11 +2439,11 @@ static void ib_sa_remove_one(struct ib_device *device, void *client_data)
return;
ib_unregister_event_handler(&sa_dev->event_handler);
-
flush_workqueue(ib_wq);
for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
if (rdma_cap_ib_sa(device, i + 1)) {
+ cancel_delayed_work_sync(&sa_dev->port[i].ib_cpi_work);
ib_unregister_mad_agent(sa_dev->port[i].agent);
if (sa_dev->port[i].sm_ah)
kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
new file mode 100644
index 000000000000..70ad19c4c73e
--- /dev/null
+++ b/drivers/infiniband/core/security.c
@@ -0,0 +1,709 @@
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifdef CONFIG_SECURITY_INFINIBAND
+
+#include <linux/security.h>
+#include <linux/completion.h>
+#include <linux/list.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_cache.h>
+#include "core_priv.h"
+#include "mad_priv.h"
+
+static struct pkey_index_qp_list *get_pkey_idx_qp_list(struct ib_port_pkey *pp)
+{
+ struct pkey_index_qp_list *pkey = NULL;
+ struct pkey_index_qp_list *tmp_pkey;
+ struct ib_device *dev = pp->sec->dev;
+
+ spin_lock(&dev->port_pkey_list[pp->port_num].list_lock);
+ list_for_each_entry(tmp_pkey,
+ &dev->port_pkey_list[pp->port_num].pkey_list,
+ pkey_index_list) {
+ if (tmp_pkey->pkey_index == pp->pkey_index) {
+ pkey = tmp_pkey;
+ break;
+ }
+ }
+ spin_unlock(&dev->port_pkey_list[pp->port_num].list_lock);
+ return pkey;
+}
+
+static int get_pkey_and_subnet_prefix(struct ib_port_pkey *pp,
+ u16 *pkey,
+ u64 *subnet_prefix)
+{
+ struct ib_device *dev = pp->sec->dev;
+ int ret;
+
+ ret = ib_get_cached_pkey(dev, pp->port_num, pp->pkey_index, pkey);
+ if (ret)
+ return ret;
+
+ ret = ib_get_cached_subnet_prefix(dev, pp->port_num, subnet_prefix);
+
+ return ret;
+}
+
+static int enforce_qp_pkey_security(u16 pkey,
+ u64 subnet_prefix,
+ struct ib_qp_security *qp_sec)
+{
+ struct ib_qp_security *shared_qp_sec;
+ int ret;
+
+ ret = security_ib_pkey_access(qp_sec->security, subnet_prefix, pkey);
+ if (ret)
+ return ret;
+
+ if (qp_sec->qp == qp_sec->qp->real_qp) {
+ list_for_each_entry(shared_qp_sec,
+ &qp_sec->shared_qp_list,
+ shared_qp_list) {
+ ret = security_ib_pkey_access(shared_qp_sec->security,
+ subnet_prefix,
+ pkey);
+ if (ret)
+ return ret;
+ }
+ }
+ return 0;
+}
+
+/* The caller of this function must hold the QP security
+ * mutex of the QP of the security structure in *pps.
+ *
+ * It takes separate ports_pkeys and security structures
+ * because in some cases the pps will be for new settings,
+ * or the pps will be for the real QP and the security
+ * structure will be for a shared QP.
+ */
+static int check_qp_port_pkey_settings(struct ib_ports_pkeys *pps,
+ struct ib_qp_security *sec)
+{
+ u64 subnet_prefix;
+ u16 pkey;
+ int ret = 0;
+
+ if (!pps)
+ return 0;
+
+ if (pps->main.state != IB_PORT_PKEY_NOT_VALID) {
+ ret = get_pkey_and_subnet_prefix(&pps->main,
+ &pkey,
+ &subnet_prefix);
+ if (ret)
+ return ret;
+
+ ret = enforce_qp_pkey_security(pkey,
+ subnet_prefix,
+ sec);
+ if (ret)
+ return ret;
+ }
+
+ if (pps->alt.state != IB_PORT_PKEY_NOT_VALID) {
+ ret = get_pkey_and_subnet_prefix(&pps->alt,
+ &pkey,
+ &subnet_prefix);
+ if (ret)
+ return ret;
+
+ ret = enforce_qp_pkey_security(pkey,
+ subnet_prefix,
+ sec);
+ }
+
+ return ret;
+}
+
+/* The caller of this function must hold the QP security
+ * mutex.
+ */
+static void qp_to_error(struct ib_qp_security *sec)
+{
+ struct ib_qp_security *shared_qp_sec;
+ struct ib_qp_attr attr = {
+ .qp_state = IB_QPS_ERR
+ };
+ struct ib_event event = {
+ .event = IB_EVENT_QP_FATAL
+ };
+
+ /* If the QP is in the process of being destroyed,
+ * the qp pointer in the security structure is
+ * undefined. It cannot be modified now.
+ */
+ if (sec->destroying)
+ return;
+
+ ib_modify_qp(sec->qp,
+ &attr,
+ IB_QP_STATE);
+
+ if (sec->qp->event_handler && sec->qp->qp_context) {
+ event.element.qp = sec->qp;
+ sec->qp->event_handler(&event,
+ sec->qp->qp_context);
+ }
+
+ list_for_each_entry(shared_qp_sec,
+ &sec->shared_qp_list,
+ shared_qp_list) {
+ struct ib_qp *qp = shared_qp_sec->qp;
+
+ if (qp->event_handler && qp->qp_context) {
+ event.element.qp = qp;
+ event.device = qp->device;
+ qp->event_handler(&event,
+ qp->qp_context);
+ }
+ }
+}
+
+static inline void check_pkey_qps(struct pkey_index_qp_list *pkey,
+ struct ib_device *device,
+ u8 port_num,
+ u64 subnet_prefix)
+{
+ struct ib_port_pkey *pp, *tmp_pp;
+ bool comp;
+ LIST_HEAD(to_error_list);
+ u16 pkey_val;
+
+ if (!ib_get_cached_pkey(device,
+ port_num,
+ pkey->pkey_index,
+ &pkey_val)) {
+ spin_lock(&pkey->qp_list_lock);
+ list_for_each_entry(pp, &pkey->qp_list, qp_list) {
+ if (atomic_read(&pp->sec->error_list_count))
+ continue;
+
+ if (enforce_qp_pkey_security(pkey_val,
+ subnet_prefix,
+ pp->sec)) {
+ atomic_inc(&pp->sec->error_list_count);
+ list_add(&pp->to_error_list,
+ &to_error_list);
+ }
+ }
+ spin_unlock(&pkey->qp_list_lock);
+ }
+
+ list_for_each_entry_safe(pp,
+ tmp_pp,
+ &to_error_list,
+ to_error_list) {
+ mutex_lock(&pp->sec->mutex);
+ qp_to_error(pp->sec);
+ list_del(&pp->to_error_list);
+ atomic_dec(&pp->sec->error_list_count);
+ comp = pp->sec->destroying;
+ mutex_unlock(&pp->sec->mutex);
+
+ if (comp)
+ complete(&pp->sec->error_complete);
+ }
+}
+
+/* The caller of this function must hold the QP security
+ * mutex.
+ */
+static int port_pkey_list_insert(struct ib_port_pkey *pp)
+{
+ struct pkey_index_qp_list *tmp_pkey;
+ struct pkey_index_qp_list *pkey;
+ struct ib_device *dev;
+ u8 port_num = pp->port_num;
+ int ret = 0;
+
+ if (pp->state != IB_PORT_PKEY_VALID)
+ return 0;
+
+ dev = pp->sec->dev;
+
+ pkey = get_pkey_idx_qp_list(pp);
+
+ if (!pkey) {
+ bool found = false;
+
+ pkey = kzalloc(sizeof(*pkey), GFP_KERNEL);
+ if (!pkey)
+ return -ENOMEM;
+
+ spin_lock(&dev->port_pkey_list[port_num].list_lock);
+ /* Check for the PKey again. A racing process may
+ * have created it.
+ */
+ list_for_each_entry(tmp_pkey,
+ &dev->port_pkey_list[port_num].pkey_list,
+ pkey_index_list) {
+ if (tmp_pkey->pkey_index == pp->pkey_index) {
+ kfree(pkey);
+ pkey = tmp_pkey;
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ pkey->pkey_index = pp->pkey_index;
+ spin_lock_init(&pkey->qp_list_lock);
+ INIT_LIST_HEAD(&pkey->qp_list);
+ list_add(&pkey->pkey_index_list,
+ &dev->port_pkey_list[port_num].pkey_list);
+ }
+ spin_unlock(&dev->port_pkey_list[port_num].list_lock);
+ }
+
+ spin_lock(&pkey->qp_list_lock);
+ list_add(&pp->qp_list, &pkey->qp_list);
+ spin_unlock(&pkey->qp_list_lock);
+
+ pp->state = IB_PORT_PKEY_LISTED;
+
+ return ret;
+}
+
+/* The caller of this function must hold the QP security
+ * mutex.
+ */
+static void port_pkey_list_remove(struct ib_port_pkey *pp)
+{
+ struct pkey_index_qp_list *pkey;
+
+ if (pp->state != IB_PORT_PKEY_LISTED)
+ return;
+
+ pkey = get_pkey_idx_qp_list(pp);
+
+ spin_lock(&pkey->qp_list_lock);
+ list_del(&pp->qp_list);
+ spin_unlock(&pkey->qp_list_lock);
+
+ /* The setting may still be valid, e.g. after
+ * a destroy has failed.
+ */
+ pp->state = IB_PORT_PKEY_VALID;
+}
+
+static void destroy_qp_security(struct ib_qp_security *sec)
+{
+ security_ib_free_security(sec->security);
+ kfree(sec->ports_pkeys);
+ kfree(sec);
+}
+
+/* The caller of this function must hold the QP security
+ * mutex.
+ */
+static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp,
+ const struct ib_qp_attr *qp_attr,
+ int qp_attr_mask)
+{
+ struct ib_ports_pkeys *new_pps;
+ struct ib_ports_pkeys *qp_pps = qp->qp_sec->ports_pkeys;
+
+ new_pps = kzalloc(sizeof(*new_pps), GFP_KERNEL);
+ if (!new_pps)
+ return NULL;
+
+ if (qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) {
+ if (!qp_pps) {
+ new_pps->main.port_num = qp_attr->port_num;
+ new_pps->main.pkey_index = qp_attr->pkey_index;
+ } else {
+ new_pps->main.port_num = (qp_attr_mask & IB_QP_PORT) ?
+ qp_attr->port_num :
+ qp_pps->main.port_num;
+
+ new_pps->main.pkey_index =
+ (qp_attr_mask & IB_QP_PKEY_INDEX) ?
+ qp_attr->pkey_index :
+ qp_pps->main.pkey_index;
+ }
+ new_pps->main.state = IB_PORT_PKEY_VALID;
+ } else if (qp_pps) {
+ new_pps->main.port_num = qp_pps->main.port_num;
+ new_pps->main.pkey_index = qp_pps->main.pkey_index;
+ if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID)
+ new_pps->main.state = IB_PORT_PKEY_VALID;
+ }
+
+ if (qp_attr_mask & IB_QP_ALT_PATH) {
+ new_pps->alt.port_num = qp_attr->alt_port_num;
+ new_pps->alt.pkey_index = qp_attr->alt_pkey_index;
+ new_pps->alt.state = IB_PORT_PKEY_VALID;
+ } else if (qp_pps) {
+ new_pps->alt.port_num = qp_pps->alt.port_num;
+ new_pps->alt.pkey_index = qp_pps->alt.pkey_index;
+ if (qp_pps->alt.state != IB_PORT_PKEY_NOT_VALID)
+ new_pps->alt.state = IB_PORT_PKEY_VALID;
+ }
+
+ new_pps->main.sec = qp->qp_sec;
+ new_pps->alt.sec = qp->qp_sec;
+ return new_pps;
+}
+
+int ib_open_shared_qp_security(struct ib_qp *qp, struct ib_device *dev)
+{
+ struct ib_qp *real_qp = qp->real_qp;
+ int ret;
+
+ ret = ib_create_qp_security(qp, dev);
+
+ if (ret)
+ return ret;
+
+ mutex_lock(&real_qp->qp_sec->mutex);
+ ret = check_qp_port_pkey_settings(real_qp->qp_sec->ports_pkeys,
+ qp->qp_sec);
+
+ if (ret)
+ goto ret;
+
+ if (qp != real_qp)
+ list_add(&qp->qp_sec->shared_qp_list,
+ &real_qp->qp_sec->shared_qp_list);
+ret:
+ mutex_unlock(&real_qp->qp_sec->mutex);
+ if (ret)
+ destroy_qp_security(qp->qp_sec);
+
+ return ret;
+}
+
+void ib_close_shared_qp_security(struct ib_qp_security *sec)
+{
+ struct ib_qp *real_qp = sec->qp->real_qp;
+
+ mutex_lock(&real_qp->qp_sec->mutex);
+ list_del(&sec->shared_qp_list);
+ mutex_unlock(&real_qp->qp_sec->mutex);
+
+ destroy_qp_security(sec);
+}
+
+int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev)
+{
+ int ret;
+
+ qp->qp_sec = kzalloc(sizeof(*qp->qp_sec), GFP_KERNEL);
+ if (!qp->qp_sec)
+ return -ENOMEM;
+
+ qp->qp_sec->qp = qp;
+ qp->qp_sec->dev = dev;
+ mutex_init(&qp->qp_sec->mutex);
+ INIT_LIST_HEAD(&qp->qp_sec->shared_qp_list);
+ atomic_set(&qp->qp_sec->error_list_count, 0);
+ init_completion(&qp->qp_sec->error_complete);
+ ret = security_ib_alloc_security(&qp->qp_sec->security);
+ if (ret)
+ kfree(qp->qp_sec);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_create_qp_security);
+
+void ib_destroy_qp_security_begin(struct ib_qp_security *sec)
+{
+ mutex_lock(&sec->mutex);
+
+ /* Remove the QP from the lists so it won't get added to
+ * a to_error_list during the destroy process.
+ */
+ if (sec->ports_pkeys) {
+ port_pkey_list_remove(&sec->ports_pkeys->main);
+ port_pkey_list_remove(&sec->ports_pkeys->alt);
+ }
+
+ /* If the QP is already in one or more of those lists,
+ * the destroying flag will ensure the to-error flow
+ * doesn't operate on an undefined QP.
+ */
+ sec->destroying = true;
+
+ /* Record the error list count to know how many completions
+ * to wait for.
+ */
+ sec->error_comps_pending = atomic_read(&sec->error_list_count);
+
+ mutex_unlock(&sec->mutex);
+}
+
+void ib_destroy_qp_security_abort(struct ib_qp_security *sec)
+{
+ int ret;
+ int i;
+
+ /* If a concurrent cache update is in progress, this
+ * QP security could be marked for an error state
+ * transition. Wait for this to complete.
+ */
+ for (i = 0; i < sec->error_comps_pending; i++)
+ wait_for_completion(&sec->error_complete);
+
+ mutex_lock(&sec->mutex);
+ sec->destroying = false;
+
+ /* Restore the position in the lists and verify
+ * access is still allowed in case a cache update
+ * occurred while attempting to destroy.
+ *
+ * Because these settings were already listed
+ * and removed during ib_destroy_qp_security_begin,
+ * we know the pkey_index_qp_list for the PKey
+ * already exists, so port_pkey_list_insert won't fail.
+ */
+ if (sec->ports_pkeys) {
+ port_pkey_list_insert(&sec->ports_pkeys->main);
+ port_pkey_list_insert(&sec->ports_pkeys->alt);
+ }
+
+ ret = check_qp_port_pkey_settings(sec->ports_pkeys, sec);
+ if (ret)
+ qp_to_error(sec);
+
+ mutex_unlock(&sec->mutex);
+}
+
+void ib_destroy_qp_security_end(struct ib_qp_security *sec)
+{
+ int i;
+
+ /* If a concurrent cache update is occurring, we must
+ * wait until this QP security structure is processed
+ * in the QP-to-error flow before destroying it because
+ * the to_error_list is in use.
+ */
+ for (i = 0; i < sec->error_comps_pending; i++)
+ wait_for_completion(&sec->error_complete);
+
+ destroy_qp_security(sec);
+}
+
+void ib_security_cache_change(struct ib_device *device,
+ u8 port_num,
+ u64 subnet_prefix)
+{
+ struct pkey_index_qp_list *pkey;
+
+ list_for_each_entry(pkey,
+ &device->port_pkey_list[port_num].pkey_list,
+ pkey_index_list) {
+ check_pkey_qps(pkey,
+ device,
+ port_num,
+ subnet_prefix);
+ }
+}
+
+void ib_security_destroy_port_pkey_list(struct ib_device *device)
+{
+ struct pkey_index_qp_list *pkey, *tmp_pkey;
+ int i;
+
+ for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
+ spin_lock(&device->port_pkey_list[i].list_lock);
+ list_for_each_entry_safe(pkey,
+ tmp_pkey,
+ &device->port_pkey_list[i].pkey_list,
+ pkey_index_list) {
+ list_del(&pkey->pkey_index_list);
+ kfree(pkey);
+ }
+ spin_unlock(&device->port_pkey_list[i].list_lock);
+ }
+}
+
+int ib_security_modify_qp(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_udata *udata)
+{
+ int ret = 0;
+ struct ib_ports_pkeys *tmp_pps;
+ struct ib_ports_pkeys *new_pps;
+ bool special_qp = (qp->qp_type == IB_QPT_SMI ||
+ qp->qp_type == IB_QPT_GSI ||
+ qp->qp_type >= IB_QPT_RESERVED1);
+ bool pps_change = ((qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) ||
+ (qp_attr_mask & IB_QP_ALT_PATH));
+
+ if (pps_change && !special_qp) {
+ mutex_lock(&qp->qp_sec->mutex);
+ new_pps = get_new_pps(qp,
+ qp_attr,
+ qp_attr_mask);
+
+ /* Add this QP to the lists for the new port
+ * and pkey settings before checking for permission
+ * in case there is a concurrent cache update
+ * occurring. Walking the list for a cache change
+ * doesn't acquire the security mutex unless it's
+ * sending the QP to error.
+ */
+ ret = port_pkey_list_insert(&new_pps->main);
+
+ if (!ret)
+ ret = port_pkey_list_insert(&new_pps->alt);
+
+ if (!ret)
+ ret = check_qp_port_pkey_settings(new_pps,
+ qp->qp_sec);
+ }
+
+ if (!ret)
+ ret = qp->device->modify_qp(qp->real_qp,
+ qp_attr,
+ qp_attr_mask,
+ udata);
+
+ if (pps_change && !special_qp) {
+ /* Clean up the lists and free the appropriate
+ * ports_pkeys structure.
+ */
+ if (ret) {
+ tmp_pps = new_pps;
+ } else {
+ tmp_pps = qp->qp_sec->ports_pkeys;
+ qp->qp_sec->ports_pkeys = new_pps;
+ }
+
+ if (tmp_pps) {
+ port_pkey_list_remove(&tmp_pps->main);
+ port_pkey_list_remove(&tmp_pps->alt);
+ }
+ kfree(tmp_pps);
+ mutex_unlock(&qp->qp_sec->mutex);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(ib_security_modify_qp);
+
+int ib_security_pkey_access(struct ib_device *dev,
+ u8 port_num,
+ u16 pkey_index,
+ void *sec)
+{
+ u64 subnet_prefix;
+ u16 pkey;
+ int ret;
+
+ ret = ib_get_cached_pkey(dev, port_num, pkey_index, &pkey);
+ if (ret)
+ return ret;
+
+ ret = ib_get_cached_subnet_prefix(dev, port_num, &subnet_prefix);
+
+ if (ret)
+ return ret;
+
+ return security_ib_pkey_access(sec, subnet_prefix, pkey);
+}
+EXPORT_SYMBOL(ib_security_pkey_access);
+
+static int ib_mad_agent_security_change(struct notifier_block *nb,
+ unsigned long event,
+ void *data)
+{
+ struct ib_mad_agent *ag = container_of(nb, struct ib_mad_agent, lsm_nb);
+
+ if (event != LSM_POLICY_CHANGE)
+ return NOTIFY_DONE;
+
+ ag->smp_allowed = !security_ib_endport_manage_subnet(ag->security,
+ ag->device->name,
+ ag->port_num);
+
+ return NOTIFY_OK;
+}
+
+int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
+ enum ib_qp_type qp_type)
+{
+ int ret;
+
+ ret = security_ib_alloc_security(&agent->security);
+ if (ret)
+ return ret;
+
+ if (qp_type != IB_QPT_SMI)
+ return 0;
+
+ ret = security_ib_endport_manage_subnet(agent->security,
+ agent->device->name,
+ agent->port_num);
+ if (ret)
+ return ret;
+
+ agent->lsm_nb.notifier_call = ib_mad_agent_security_change;
+ ret = register_lsm_notifier(&agent->lsm_nb);
+ if (ret)
+ return ret;
+
+ agent->smp_allowed = true;
+ agent->lsm_nb_reg = true;
+ return 0;
+}
+
+void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent)
+{
+ security_ib_free_security(agent->security);
+ if (agent->lsm_nb_reg)
+ unregister_lsm_notifier(&agent->lsm_nb);
+}
+
+int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index)
+{
+ int ret;
+
+ if (map->agent.qp->qp_type == IB_QPT_SMI && !map->agent.smp_allowed)
+ return -EACCES;
+
+ ret = ib_security_pkey_access(map->agent.device,
+ map->agent.port_num,
+ pkey_index,
+ map->agent.security);
+
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+#endif /* CONFIG_SECURITY_INFINIBAND */
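For orientation, the begin/abort/end trio above is meant to bracket QP destruction. The verbs-layer caller is not part of this hunk, so the following is only a sketch, with a hypothetical function name, of how a destroy path might pair the calls:

/* Illustrative only: how the begin/abort/end trio is intended to be paired.
 * The real caller lives in the QP destroy path, which is not shown here.
 */
static int example_destroy_qp(struct ib_qp *qp)
{
	struct ib_qp_security *sec = qp->qp_sec;
	int ret;

	if (sec)
		/* stop cache-change processing from touching this QP */
		ib_destroy_qp_security_begin(sec);

	ret = qp->device->destroy_qp(qp);
	if (ret) {
		if (sec)
			/* destroy failed: re-list the pkey settings */
			ib_destroy_qp_security_abort(sec);
	} else {
		if (sec)
			/* wait for in-flight error transitions, then free */
			ib_destroy_qp_security_end(sec);
	}
	return ret;
}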
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index c1fb545e8d78..7ebe1ef23652 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -253,6 +253,10 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
speed = " EDR";
rate = 250;
break;
+ case IB_SPEED_HDR:
+ speed = " HDR";
+ rate = 500;
+ break;
case IB_SPEED_SDR:
default: /* default to SDR for invalid rates */
rate = 25;
@@ -1258,7 +1262,7 @@ int ib_device_register_sysfs(struct ib_device *device,
int ret;
int i;
- device->dev.parent = device->dma_device;
+ WARN_ON_ONCE(!device->dev.parent);
ret = dev_set_name(class_dev, "%s", device->name);
if (ret)
return ret;
@@ -1301,7 +1305,7 @@ err_put:
free_port_list_attributes(device);
err_unregister:
- device_unregister(class_dev);
+ device_del(class_dev);
err:
return ret;
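For context on the HDR entry added above: the existing table values suggest rate is expressed in tenths of a Gb/s per lane (SDR uses 25 for 2.5 Gb/s, EDR uses 250 for 25 Gb/s), so 500 corresponds to HDR's 50 Gb/s per lane. A minimal sketch of the implied arithmetic, using a hypothetical helper name:

/* Hypothetical helper illustrating the units implied by the table above:
 * rate is in units of 0.1 Gb/s per lane, so HDR (500) on a 4X link gives
 * 500 * 4 = 2000, i.e. 200 Gb/s.
 */
static int example_link_rate_tenths_gbps(int rate_per_lane, int width)
{
	return rate_per_lane * width;	/* e.g. 500 * 4 = 2000 -> 200 Gb/s */
}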
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index e0a995b85a2d..112099c86a19 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -702,10 +702,10 @@ static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len)
return 0;
}
-static int ib_ucm_path_get(struct ib_sa_path_rec **path, u64 src)
+static int ib_ucm_path_get(struct sa_path_rec **path, u64 src)
{
struct ib_user_path_rec upath;
- struct ib_sa_path_rec *sa_path;
+ struct sa_path_rec *sa_path;
*path = NULL;
@@ -962,7 +962,7 @@ static ssize_t ib_ucm_send_lap(struct ib_ucm_file *file,
int in_len, int out_len)
{
struct ib_ucm_context *ctx;
- struct ib_sa_path_rec *path = NULL;
+ struct sa_path_rec *path = NULL;
struct ib_ucm_lap cmd;
const void *data = NULL;
int result;
@@ -1205,12 +1205,15 @@ static void ib_ucm_release_dev(struct device *dev)
struct ib_ucm_device *ucm_dev;
ucm_dev = container_of(dev, struct ib_ucm_device, dev);
- cdev_del(&ucm_dev->cdev);
+ kfree(ucm_dev);
+}
+
+static void ib_ucm_free_dev(struct ib_ucm_device *ucm_dev)
+{
if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
clear_bit(ucm_dev->devnum, dev_map);
else
clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, overflow_map);
- kfree(ucm_dev);
}
static const struct file_operations ucm_fops = {
@@ -1266,7 +1269,9 @@ static void ib_ucm_add_one(struct ib_device *device)
if (!ucm_dev)
return;
+ device_initialize(&ucm_dev->dev);
ucm_dev->ib_dev = device;
+ ucm_dev->dev.release = ib_ucm_release_dev;
devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
if (devnum >= IB_UCM_MAX_DEVICES) {
@@ -1286,16 +1291,14 @@ static void ib_ucm_add_one(struct ib_device *device)
cdev_init(&ucm_dev->cdev, &ucm_fops);
ucm_dev->cdev.owner = THIS_MODULE;
kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum);
- if (cdev_add(&ucm_dev->cdev, base, 1))
- goto err;
ucm_dev->dev.class = &cm_class;
- ucm_dev->dev.parent = device->dma_device;
- ucm_dev->dev.devt = ucm_dev->cdev.dev;
- ucm_dev->dev.release = ib_ucm_release_dev;
+ ucm_dev->dev.parent = device->dev.parent;
+ ucm_dev->dev.devt = base;
+
dev_set_name(&ucm_dev->dev, "ucm%d", ucm_dev->devnum);
- if (device_register(&ucm_dev->dev))
- goto err_cdev;
+ if (cdev_device_add(&ucm_dev->cdev, &ucm_dev->dev))
+ goto err_devnum;
if (device_create_file(&ucm_dev->dev, &dev_attr_ibdev))
goto err_dev;
@@ -1304,15 +1307,11 @@ static void ib_ucm_add_one(struct ib_device *device)
return;
err_dev:
- device_unregister(&ucm_dev->dev);
-err_cdev:
- cdev_del(&ucm_dev->cdev);
- if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
- clear_bit(devnum, dev_map);
- else
- clear_bit(devnum, overflow_map);
+ cdev_device_del(&ucm_dev->cdev, &ucm_dev->dev);
+err_devnum:
+ ib_ucm_free_dev(ucm_dev);
err:
- kfree(ucm_dev);
+ put_device(&ucm_dev->dev);
return;
}
@@ -1323,7 +1322,9 @@ static void ib_ucm_remove_one(struct ib_device *device, void *client_data)
if (!ucm_dev)
return;
- device_unregister(&ucm_dev->dev);
+ cdev_device_del(&ucm_dev->cdev, &ucm_dev->dev);
+ ib_ucm_free_dev(ucm_dev);
+ put_device(&ucm_dev->dev);
}
static CLASS_ATTR_STRING(abi_version, S_IRUGO,
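The ucm conversion above moves from separate cdev_add()/device_register() calls to the combined cdev_device_add()/cdev_device_del() helpers, with device_initialize() done up front so put_device() can always release the structure. A minimal sketch of that pattern, assuming a hypothetical struct my_dev that embeds both a struct device and a struct cdev, plus my_fops and my_dev_release:

/* Minimal sketch of the cdev_device_add()/cdev_device_del() pairing used
 * above. "struct my_dev", my_fops and my_dev_release are hypothetical;
 * the relevant headers are <linux/cdev.h> and <linux/device.h>.
 */
static int example_register(struct my_dev *d, dev_t devt)
{
	device_initialize(&d->dev);		/* refcount the device early */
	d->dev.devt = devt;
	d->dev.release = my_dev_release;	/* frees the containing struct */

	cdev_init(&d->cdev, &my_fops);
	d->cdev.owner = THIS_MODULE;

	/* adds the cdev and registers the device in one step */
	return cdev_device_add(&d->cdev, &d->dev);
}

static void example_unregister(struct my_dev *d)
{
	cdev_device_del(&d->cdev, &d->dev);
	put_device(&d->dev);		/* last ref dropped -> release() runs */
}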
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index e12f8faf8c23..276f0ef835bd 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -898,11 +898,18 @@ static ssize_t ucma_query_path(struct ucma_context *ctx,
for (i = 0, out_len -= sizeof(*resp);
i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
i++, out_len -= sizeof(struct ib_path_rec_data)) {
+ struct sa_path_rec *rec = &ctx->cm_id->route.path_rec[i];
resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
IB_PATH_BIDIRECTIONAL;
- ib_sa_pack_path(&ctx->cm_id->route.path_rec[i],
- &resp->path_data[i].path_rec);
+ if (rec->rec_type == SA_PATH_REC_TYPE_IB) {
+ ib_sa_pack_path(rec, &resp->path_data[i].path_rec);
+ } else {
+ struct sa_path_rec ib;
+
+ sa_convert_path_opa_to_ib(&ib, rec);
+ ib_sa_pack_path(&ib, &resp->path_data[i].path_rec);
+ }
}
if (copy_to_user(response, resp,
@@ -1197,7 +1204,7 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,
static int ucma_set_ib_path(struct ucma_context *ctx,
struct ib_path_rec_data *path_data, size_t optlen)
{
- struct ib_sa_path_rec sa_path;
+ struct sa_path_rec sa_path;
struct rdma_cm_event event;
int ret;
@@ -1215,8 +1222,17 @@ static int ucma_set_ib_path(struct ucma_context *ctx,
memset(&sa_path, 0, sizeof(sa_path));
+ sa_path.rec_type = SA_PATH_REC_TYPE_IB;
ib_sa_unpack_path(path_data->path_rec, &sa_path);
- ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
+
+ if (rdma_cap_opa_ah(ctx->cm_id->device, ctx->cm_id->port_num)) {
+ struct sa_path_rec opa;
+
+ sa_convert_path_ib_to_opa(&opa, &sa_path);
+ ret = rdma_set_ib_paths(ctx->cm_id, &opa, 1);
+ } else {
+ ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
+ }
if (ret)
return ret;
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 4609b921f899..21e60b1e2ff4 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -34,7 +34,8 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
#include <linux/export.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
@@ -57,7 +58,7 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
page = sg_page(sg);
- if (umem->writable && dirty)
+ if (!PageDirty(page) && umem->writable && dirty)
set_page_dirty_lock(page);
put_page(page);
}
@@ -99,9 +100,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (dmasync)
dma_attrs |= DMA_ATTR_WRITE_BARRIER;
- if (!size)
- return ERR_PTR(-EINVAL);
-
/*
* If the combination of the addr and size requested for this memory
* region causes an integer overflow, return error.
@@ -117,11 +115,11 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (!umem)
return ERR_PTR(-ENOMEM);
- umem->context = context;
- umem->length = size;
- umem->address = addr;
- umem->page_size = PAGE_SIZE;
- umem->pid = get_task_pid(current, PIDTYPE_PID);
+ umem->context = context;
+ umem->length = size;
+ umem->address = addr;
+ umem->page_shift = PAGE_SHIFT;
+ umem->pid = get_task_pid(current, PIDTYPE_PID);
/*
* We ask for writable memory if any of the following
* access flags are set. "Local write" and "remote write"
@@ -135,7 +133,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (access & IB_ACCESS_ON_DEMAND) {
put_pid(umem->pid);
- ret = ib_umem_odp_get(context, umem);
+ ret = ib_umem_odp_get(context, umem, access);
if (ret) {
kfree(umem);
return ERR_PTR(ret);
@@ -317,7 +315,6 @@ EXPORT_SYMBOL(ib_umem_release);
int ib_umem_page_count(struct ib_umem *umem)
{
- int shift;
int i;
int n;
struct scatterlist *sg;
@@ -325,11 +322,9 @@ int ib_umem_page_count(struct ib_umem *umem)
if (umem->odp_data)
return ib_umem_num_pages(umem);
- shift = ilog2(umem->page_size);
-
n = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
- n += sg_dma_len(sg) >> shift;
+ n += sg_dma_len(sg) >> umem->page_shift;
return n;
}
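The umem change above replaces the stored page_size with a page_shift, so the page-count loop shifts directly instead of recomputing ilog2(). A small sketch of the equivalence, with a hypothetical helper name:

/* Sketch of the equivalence behind the page_size -> page_shift change:
 * with page_size == 1UL << page_shift,
 *   len / page_size  ==  len >> page_shift
 * so the old ilog2(umem->page_size) shift and the new umem->page_shift
 * count the same number of pages per scatterlist entry.
 */
static unsigned long example_pages_in_sge(unsigned int sge_len,
					  unsigned int page_shift)
{
	return sge_len >> page_shift;	/* e.g. 65536 >> 12 == 16 pages */
}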
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 6b079a31dced..8c4ec564e495 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -32,10 +32,13 @@
#include <linux/types.h>
#include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/task.h>
#include <linux/pid.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/vmalloc.h>
+#include <linux/hugetlb.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
@@ -239,7 +242,73 @@ static const struct mmu_notifier_ops ib_umem_notifiers = {
.invalidate_range_end = ib_umem_notifier_invalidate_range_end,
};
-int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
+struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
+ unsigned long addr,
+ size_t size)
+{
+ struct ib_umem *umem;
+ struct ib_umem_odp *odp_data;
+ int pages = size >> PAGE_SHIFT;
+ int ret;
+
+ umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+ if (!umem)
+ return ERR_PTR(-ENOMEM);
+
+ umem->context = context;
+ umem->length = size;
+ umem->address = addr;
+ umem->page_shift = PAGE_SHIFT;
+ umem->writable = 1;
+
+ odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
+ if (!odp_data) {
+ ret = -ENOMEM;
+ goto out_umem;
+ }
+ odp_data->umem = umem;
+
+ mutex_init(&odp_data->umem_mutex);
+ init_completion(&odp_data->notifier_completion);
+
+ odp_data->page_list = vzalloc(pages * sizeof(*odp_data->page_list));
+ if (!odp_data->page_list) {
+ ret = -ENOMEM;
+ goto out_odp_data;
+ }
+
+ odp_data->dma_list = vzalloc(pages * sizeof(*odp_data->dma_list));
+ if (!odp_data->dma_list) {
+ ret = -ENOMEM;
+ goto out_page_list;
+ }
+
+ down_write(&context->umem_rwsem);
+ context->odp_mrs_count++;
+ rbt_ib_umem_insert(&odp_data->interval_tree, &context->umem_tree);
+ if (likely(!atomic_read(&context->notifier_count)))
+ odp_data->mn_counters_active = true;
+ else
+ list_add(&odp_data->no_private_counters,
+ &context->no_private_counters);
+ up_write(&context->umem_rwsem);
+
+ umem->odp_data = odp_data;
+
+ return umem;
+
+out_page_list:
+ vfree(odp_data->page_list);
+out_odp_data:
+ kfree(odp_data);
+out_umem:
+ kfree(umem);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(ib_alloc_odp_umem);
+
+int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem,
+ int access)
{
int ret_val;
struct pid *our_pid;
@@ -248,6 +317,24 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
if (!mm)
return -EINVAL;
+ if (access & IB_ACCESS_HUGETLB) {
+ struct vm_area_struct *vma;
+ struct hstate *h;
+
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, ib_umem_start(umem));
+ if (!vma || !is_vm_hugetlb_page(vma)) {
+ up_read(&mm->mmap_sem);
+ return -EINVAL;
+ }
+ h = hstate_vma(vma);
+ umem->page_shift = huge_page_shift(h);
+ up_read(&mm->mmap_sem);
+ umem->hugetlb = 1;
+ } else {
+ umem->hugetlb = 0;
+ }
+
/* Prevent creating ODP MRs in child processes */
rcu_read_lock();
our_pid = get_task_pid(current->group_leader, PIDTYPE_PID);
@@ -258,7 +345,6 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
goto out_mm;
}
- umem->hugetlb = 0;
umem->odp_data = kzalloc(sizeof(*umem->odp_data), GFP_KERNEL);
if (!umem->odp_data) {
ret_val = -ENOMEM;
@@ -270,18 +356,20 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
init_completion(&umem->odp_data->notifier_completion);
- umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
+ if (ib_umem_num_pages(umem)) {
+ umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
sizeof(*umem->odp_data->page_list));
- if (!umem->odp_data->page_list) {
- ret_val = -ENOMEM;
- goto out_odp_data;
- }
+ if (!umem->odp_data->page_list) {
+ ret_val = -ENOMEM;
+ goto out_odp_data;
+ }
- umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
+ umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
sizeof(*umem->odp_data->dma_list));
- if (!umem->odp_data->dma_list) {
- ret_val = -ENOMEM;
- goto out_page_list;
+ if (!umem->odp_data->dma_list) {
+ ret_val = -ENOMEM;
+ goto out_page_list;
+ }
}
/*
@@ -435,7 +523,6 @@ out:
static int ib_umem_odp_map_dma_single_page(
struct ib_umem *umem,
int page_index,
- u64 base_virt_addr,
struct page *page,
u64 access_mask,
unsigned long current_seq)
@@ -458,7 +545,7 @@ static int ib_umem_odp_map_dma_single_page(
if (!(umem->odp_data->dma_list[page_index])) {
dma_addr = ib_dma_map_page(dev,
page,
- 0, PAGE_SIZE,
+ 0, BIT(umem->page_shift),
DMA_BIDIRECTIONAL);
if (ib_dma_mapping_error(dev, dma_addr)) {
ret = -EFAULT;
@@ -466,6 +553,7 @@ static int ib_umem_odp_map_dma_single_page(
}
umem->odp_data->dma_list[page_index] = dma_addr | access_mask;
umem->odp_data->page_list[page_index] = page;
+ umem->npages++;
stored_page = 1;
} else if (umem->odp_data->page_list[page_index] == page) {
umem->odp_data->dma_list[page_index] |= access_mask;
@@ -485,8 +573,9 @@ out:
if (remove_existing_mapping && umem->context->invalidate_range) {
invalidate_page_trampoline(
umem,
- base_virt_addr + (page_index * PAGE_SIZE),
- base_virt_addr + ((page_index+1)*PAGE_SIZE),
+ ib_umem_start(umem) + (page_index << umem->page_shift),
+ ib_umem_start(umem) + ((page_index + 1) <<
+ umem->page_shift),
NULL);
ret = -EAGAIN;
}
@@ -505,7 +594,8 @@ out:
* for failure.
* An -EAGAIN error code is returned when a concurrent mmu notifier prevents
* the function from completing its task.
- *
+ * An -ENOENT error code indicates that the userspace process is being
+ * terminated and its mm has already been destroyed.
* @umem: the umem to map and pin
* @user_virt: the address from which we need to map.
* @bcnt: the minimal number of bytes to pin and map. The mapping might be
@@ -524,10 +614,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
struct task_struct *owning_process = NULL;
struct mm_struct *owning_mm = NULL;
struct page **local_page_list = NULL;
- u64 off;
- int j, k, ret = 0, start_idx, npages = 0;
- u64 base_virt_addr;
+ u64 page_mask, off;
+ int j, k, ret = 0, start_idx, npages = 0, page_shift;
unsigned int flags = 0;
+ phys_addr_t p = 0;
if (access_mask == 0)
return -EINVAL;
@@ -540,9 +630,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
if (!local_page_list)
return -ENOMEM;
- off = user_virt & (~PAGE_MASK);
- user_virt = user_virt & PAGE_MASK;
- base_virt_addr = user_virt;
+ page_shift = umem->page_shift;
+ page_mask = ~(BIT(page_shift) - 1);
+ off = user_virt & (~page_mask);
+ user_virt = user_virt & page_mask;
bcnt += off; /* Charge for the first page offset as well. */
owning_process = get_pid_task(umem->context->tgid, PIDTYPE_PID);
@@ -553,20 +644,20 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
owning_mm = get_task_mm(owning_process);
if (owning_mm == NULL) {
- ret = -EINVAL;
+ ret = -ENOENT;
goto out_put_task;
}
if (access_mask & ODP_WRITE_ALLOWED_BIT)
flags |= FOLL_WRITE;
- start_idx = (user_virt - ib_umem_start(umem)) >> PAGE_SHIFT;
+ start_idx = (user_virt - ib_umem_start(umem)) >> page_shift;
k = start_idx;
while (bcnt > 0) {
- const size_t gup_num_pages =
- min_t(size_t, ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE,
- PAGE_SIZE / sizeof(struct page *));
+ const size_t gup_num_pages = min_t(size_t,
+ (bcnt + BIT(page_shift) - 1) >> page_shift,
+ PAGE_SIZE / sizeof(struct page *));
down_read(&owning_mm->mmap_sem);
/*
@@ -585,14 +676,25 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
break;
bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
- user_virt += npages << PAGE_SHIFT;
mutex_lock(&umem->odp_data->umem_mutex);
- for (j = 0; j < npages; ++j) {
+ for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) {
+ if (user_virt & ~page_mask) {
+ p += PAGE_SIZE;
+ if (page_to_phys(local_page_list[j]) != p) {
+ ret = -EFAULT;
+ break;
+ }
+ put_page(local_page_list[j]);
+ continue;
+ }
+
ret = ib_umem_odp_map_dma_single_page(
- umem, k, base_virt_addr, local_page_list[j],
- access_mask, current_seq);
+ umem, k, local_page_list[j],
+ access_mask, current_seq);
if (ret < 0)
break;
+
+ p = page_to_phys(local_page_list[j]);
k++;
}
mutex_unlock(&umem->odp_data->umem_mutex);
@@ -636,8 +738,8 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
* invalidations, so we must make sure we free each page only
* once. */
mutex_lock(&umem->odp_data->umem_mutex);
- for (addr = virt; addr < bound; addr += (u64)umem->page_size) {
- idx = (addr - ib_umem_start(umem)) / PAGE_SIZE;
+ for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) {
+ idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
if (umem->odp_data->page_list[idx]) {
struct page *page = umem->odp_data->page_list[idx];
dma_addr_t dma = umem->odp_data->dma_list[idx];
@@ -665,6 +767,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
put_page(page);
umem->odp_data->page_list[idx] = NULL;
umem->odp_data->dma_list[idx] = 0;
+ umem->npages--;
}
}
mutex_unlock(&umem->odp_data->umem_mutex);
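The ODP map/unmap paths above derive all address math from page_shift rather than a fixed PAGE_SIZE. A short sketch of that arithmetic, with a hypothetical function name (BIT_ULL is used here for 64-bit safety, where the hunk itself uses BIT()):

/* Sketch of the page_shift-based address math used in the ODP map and
 * unmap paths above.
 */
static void example_odp_math(u64 user_virt, u64 umem_start, int page_shift)
{
	u64 page_mask = ~(BIT_ULL(page_shift) - 1);	/* e.g. ~0xfffULL for 4K */
	u64 off = user_virt & ~page_mask;		/* offset into the page */
	u64 aligned = user_virt & page_mask;		/* page-aligned start */
	u64 idx = (aligned - umem_start) >> page_shift;	/* index into page_list */

	(void)off;
	(void)idx;
}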
diff --git a/drivers/infiniband/core/umem_rbtree.c b/drivers/infiniband/core/umem_rbtree.c
index 727d788448f5..d176597b4d78 100644
--- a/drivers/infiniband/core/umem_rbtree.c
+++ b/drivers/infiniband/core/umem_rbtree.c
@@ -78,17 +78,32 @@ int rbt_ib_umem_for_each_in_range(struct rb_root *root,
void *cookie)
{
int ret_val = 0;
- struct umem_odp_node *node;
+ struct umem_odp_node *node, *next;
struct ib_umem_odp *umem;
if (unlikely(start == last))
return ret_val;
- for (node = rbt_ib_umem_iter_first(root, start, last - 1); node;
- node = rbt_ib_umem_iter_next(node, start, last - 1)) {
+ for (node = rbt_ib_umem_iter_first(root, start, last - 1);
+ node; node = next) {
+ next = rbt_ib_umem_iter_next(node, start, last - 1);
umem = container_of(node, struct ib_umem_odp, interval_tree);
ret_val = cb(umem->umem, start, last, cookie) || ret_val;
}
return ret_val;
}
+EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range);
+
+struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root *root,
+ u64 addr, u64 length)
+{
+ struct umem_odp_node *node;
+
+ node = rbt_ib_umem_iter_first(root, addr, addr + length - 1);
+ if (node)
+ return container_of(node, struct ib_umem_odp, interval_tree);
+ return NULL;
+
+}
+EXPORT_SYMBOL(rbt_ib_umem_lookup);
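The new rbt_ib_umem_lookup() returns the first ODP umem whose interval overlaps [addr, addr + length), or NULL. A hedged sketch of a possible caller, using the umem_rwsem/umem_tree fields seen elsewhere in this series:

/* Hypothetical caller: find the ODP umem (if any) covering an address range. */
static struct ib_umem_odp *example_find_odp(struct ib_ucontext *ctx,
					    u64 addr, u64 length)
{
	struct ib_umem_odp *odp;

	down_read(&ctx->umem_rwsem);
	odp = rbt_ib_umem_lookup(&ctx->umem_tree, addr, length);
	up_read(&ctx->umem_rwsem);

	return odp;	/* NULL if no registered ODP MR overlaps the range */
}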
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 249b403b43a4..36a6f5c8914c 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -197,7 +197,7 @@ static void send_handler(struct ib_mad_agent *agent,
struct ib_umad_packet *packet = send_wc->send_buf->context[0];
dequeue_send(file, packet);
- ib_destroy_ah(packet->msg->ah);
+ rdma_destroy_ah(packet->msg->ah);
ib_free_send_mad(packet->msg);
if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
@@ -235,17 +235,19 @@ static void recv_handler(struct ib_mad_agent *agent,
packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index;
packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH);
if (packet->mad.hdr.grh_present) {
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
+ const struct ib_global_route *grh;
ib_init_ah_from_wc(agent->device, agent->port_num,
mad_recv_wc->wc, mad_recv_wc->recv_buf.grh,
&ah_attr);
- packet->mad.hdr.gid_index = ah_attr.grh.sgid_index;
- packet->mad.hdr.hop_limit = ah_attr.grh.hop_limit;
- packet->mad.hdr.traffic_class = ah_attr.grh.traffic_class;
- memcpy(packet->mad.hdr.gid, &ah_attr.grh.dgid, 16);
- packet->mad.hdr.flow_label = cpu_to_be32(ah_attr.grh.flow_label);
+ grh = rdma_ah_read_grh(&ah_attr);
+ packet->mad.hdr.gid_index = grh->sgid_index;
+ packet->mad.hdr.hop_limit = grh->hop_limit;
+ packet->mad.hdr.traffic_class = grh->traffic_class;
+ memcpy(packet->mad.hdr.gid, &grh->dgid, 16);
+ packet->mad.hdr.flow_label = cpu_to_be32(grh->flow_label);
}
if (queue_packet(file, agent, packet))
@@ -449,7 +451,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
struct ib_umad_file *file = filp->private_data;
struct ib_umad_packet *packet;
struct ib_mad_agent *agent;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
struct ib_ah *ah;
struct ib_rmpp_mad *rmpp_mad;
__be64 *tid;
@@ -489,20 +491,22 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
}
memset(&ah_attr, 0, sizeof ah_attr);
- ah_attr.dlid = be16_to_cpu(packet->mad.hdr.lid);
- ah_attr.sl = packet->mad.hdr.sl;
- ah_attr.src_path_bits = packet->mad.hdr.path_bits;
- ah_attr.port_num = file->port->port_num;
+ ah_attr.type = rdma_ah_find_type(file->port->ib_dev,
+ file->port->port_num);
+ rdma_ah_set_dlid(&ah_attr, be16_to_cpu(packet->mad.hdr.lid));
+ rdma_ah_set_sl(&ah_attr, packet->mad.hdr.sl);
+ rdma_ah_set_path_bits(&ah_attr, packet->mad.hdr.path_bits);
+ rdma_ah_set_port_num(&ah_attr, file->port->port_num);
if (packet->mad.hdr.grh_present) {
- ah_attr.ah_flags = IB_AH_GRH;
- memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16);
- ah_attr.grh.sgid_index = packet->mad.hdr.gid_index;
- ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label);
- ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit;
- ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class;
+ rdma_ah_set_grh(&ah_attr, NULL,
+ be32_to_cpu(packet->mad.hdr.flow_label),
+ packet->mad.hdr.gid_index,
+ packet->mad.hdr.hop_limit,
+ packet->mad.hdr.traffic_class);
+ rdma_ah_set_dgid_raw(&ah_attr, packet->mad.hdr.gid);
}
- ah = ib_create_ah(agent->qp->pd, &ah_attr);
+ ah = rdma_create_ah(agent->qp->pd, &ah_attr);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
goto err_up;
@@ -596,7 +600,7 @@ err_send:
err_msg:
ib_free_send_mad(packet->msg);
err_ah:
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
err_up:
mutex_unlock(&file->mutex);
err:
@@ -1183,12 +1187,12 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
cdev_init(&port->cdev, &umad_fops);
port->cdev.owner = THIS_MODULE;
- port->cdev.kobj.parent = &umad_dev->kobj;
+ cdev_set_parent(&port->cdev, &umad_dev->kobj);
kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num);
if (cdev_add(&port->cdev, base, 1))
goto err_cdev;
- port->dev = device_create(umad_class, device->dma_device,
+ port->dev = device_create(umad_class, device->dev.parent,
port->cdev.dev, port,
"umad%d", port->dev_num);
if (IS_ERR(port->dev))
@@ -1202,12 +1206,12 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
base += IB_UMAD_MAX_PORTS;
cdev_init(&port->sm_cdev, &umad_sm_fops);
port->sm_cdev.owner = THIS_MODULE;
- port->sm_cdev.kobj.parent = &umad_dev->kobj;
+ cdev_set_parent(&port->sm_cdev, &umad_dev->kobj);
kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num);
if (cdev_add(&port->sm_cdev, base, 1))
goto err_sm_cdev;
- port->sm_dev = device_create(umad_class, device->dma_device,
+ port->sm_dev = device_create(umad_class, device->dev.parent,
port->sm_cdev.dev, port,
"issm%d", port->dev_num);
if (IS_ERR(port->sm_dev))
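The user_mad conversion above replaces direct ib_ah_attr field writes with the rdma_ah_* accessors and rdma_create_ah()/rdma_destroy_ah(). A condensed sketch of the same construction, using only the accessors that appear in this hunk and a hypothetical function name:

/* Condensed version of the AH construction above (no GRH case). */
static struct ib_ah *example_build_ah(struct ib_pd *pd, struct ib_device *dev,
				      u8 port_num, u16 dlid, u8 sl)
{
	struct rdma_ah_attr ah_attr = {};

	ah_attr.type = rdma_ah_find_type(dev, port_num);
	rdma_ah_set_dlid(&ah_attr, dlid);
	rdma_ah_set_sl(&ah_attr, sl);
	rdma_ah_set_port_num(&ah_attr, port_num);

	return rdma_create_ah(pd, &ah_attr);	/* paired with rdma_destroy_ah() */
}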
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 455034ac994e..64d494a64daf 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -76,12 +76,13 @@
* an asynchronous event queue file is created and released when the
* event file is closed.
*
- * struct ib_uverbs_event_file: One reference is held by the VFS and
- * released when the file is closed. For asynchronous event files,
- * another reference is held by the corresponding main context file
- * and released when that file is closed. For completion event files,
- * a reference is taken when a CQ is created that uses the file, and
- * released when the CQ is destroyed.
+ * struct ib_uverbs_event_queue: Base structure for
+ * struct ib_uverbs_async_event_file and struct ib_uverbs_completion_event_file.
+ * One reference is held by the VFS and released when the file is closed.
+ * For asynchronous event files, another reference is held by the corresponding
+ * main context file and released when that file is closed. For completion
+ * event files, a reference is taken when a CQ is created that uses the file,
+ * and released when the CQ is destroyed.
*/
struct ib_uverbs_device {
@@ -101,18 +102,26 @@ struct ib_uverbs_device {
struct list_head uverbs_events_file_list;
};
-struct ib_uverbs_event_file {
- struct kref ref;
- int is_async;
- struct ib_uverbs_file *uverbs_file;
+struct ib_uverbs_event_queue {
spinlock_t lock;
int is_closed;
wait_queue_head_t poll_wait;
struct fasync_struct *async_queue;
struct list_head event_list;
+};
+
+struct ib_uverbs_async_event_file {
+ struct ib_uverbs_event_queue ev_queue;
+ struct ib_uverbs_file *uverbs_file;
+ struct kref ref;
struct list_head list;
};
+struct ib_uverbs_completion_event_file {
+ struct ib_uobject_file uobj_file;
+ struct ib_uverbs_event_queue ev_queue;
+};
+
struct ib_uverbs_file {
struct kref ref;
struct mutex mutex;
@@ -120,9 +129,13 @@ struct ib_uverbs_file {
struct ib_uverbs_device *device;
struct ib_ucontext *ucontext;
struct ib_event_handler event_handler;
- struct ib_uverbs_event_file *async_file;
+ struct ib_uverbs_async_event_file *async_file;
struct list_head list;
int is_closed;
+
+ struct idr idr;
+ /* spinlock protects write access to idr */
+ spinlock_t idr_lock;
};
struct ib_uverbs_event {
@@ -159,6 +172,8 @@ struct ib_usrq_object {
struct ib_uqp_object {
struct ib_uevent_object uevent;
+ /* lock for mcast list */
+ struct mutex mcast_lock;
struct list_head mcast_list;
struct ib_uxrcd_object *uxrcd;
};
@@ -176,32 +191,18 @@ struct ib_ucq_object {
u32 async_events_reported;
};
-extern spinlock_t ib_uverbs_idr_lock;
-extern struct idr ib_uverbs_pd_idr;
-extern struct idr ib_uverbs_mr_idr;
-extern struct idr ib_uverbs_mw_idr;
-extern struct idr ib_uverbs_ah_idr;
-extern struct idr ib_uverbs_cq_idr;
-extern struct idr ib_uverbs_qp_idr;
-extern struct idr ib_uverbs_srq_idr;
-extern struct idr ib_uverbs_xrcd_idr;
-extern struct idr ib_uverbs_rule_idr;
-extern struct idr ib_uverbs_wq_idr;
-extern struct idr ib_uverbs_rwq_ind_tbl_idr;
-
-void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
-
-struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
- struct ib_device *ib_dev,
- int is_async);
+extern const struct file_operations uverbs_event_fops;
+void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue);
+struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
+ struct ib_device *ib_dev);
void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file);
-struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
- struct ib_uverbs_event_file *ev_file,
+ struct ib_uverbs_completion_event_file *ev_file,
struct ib_ucq_object *uobj);
void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
struct ib_uevent_object *uobj);
+void ib_uverbs_release_file(struct kref *ref);
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
@@ -210,9 +211,12 @@ void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
-void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
+int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd,
+ enum rdma_remove_reason why);
int uverbs_dealloc_mw(struct ib_mw *mw);
+void ib_uverbs_detach_umcast(struct ib_qp *qp,
+ struct ib_uqp_object *uobj);
struct ib_uverbs_flow_spec {
union {
@@ -228,6 +232,8 @@ struct ib_uverbs_flow_spec {
struct ib_uverbs_flow_spec_ipv4 ipv4;
struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
struct ib_uverbs_flow_spec_ipv6 ipv6;
+ struct ib_uverbs_flow_spec_action_tag flow_tag;
+ struct ib_uverbs_flow_spec_action_drop drop;
};
};
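The header change above splits the old event file into a shared ib_uverbs_event_queue embedded in two wrapper types. Code that only holds the queue can recover its wrapper with container_of(); which wrapper is valid depends on whether the queue came from an async event file or a completion channel. A sketch for the completion case:

/* Sketch: recovering the wrapper from an embedded event queue, as the split
 * structures above allow. Only valid when the queue belongs to a
 * completion event file.
 */
static struct ib_uverbs_completion_event_file *
example_comp_file_from_queue(struct ib_uverbs_event_queue *ev_queue)
{
	return container_of(ev_queue,
			    struct ib_uverbs_completion_event_file,
			    ev_queue);
}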
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 700782203483..2c98533a0203 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -40,270 +40,29 @@
#include <linux/uaccess.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+
#include "uverbs.h"
#include "core_priv.h"
-struct uverbs_lock_class {
- struct lock_class_key key;
- char name[16];
-};
-
-static struct uverbs_lock_class pd_lock_class = { .name = "PD-uobj" };
-static struct uverbs_lock_class mr_lock_class = { .name = "MR-uobj" };
-static struct uverbs_lock_class mw_lock_class = { .name = "MW-uobj" };
-static struct uverbs_lock_class cq_lock_class = { .name = "CQ-uobj" };
-static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" };
-static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" };
-static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" };
-static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
-static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
-static struct uverbs_lock_class wq_lock_class = { .name = "WQ-uobj" };
-static struct uverbs_lock_class rwq_ind_table_lock_class = { .name = "IND_TBL-uobj" };
-
-/*
- * The ib_uobject locking scheme is as follows:
- *
- * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it
- * needs to be held during all idr write operations. When an object is
- * looked up, a reference must be taken on the object's kref before
- * dropping this lock. For read operations, the rcu_read_lock()
- * and rcu_write_lock() but similarly the kref reference is grabbed
- * before the rcu_read_unlock().
- *
- * - Each object also has an rwsem. This rwsem must be held for
- * reading while an operation that uses the object is performed.
- * For example, while registering an MR, the associated PD's
- * uobject.mutex must be held for reading. The rwsem must be held
- * for writing while initializing or destroying an object.
- *
- * - In addition, each object has a "live" flag. If this flag is not
- * set, then lookups of the object will fail even if it is found in
- * the idr. This handles a reader that blocks and does not acquire
- * the rwsem until after the object is destroyed. The destroy
- * operation will set the live flag to 0 and then drop the rwsem;
- * this will allow the reader to acquire the rwsem, see that the
- * live flag is 0, and then drop the rwsem and its reference to
- * object. The underlying storage will not be freed until the last
- * reference to the object is dropped.
- */
-
-static void init_uobj(struct ib_uobject *uobj, u64 user_handle,
- struct ib_ucontext *context, struct uverbs_lock_class *c)
-{
- uobj->user_handle = user_handle;
- uobj->context = context;
- kref_init(&uobj->ref);
- init_rwsem(&uobj->mutex);
- lockdep_set_class_and_name(&uobj->mutex, &c->key, c->name);
- uobj->live = 0;
-}
-
-static void release_uobj(struct kref *kref)
-{
- kfree_rcu(container_of(kref, struct ib_uobject, ref), rcu);
-}
-
-static void put_uobj(struct ib_uobject *uobj)
-{
- kref_put(&uobj->ref, release_uobj);
-}
-
-static void put_uobj_read(struct ib_uobject *uobj)
-{
- up_read(&uobj->mutex);
- put_uobj(uobj);
-}
-
-static void put_uobj_write(struct ib_uobject *uobj)
-{
- up_write(&uobj->mutex);
- put_uobj(uobj);
-}
-
-static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj)
-{
- int ret;
-
- idr_preload(GFP_KERNEL);
- spin_lock(&ib_uverbs_idr_lock);
-
- ret = idr_alloc(idr, uobj, 0, 0, GFP_NOWAIT);
- if (ret >= 0)
- uobj->id = ret;
-
- spin_unlock(&ib_uverbs_idr_lock);
- idr_preload_end();
-
- return ret < 0 ? ret : 0;
-}
-
-void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj)
+static struct ib_uverbs_completion_event_file *
+ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context)
{
- spin_lock(&ib_uverbs_idr_lock);
- idr_remove(idr, uobj->id);
- spin_unlock(&ib_uverbs_idr_lock);
-}
+ struct ib_uobject *uobj = uobj_get_read(uobj_get_type(comp_channel),
+ fd, context);
+ struct ib_uobject_file *uobj_file;
-static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
- struct ib_ucontext *context)
-{
- struct ib_uobject *uobj;
+ if (IS_ERR(uobj))
+ return (void *)uobj;
- rcu_read_lock();
- uobj = idr_find(idr, id);
- if (uobj) {
- if (uobj->context == context)
- kref_get(&uobj->ref);
- else
- uobj = NULL;
- }
- rcu_read_unlock();
+ uverbs_uobject_get(uobj);
+ uobj_put_read(uobj);
- return uobj;
-}
-
-static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
- struct ib_ucontext *context, int nested)
-{
- struct ib_uobject *uobj;
-
- uobj = __idr_get_uobj(idr, id, context);
- if (!uobj)
- return NULL;
-
- if (nested)
- down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING);
- else
- down_read(&uobj->mutex);
- if (!uobj->live) {
- put_uobj_read(uobj);
- return NULL;
- }
-
- return uobj;
-}
-
-static struct ib_uobject *idr_write_uobj(struct idr *idr, int id,
- struct ib_ucontext *context)
-{
- struct ib_uobject *uobj;
-
- uobj = __idr_get_uobj(idr, id, context);
- if (!uobj)
- return NULL;
-
- down_write(&uobj->mutex);
- if (!uobj->live) {
- put_uobj_write(uobj);
- return NULL;
- }
-
- return uobj;
-}
-
-static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context,
- int nested)
-{
- struct ib_uobject *uobj;
-
- uobj = idr_read_uobj(idr, id, context, nested);
- return uobj ? uobj->object : NULL;
-}
-
-static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0);
-}
-
-static void put_pd_read(struct ib_pd *pd)
-{
- put_uobj_read(pd->uobject);
-}
-
-static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested)
-{
- return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested);
-}
-
-static void put_cq_read(struct ib_cq *cq)
-{
- put_uobj_read(cq->uobject);
-}
-
-static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0);
-}
-
-static void put_ah_read(struct ib_ah *ah)
-{
- put_uobj_read(ah->uobject);
-}
-
-static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
-}
-
-static struct ib_wq *idr_read_wq(int wq_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_wq_idr, wq_handle, context, 0);
-}
-
-static void put_wq_read(struct ib_wq *wq)
-{
- put_uobj_read(wq->uobject);
-}
-
-static struct ib_rwq_ind_table *idr_read_rwq_indirection_table(int ind_table_handle,
- struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_rwq_ind_tbl_idr, ind_table_handle, context, 0);
-}
-
-static void put_rwq_indirection_table_read(struct ib_rwq_ind_table *ind_table)
-{
- put_uobj_read(ind_table->uobject);
-}
-
-static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context)
-{
- struct ib_uobject *uobj;
-
- uobj = idr_write_uobj(&ib_uverbs_qp_idr, qp_handle, context);
- return uobj ? uobj->object : NULL;
-}
-
-static void put_qp_read(struct ib_qp *qp)
-{
- put_uobj_read(qp->uobject);
-}
-
-static void put_qp_write(struct ib_qp *qp)
-{
- put_uobj_write(qp->uobject);
-}
-
-static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0);
-}
-
-static void put_srq_read(struct ib_srq *srq)
-{
- put_uobj_read(srq->uobject);
-}
-
-static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context,
- struct ib_uobject **uobj)
-{
- *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0);
- return *uobj ? (*uobj)->object : NULL;
-}
-
-static void put_xrcd_read(struct ib_uobject *uobj)
-{
- put_uobj_read(uobj);
+ uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
+ return container_of(uobj_file, struct ib_uverbs_completion_event_file,
+ uobj_file);
}
ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
@@ -316,6 +75,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
struct ib_udata udata;
struct ib_ucontext *ucontext;
struct file *filp;
+ struct ib_rdmacg_object cg_obj;
int ret;
if (out_len < sizeof resp)
@@ -335,24 +95,22 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
+ ret = ib_rdmacg_try_charge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
+ if (ret)
+ goto err;
+
ucontext = ib_dev->alloc_ucontext(ib_dev, &udata);
if (IS_ERR(ucontext)) {
ret = PTR_ERR(ucontext);
- goto err;
+ goto err_alloc;
}
ucontext->device = ib_dev;
- INIT_LIST_HEAD(&ucontext->pd_list);
- INIT_LIST_HEAD(&ucontext->mr_list);
- INIT_LIST_HEAD(&ucontext->mw_list);
- INIT_LIST_HEAD(&ucontext->cq_list);
- INIT_LIST_HEAD(&ucontext->qp_list);
- INIT_LIST_HEAD(&ucontext->srq_list);
- INIT_LIST_HEAD(&ucontext->ah_list);
- INIT_LIST_HEAD(&ucontext->wq_list);
- INIT_LIST_HEAD(&ucontext->rwq_ind_tbl_list);
- INIT_LIST_HEAD(&ucontext->xrcd_list);
- INIT_LIST_HEAD(&ucontext->rule_list);
+ ucontext->cg_obj = cg_obj;
+ /* ufile is required when some objects are released */
+ ucontext->ufile = file;
+ uverbs_initialize_ucontext(ucontext);
+
rcu_read_lock();
ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
rcu_read_unlock();
@@ -376,7 +134,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
goto err_free;
resp.async_fd = ret;
- filp = ib_uverbs_alloc_event_file(file, ib_dev, 1);
+ filp = ib_uverbs_alloc_async_event_file(file, ib_dev);
if (IS_ERR(filp)) {
ret = PTR_ERR(filp);
goto err_fd;
@@ -407,6 +165,9 @@ err_free:
put_pid(ucontext->tgid);
ib_dev->dealloc_ucontext(ucontext);
+err_alloc:
+ ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
+
err:
mutex_unlock(&file->mutex);
return ret;
@@ -556,12 +317,9 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
- uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
-
- init_uobj(uobj, 0, file->ucontext, &pd_lock_class);
- down_write(&uobj->mutex);
+ uobj = uobj_alloc(uobj_get_type(pd), file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata);
if (IS_ERR(pd)) {
@@ -575,10 +333,6 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
atomic_set(&pd->usecnt, 0);
uobj->object = pd;
- ret = idr_add_uobj(&ib_uverbs_pd_idr, uobj);
- if (ret)
- goto err_idr;
-
memset(&resp, 0, sizeof resp);
resp.pd_handle = uobj->id;
@@ -588,24 +342,15 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
goto err_copy;
}
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->pd_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_alloc_commit(uobj);
return in_len;
err_copy:
- idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
-
-err_idr:
ib_dealloc_pd(pd);
err:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
return ret;
}
@@ -616,43 +361,19 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
{
struct ib_uverbs_dealloc_pd cmd;
struct ib_uobject *uobj;
- struct ib_pd *pd;
int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
- pd = uobj->object;
-
- if (atomic_read(&pd->usecnt)) {
- ret = -EBUSY;
- goto err_put;
- }
-
- ret = pd->device->dealloc_pd(uobj->object);
- WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd");
- if (ret)
- goto err_put;
-
- uobj->live = 0;
- put_uobj_write(uobj);
+ uobj = uobj_get_write(uobj_get_type(pd), cmd.pd_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
+ ret = uobj_remove_commit(uobj);
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return in_len;
-
-err_put:
- put_uobj_write(uobj);
- return ret;
+ return ret ?: in_len;
}
struct xrcd_table_entry {
@@ -789,16 +510,13 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
}
}
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj) {
- ret = -ENOMEM;
+ obj = (struct ib_uxrcd_object *)uobj_alloc(uobj_get_type(xrcd),
+ file->ucontext);
+ if (IS_ERR(obj)) {
+ ret = PTR_ERR(obj);
goto err_tree_mutex_unlock;
}
- init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class);
-
- down_write(&obj->uobject.mutex);
-
if (!xrcd) {
xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata);
if (IS_ERR(xrcd)) {
@@ -816,10 +534,6 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
atomic_set(&obj->refcnt, 0);
obj->uobject.object = xrcd;
- ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
- if (ret)
- goto err_idr;
-
memset(&resp, 0, sizeof resp);
resp.xrcd_handle = obj->uobject.id;
@@ -828,7 +542,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
/* create new inode/xrcd table entry */
ret = xrcd_table_insert(file->device, inode, xrcd);
if (ret)
- goto err_insert_xrcd;
+ goto err_dealloc_xrcd;
}
atomic_inc(&xrcd->usecnt);
}
@@ -842,12 +556,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
if (f.file)
fdput(f);
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list);
- mutex_unlock(&file->mutex);
-
- obj->uobject.live = 1;
- up_write(&obj->uobject.mutex);
+ uobj_alloc_commit(&obj->uobject);
mutex_unlock(&file->device->xrcd_tree_mutex);
return in_len;
@@ -859,14 +568,11 @@ err_copy:
atomic_dec(&xrcd->usecnt);
}
-err_insert_xrcd:
- idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
-
-err_idr:
+err_dealloc_xrcd:
ib_dealloc_xrcd(xrcd);
err:
- put_uobj_write(&obj->uobject);
+ uobj_alloc_abort(&obj->uobject);
err_tree_mutex_unlock:
if (f.file)
@@ -884,75 +590,41 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
{
struct ib_uverbs_close_xrcd cmd;
struct ib_uobject *uobj;
- struct ib_xrcd *xrcd = NULL;
- struct inode *inode = NULL;
- struct ib_uxrcd_object *obj;
- int live;
int ret = 0;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- mutex_lock(&file->device->xrcd_tree_mutex);
- uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext);
- if (!uobj) {
- ret = -EINVAL;
- goto out;
- }
-
- xrcd = uobj->object;
- inode = xrcd->inode;
- obj = container_of(uobj, struct ib_uxrcd_object, uobject);
- if (atomic_read(&obj->refcnt)) {
- put_uobj_write(uobj);
- ret = -EBUSY;
- goto out;
- }
-
- if (!inode || atomic_dec_and_test(&xrcd->usecnt)) {
- ret = ib_dealloc_xrcd(uobj->object);
- if (!ret)
- uobj->live = 0;
+ uobj = uobj_get_write(uobj_get_type(xrcd), cmd.xrcd_handle,
+ file->ucontext);
+ if (IS_ERR(uobj)) {
+ mutex_unlock(&file->device->xrcd_tree_mutex);
+ return PTR_ERR(uobj);
}
- live = uobj->live;
- if (inode && ret)
- atomic_inc(&xrcd->usecnt);
-
- put_uobj_write(uobj);
-
- if (ret)
- goto out;
-
- if (inode && !live)
- xrcd_table_delete(file->device, inode);
-
- idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
- ret = in_len;
-
-out:
- mutex_unlock(&file->device->xrcd_tree_mutex);
- return ret;
+ ret = uobj_remove_commit(uobj);
+ return ret ?: in_len;
}
-void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
- struct ib_xrcd *xrcd)
+int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
+ struct ib_xrcd *xrcd,
+ enum rdma_remove_reason why)
{
struct inode *inode;
+ int ret;
inode = xrcd->inode;
if (inode && !atomic_dec_and_test(&xrcd->usecnt))
- return;
+ return 0;
- ib_dealloc_xrcd(xrcd);
+ ret = ib_dealloc_xrcd(xrcd);
- if (inode)
+ if (why == RDMA_REMOVE_DESTROY && ret)
+ atomic_inc(&xrcd->usecnt);
+ else if (inode)
xrcd_table_delete(dev, inode);
+
+ return ret;
}
ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
@@ -985,14 +657,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
if (ret)
return ret;
- uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
-
- init_uobj(uobj, 0, file->ucontext, &mr_lock_class);
- down_write(&uobj->mutex);
+ uobj = uobj_alloc(uobj_get_type(mr), file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_free;
@@ -1020,9 +689,6 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
atomic_inc(&pd->usecnt);
uobj->object = mr;
- ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
- if (ret)
- goto err_unreg;
memset(&resp, 0, sizeof resp);
resp.lkey = mr->lkey;
@@ -1035,29 +701,20 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
goto err_copy;
}
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->mr_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
+ uobj_put_obj_read(pd);
- up_write(&uobj->mutex);
+ uobj_alloc_commit(uobj);
return in_len;
err_copy:
- idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
-
-err_unreg:
ib_dereg_mr(mr);
err_put:
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
err_free:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
return ret;
}
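
The allocation side follows a matching idiom, sketched below with a hypothetical "foo" type: uobj_alloc() reserves an id that stays invisible to other threads until uobj_alloc_commit(), reads on related objects use uobj_get_obj_read()/uobj_put_obj_read(), and every error path ends in uobj_alloc_abort().

static int create_foo_example(struct ib_uverbs_file *file, u32 pd_handle)
{
	struct ib_uobject *uobj;
	struct ib_pd *pd;
	int ret;

	/* Reserve a handle; it is not yet visible to lookups. */
	uobj = uobj_alloc(uobj_get_type(foo), file->ucontext);
	if (IS_ERR(uobj))
		return PTR_ERR(uobj);

	/* Shared (read) reference on the PD used to build the object. */
	pd = uobj_get_obj_read(pd, pd_handle, file->ucontext);
	if (!pd) {
		ret = -EINVAL;
		goto err_free;
	}

	/* ... create the driver object, set uobj->object, copy the response ... */

	uobj_put_obj_read(pd);
	uobj_alloc_commit(uobj);	/* handle becomes visible only here */
	return 0;

err_free:
	uobj_alloc_abort(uobj);		/* releases the reserved handle */
	return ret;
}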
@@ -1093,11 +750,10 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
(cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
return -EINVAL;
- uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle,
- file->ucontext);
-
- if (!uobj)
- return -EINVAL;
+ uobj = uobj_get_write(uobj_get_type(mr), cmd.mr_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
mr = uobj->object;
@@ -1108,7 +764,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
}
if (cmd.flags & IB_MR_REREG_PD) {
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto put_uobjs;
@@ -1141,11 +797,10 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
put_uobj_pd:
if (cmd.flags & IB_MR_REREG_PD)
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
put_uobjs:
-
- put_uobj_write(mr->uobject);
+ uobj_put_write(uobj);
return ret;
}
@@ -1156,37 +811,20 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
int out_len)
{
struct ib_uverbs_dereg_mr cmd;
- struct ib_mr *mr;
struct ib_uobject *uobj;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
+ uobj = uobj_get_write(uobj_get_type(mr), cmd.mr_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- mr = uobj->object;
+ ret = uobj_remove_commit(uobj);
- ret = ib_dereg_mr(mr);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
-
- if (ret)
- return ret;
-
- idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return in_len;
+ return ret ?: in_len;
}
ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
@@ -1208,14 +846,11 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
- uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
+ uobj = uobj_alloc(uobj_get_type(mw), file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- init_uobj(uobj, 0, file->ucontext, &mw_lock_class);
- down_write(&uobj->mutex);
-
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_free;
@@ -1238,9 +873,6 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
atomic_inc(&pd->usecnt);
uobj->object = mw;
- ret = idr_add_uobj(&ib_uverbs_mw_idr, uobj);
- if (ret)
- goto err_unalloc;
memset(&resp, 0, sizeof(resp));
resp.rkey = mw->rkey;
@@ -1252,29 +884,17 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
goto err_copy;
}
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->mw_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_put_obj_read(pd);
+ uobj_alloc_commit(uobj);
return in_len;
err_copy:
- idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
-
-err_unalloc:
uverbs_dealloc_mw(mw);
-
err_put:
- put_pd_read(pd);
-
+ uobj_put_obj_read(pd);
err_free:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
return ret;
}
@@ -1284,37 +904,19 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
int out_len)
{
struct ib_uverbs_dealloc_mw cmd;
- struct ib_mw *mw;
struct ib_uobject *uobj;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_mw_idr, cmd.mw_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
-
- mw = uobj->object;
+ uobj = uobj_get_write(uobj_get_type(mw), cmd.mw_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- ret = uverbs_dealloc_mw(mw);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
-
- if (ret)
- return ret;
-
- idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return in_len;
+ ret = uobj_remove_commit(uobj);
+ return ret ?: in_len;
}
ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
@@ -1324,8 +926,8 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
{
struct ib_uverbs_create_comp_channel cmd;
struct ib_uverbs_create_comp_channel_resp resp;
- struct file *filp;
- int ret;
+ struct ib_uobject *uobj;
+ struct ib_uverbs_completion_event_file *ev_file;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -1333,25 +935,23 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- ret = get_unused_fd_flags(O_CLOEXEC);
- if (ret < 0)
- return ret;
- resp.fd = ret;
+ uobj = uobj_alloc(uobj_get_type(comp_channel), file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- filp = ib_uverbs_alloc_event_file(file, ib_dev, 0);
- if (IS_ERR(filp)) {
- put_unused_fd(resp.fd);
- return PTR_ERR(filp);
- }
+ resp.fd = uobj->id;
+
+ ev_file = container_of(uobj, struct ib_uverbs_completion_event_file,
+ uobj_file.uobj);
+ ib_uverbs_init_event_queue(&ev_file->ev_queue);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
- put_unused_fd(resp.fd);
- fput(filp);
+ uobj_alloc_abort(uobj);
return -EFAULT;
}
- fd_install(resp.fd, filp);
+ uobj_alloc_commit(uobj);
return in_len;
}
@@ -1369,7 +969,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
void *context)
{
struct ib_ucq_object *obj;
- struct ib_uverbs_event_file *ev_file = NULL;
+ struct ib_uverbs_completion_event_file *ev_file = NULL;
struct ib_cq *cq;
int ret;
struct ib_uverbs_ex_create_cq_resp resp;
@@ -1378,21 +978,21 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
if (cmd->comp_vector >= file->device->num_comp_vectors)
return ERR_PTR(-EINVAL);
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return ERR_PTR(-ENOMEM);
-
- init_uobj(&obj->uobject, cmd->user_handle, file->ucontext, &cq_lock_class);
- down_write(&obj->uobject.mutex);
+ obj = (struct ib_ucq_object *)uobj_alloc(uobj_get_type(cq),
+ file->ucontext);
+ if (IS_ERR(obj))
+ return obj;
if (cmd->comp_channel >= 0) {
- ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel);
- if (!ev_file) {
- ret = -EINVAL;
+ ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel,
+ file->ucontext);
+ if (IS_ERR(ev_file)) {
+ ret = PTR_ERR(ev_file);
goto err;
}
}
+ obj->uobject.user_handle = cmd->user_handle;
obj->uverbs_file = file;
obj->comp_events_reported = 0;
obj->async_events_reported = 0;
@@ -1405,8 +1005,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
attr.flags = cmd->flags;
- cq = ib_dev->create_cq(ib_dev, &attr,
- file->ucontext, uhw);
+ cq = ib_dev->create_cq(ib_dev, &attr, file->ucontext, uhw);
if (IS_ERR(cq)) {
ret = PTR_ERR(cq);
goto err_file;
@@ -1416,14 +1015,10 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
cq->uobject = &obj->uobject;
cq->comp_handler = ib_uverbs_comp_handler;
cq->event_handler = ib_uverbs_cq_event_handler;
- cq->cq_context = ev_file;
+ cq->cq_context = &ev_file->ev_queue;
atomic_set(&cq->usecnt, 0);
obj->uobject.object = cq;
- ret = idr_add_uobj(&ib_uverbs_cq_idr, &obj->uobject);
- if (ret)
- goto err_free;
-
memset(&resp, 0, sizeof resp);
resp.base.cq_handle = obj->uobject.id;
resp.base.cqe = cq->cqe;
@@ -1435,20 +1030,11 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
if (ret)
goto err_cb;
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uobject.list, &file->ucontext->cq_list);
- mutex_unlock(&file->mutex);
-
- obj->uobject.live = 1;
-
- up_write(&obj->uobject.mutex);
+ uobj_alloc_commit(&obj->uobject);
return obj;
err_cb:
- idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject);
-
-err_free:
ib_destroy_cq(cq);
err_file:
@@ -1456,7 +1042,7 @@ err_file:
ib_uverbs_release_ucq(file, ev_file, obj);
err:
- put_uobj_write(&obj->uobject);
+ uobj_alloc_abort(&obj->uobject);
return ERR_PTR(ret);
}
@@ -1579,7 +1165,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
@@ -1594,7 +1180,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
ret = -EFAULT;
out:
- put_cq_read(cq);
+ uobj_put_obj_read(cq);
return ret ? ret : in_len;
}
@@ -1641,7 +1227,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
@@ -1673,7 +1259,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
ret = in_len;
out_put:
- put_cq_read(cq);
+ uobj_put_obj_read(cq);
return ret;
}
@@ -1688,14 +1274,14 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
ib_req_notify_cq(cq, cmd.solicited_only ?
IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
- put_cq_read(cq);
+ uobj_put_obj_read(cq);
return in_len;
}
@@ -1710,42 +1296,36 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
struct ib_uobject *uobj;
struct ib_cq *cq;
struct ib_ucq_object *obj;
- struct ib_uverbs_event_file *ev_file;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
+ uobj = uobj_get_write(uobj_get_type(cq), cmd.cq_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
+
+ /*
+ * Make sure we don't free the memory in remove_commit as we still
+ * need the uobject memory to create the response.
+ */
+ uverbs_uobject_get(uobj);
cq = uobj->object;
- ev_file = cq->cq_context;
obj = container_of(cq->uobject, struct ib_ucq_object, uobject);
- ret = ib_destroy_cq(cq);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
+ memset(&resp, 0, sizeof(resp));
- if (ret)
+ ret = uobj_remove_commit(uobj);
+ if (ret) {
+ uverbs_uobject_put(uobj);
return ret;
+ }
- idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- ib_uverbs_release_ucq(file, ev_file, obj);
-
- memset(&resp, 0, sizeof resp);
resp.comp_events_reported = obj->comp_events_reported;
resp.async_events_reported = obj->async_events_reported;
- put_uobj(uobj);
-
+ uverbs_uobject_put(uobj);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
return -EFAULT;
@@ -1767,7 +1347,7 @@ static int create_qp(struct ib_uverbs_file *file,
struct ib_device *device;
struct ib_pd *pd = NULL;
struct ib_xrcd *xrcd = NULL;
- struct ib_uobject *uninitialized_var(xrcd_uobj);
+ struct ib_uobject *xrcd_uobj = ERR_PTR(-ENOENT);
struct ib_cq *scq = NULL, *rcq = NULL;
struct ib_srq *srq = NULL;
struct ib_qp *qp;
@@ -1781,18 +1361,20 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
return -EPERM;
- obj = kzalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ obj = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp),
+ file->ucontext);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+ obj->uxrcd = NULL;
+ obj->uevent.uobject.user_handle = cmd->user_handle;
+ mutex_init(&obj->mcast_lock);
- init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext,
- &qp_lock_class);
- down_write(&obj->uevent.uobject.mutex);
if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) +
sizeof(cmd->rwq_ind_tbl_handle) &&
(cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) {
- ind_tbl = idr_read_rwq_indirection_table(cmd->rwq_ind_tbl_handle,
- file->ucontext);
+ ind_tbl = uobj_get_obj_read(rwq_ind_table,
+ cmd->rwq_ind_tbl_handle,
+ file->ucontext);
if (!ind_tbl) {
ret = -EINVAL;
goto err_put;
@@ -1816,8 +1398,15 @@ static int create_qp(struct ib_uverbs_file *file,
has_sq = false;
if (cmd->qp_type == IB_QPT_XRC_TGT) {
- xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
- &xrcd_uobj);
+ xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->pd_handle,
+ file->ucontext);
+
+ if (IS_ERR(xrcd_uobj)) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+
+ xrcd = (struct ib_xrcd *)xrcd_uobj->object;
if (!xrcd) {
ret = -EINVAL;
goto err_put;
@@ -1829,8 +1418,8 @@ static int create_qp(struct ib_uverbs_file *file,
cmd->max_recv_sge = 0;
} else {
if (cmd->is_srq) {
- srq = idr_read_srq(cmd->srq_handle,
- file->ucontext);
+ srq = uobj_get_obj_read(srq, cmd->srq_handle,
+ file->ucontext);
if (!srq || srq->srq_type != IB_SRQT_BASIC) {
ret = -EINVAL;
goto err_put;
@@ -1839,8 +1428,8 @@ static int create_qp(struct ib_uverbs_file *file,
if (!ind_tbl) {
if (cmd->recv_cq_handle != cmd->send_cq_handle) {
- rcq = idr_read_cq(cmd->recv_cq_handle,
- file->ucontext, 0);
+ rcq = uobj_get_obj_read(cq, cmd->recv_cq_handle,
+ file->ucontext);
if (!rcq) {
ret = -EINVAL;
goto err_put;
@@ -1850,10 +1439,11 @@ static int create_qp(struct ib_uverbs_file *file,
}
if (has_sq)
- scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
+ scq = uobj_get_obj_read(cq, cmd->send_cq_handle,
+ file->ucontext);
if (!ind_tbl)
rcq = rcq ?: scq;
- pd = idr_read_pd(cmd->pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd->pd_handle, file->ucontext);
if (!pd || (!scq && has_sq)) {
ret = -EINVAL;
goto err_put;
@@ -1891,7 +1481,8 @@ static int create_qp(struct ib_uverbs_file *file,
IB_QP_CREATE_CROSS_CHANNEL |
IB_QP_CREATE_MANAGED_SEND |
IB_QP_CREATE_MANAGED_RECV |
- IB_QP_CREATE_SCATTER_FCS)) {
+ IB_QP_CREATE_SCATTER_FCS |
+ IB_QP_CREATE_CVLAN_STRIPPING)) {
ret = -EINVAL;
goto err_put;
}
@@ -1915,6 +1506,10 @@ static int create_qp(struct ib_uverbs_file *file,
}
if (cmd->qp_type != IB_QPT_XRC_TGT) {
+ ret = ib_create_qp_security(qp, device);
+ if (ret)
+ goto err_cb;
+
qp->real_qp = qp;
qp->device = device;
qp->pd = pd;
@@ -1939,9 +1534,6 @@ static int create_qp(struct ib_uverbs_file *file,
qp->uobject = &obj->uevent.uobject;
obj->uevent.uobject.object = qp;
- ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
- if (ret)
- goto err_destroy;
memset(&resp, 0, sizeof resp);
resp.base.qpn = qp->qp_num;
@@ -1963,50 +1555,41 @@ static int create_qp(struct ib_uverbs_file *file,
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
uobject);
atomic_inc(&obj->uxrcd->refcnt);
- put_xrcd_read(xrcd_uobj);
+ uobj_put_read(xrcd_uobj);
}
if (pd)
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
if (scq)
- put_cq_read(scq);
+ uobj_put_obj_read(scq);
if (rcq && rcq != scq)
- put_cq_read(rcq);
+ uobj_put_obj_read(rcq);
if (srq)
- put_srq_read(srq);
+ uobj_put_obj_read(srq);
if (ind_tbl)
- put_rwq_indirection_table_read(ind_tbl);
+ uobj_put_obj_read(ind_tbl);
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
- mutex_unlock(&file->mutex);
-
- obj->uevent.uobject.live = 1;
-
- up_write(&obj->uevent.uobject.mutex);
+ uobj_alloc_commit(&obj->uevent.uobject);
return 0;
err_cb:
- idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
-
-err_destroy:
ib_destroy_qp(qp);
err_put:
- if (xrcd)
- put_xrcd_read(xrcd_uobj);
+ if (!IS_ERR(xrcd_uobj))
+ uobj_put_read(xrcd_uobj);
if (pd)
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
if (scq)
- put_cq_read(scq);
+ uobj_put_obj_read(scq);
if (rcq && rcq != scq)
- put_cq_read(rcq);
+ uobj_put_obj_read(rcq);
if (srq)
- put_srq_read(srq);
+ uobj_put_obj_read(srq);
if (ind_tbl)
- put_rwq_indirection_table_read(ind_tbl);
+ uobj_put_obj_read(ind_tbl);
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject);
return ret;
}
@@ -2142,17 +1725,22 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ obj = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp),
+ file->ucontext);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
- init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class);
- down_write(&obj->uevent.uobject.mutex);
+ xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd.pd_handle,
+ file->ucontext);
+ if (IS_ERR(xrcd_uobj)) {
+ ret = -EINVAL;
+ goto err_put;
+ }
- xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
+ xrcd = (struct ib_xrcd *)xrcd_uobj->object;
if (!xrcd) {
ret = -EINVAL;
- goto err_put;
+ goto err_xrcd;
}
attr.event_handler = ib_uverbs_qp_event_handler;
@@ -2167,15 +1755,11 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
qp = ib_open_qp(xrcd, &attr);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
- goto err_put;
+ goto err_xrcd;
}
- qp->uobject = &obj->uevent.uobject;
-
obj->uevent.uobject.object = qp;
- ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
- if (ret)
- goto err_destroy;
+ obj->uevent.uobject.user_handle = cmd.user_handle;
memset(&resp, 0, sizeof resp);
resp.qpn = qp->qp_num;
@@ -2184,32 +1768,25 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
ret = -EFAULT;
- goto err_remove;
+ goto err_destroy;
}
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
atomic_inc(&obj->uxrcd->refcnt);
- put_xrcd_read(xrcd_uobj);
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
- mutex_unlock(&file->mutex);
+ qp->uobject = &obj->uevent.uobject;
+ uobj_put_read(xrcd_uobj);
- obj->uevent.uobject.live = 1;
- up_write(&obj->uevent.uobject.mutex);
+ uobj_alloc_commit(&obj->uevent.uobject);
return in_len;
-err_remove:
- idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
-
err_destroy:
ib_destroy_qp(qp);
-
+err_xrcd:
+ uobj_put_read(xrcd_uobj);
err_put:
- put_xrcd_read(xrcd_uobj);
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject);
return ret;
}
@@ -2223,6 +1800,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
struct ib_qp *qp;
struct ib_qp_attr *attr;
struct ib_qp_init_attr *init_attr;
+ const struct ib_global_route *grh;
int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -2235,7 +1813,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
goto out;
}
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp) {
ret = -EINVAL;
goto out;
@@ -2243,7 +1821,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr);
- put_qp_read(qp);
+ uobj_put_obj_read(qp);
if (ret)
goto out;
@@ -2272,29 +1850,39 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
resp.alt_port_num = attr->alt_port_num;
resp.alt_timeout = attr->alt_timeout;
- memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
- resp.dest.flow_label = attr->ah_attr.grh.flow_label;
- resp.dest.sgid_index = attr->ah_attr.grh.sgid_index;
- resp.dest.hop_limit = attr->ah_attr.grh.hop_limit;
- resp.dest.traffic_class = attr->ah_attr.grh.traffic_class;
- resp.dest.dlid = attr->ah_attr.dlid;
- resp.dest.sl = attr->ah_attr.sl;
- resp.dest.src_path_bits = attr->ah_attr.src_path_bits;
- resp.dest.static_rate = attr->ah_attr.static_rate;
- resp.dest.is_global = !!(attr->ah_attr.ah_flags & IB_AH_GRH);
- resp.dest.port_num = attr->ah_attr.port_num;
-
- memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
- resp.alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label;
- resp.alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index;
- resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit;
- resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class;
- resp.alt_dest.dlid = attr->alt_ah_attr.dlid;
- resp.alt_dest.sl = attr->alt_ah_attr.sl;
- resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
- resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate;
- resp.alt_dest.is_global = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH);
- resp.alt_dest.port_num = attr->alt_ah_attr.port_num;
+ resp.dest.dlid = rdma_ah_get_dlid(&attr->ah_attr);
+ resp.dest.sl = rdma_ah_get_sl(&attr->ah_attr);
+ resp.dest.src_path_bits = rdma_ah_get_path_bits(&attr->ah_attr);
+ resp.dest.static_rate = rdma_ah_get_static_rate(&attr->ah_attr);
+ resp.dest.is_global = !!(rdma_ah_get_ah_flags(&attr->ah_attr) &
+ IB_AH_GRH);
+ if (resp.dest.is_global) {
+ grh = rdma_ah_read_grh(&attr->ah_attr);
+ memcpy(resp.dest.dgid, grh->dgid.raw, 16);
+ resp.dest.flow_label = grh->flow_label;
+ resp.dest.sgid_index = grh->sgid_index;
+ resp.dest.hop_limit = grh->hop_limit;
+ resp.dest.traffic_class = grh->traffic_class;
+ }
+ resp.dest.port_num = rdma_ah_get_port_num(&attr->ah_attr);
+
+ resp.alt_dest.dlid = rdma_ah_get_dlid(&attr->alt_ah_attr);
+ resp.alt_dest.sl = rdma_ah_get_sl(&attr->alt_ah_attr);
+ resp.alt_dest.src_path_bits = rdma_ah_get_path_bits(&attr->alt_ah_attr);
+ resp.alt_dest.static_rate
+ = rdma_ah_get_static_rate(&attr->alt_ah_attr);
+ resp.alt_dest.is_global
+ = !!(rdma_ah_get_ah_flags(&attr->alt_ah_attr) &
+ IB_AH_GRH);
+ if (resp.alt_dest.is_global) {
+ grh = rdma_ah_read_grh(&attr->alt_ah_attr);
+ memcpy(resp.alt_dest.dgid, grh->dgid.raw, 16);
+ resp.alt_dest.flow_label = grh->flow_label;
+ resp.alt_dest.sgid_index = grh->sgid_index;
+ resp.alt_dest.hop_limit = grh->hop_limit;
+ resp.alt_dest.traffic_class = grh->traffic_class;
+ }
+ resp.alt_dest.port_num = rdma_ah_get_port_num(&attr->alt_ah_attr);
resp.max_send_wr = init_attr->cap.max_send_wr;
resp.max_recv_wr = init_attr->cap.max_recv_wr;
@@ -2339,12 +1927,18 @@ static int modify_qp(struct ib_uverbs_file *file,
if (!attr)
return -ENOMEM;
- qp = idr_read_qp(cmd->base.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd->base.qp_handle, file->ucontext);
if (!qp) {
ret = -EINVAL;
goto out;
}
+ if ((cmd->base.attr_mask & IB_QP_PORT) &&
+ !rdma_is_port_valid(qp->device, cmd->base.port_num)) {
+ ret = -EINVAL;
+ goto release_qp;
+ }
+
attr->qp_state = cmd->base.qp_state;
attr->cur_qp_state = cmd->base.cur_qp_state;
attr->path_mtu = cmd->base.path_mtu;
@@ -2368,51 +1962,55 @@ static int modify_qp(struct ib_uverbs_file *file,
attr->alt_timeout = cmd->base.alt_timeout;
attr->rate_limit = cmd->rate_limit;
- memcpy(attr->ah_attr.grh.dgid.raw, cmd->base.dest.dgid, 16);
- attr->ah_attr.grh.flow_label = cmd->base.dest.flow_label;
- attr->ah_attr.grh.sgid_index = cmd->base.dest.sgid_index;
- attr->ah_attr.grh.hop_limit = cmd->base.dest.hop_limit;
- attr->ah_attr.grh.traffic_class = cmd->base.dest.traffic_class;
- attr->ah_attr.dlid = cmd->base.dest.dlid;
- attr->ah_attr.sl = cmd->base.dest.sl;
- attr->ah_attr.src_path_bits = cmd->base.dest.src_path_bits;
- attr->ah_attr.static_rate = cmd->base.dest.static_rate;
- attr->ah_attr.ah_flags = cmd->base.dest.is_global ?
- IB_AH_GRH : 0;
- attr->ah_attr.port_num = cmd->base.dest.port_num;
-
- memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd->base.alt_dest.dgid, 16);
- attr->alt_ah_attr.grh.flow_label = cmd->base.alt_dest.flow_label;
- attr->alt_ah_attr.grh.sgid_index = cmd->base.alt_dest.sgid_index;
- attr->alt_ah_attr.grh.hop_limit = cmd->base.alt_dest.hop_limit;
- attr->alt_ah_attr.grh.traffic_class = cmd->base.alt_dest.traffic_class;
- attr->alt_ah_attr.dlid = cmd->base.alt_dest.dlid;
- attr->alt_ah_attr.sl = cmd->base.alt_dest.sl;
- attr->alt_ah_attr.src_path_bits = cmd->base.alt_dest.src_path_bits;
- attr->alt_ah_attr.static_rate = cmd->base.alt_dest.static_rate;
- attr->alt_ah_attr.ah_flags = cmd->base.alt_dest.is_global ?
- IB_AH_GRH : 0;
- attr->alt_ah_attr.port_num = cmd->base.alt_dest.port_num;
-
- if (qp->real_qp == qp) {
- if (cmd->base.attr_mask & IB_QP_AV) {
- ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
- if (ret)
- goto release_qp;
- }
- ret = qp->device->modify_qp(qp, attr,
- modify_qp_mask(qp->qp_type,
- cmd->base.attr_mask),
- udata);
+ attr->ah_attr.type = rdma_ah_find_type(qp->device,
+ cmd->base.dest.port_num);
+ if (cmd->base.dest.is_global) {
+ rdma_ah_set_grh(&attr->ah_attr, NULL,
+ cmd->base.dest.flow_label,
+ cmd->base.dest.sgid_index,
+ cmd->base.dest.hop_limit,
+ cmd->base.dest.traffic_class);
+ rdma_ah_set_dgid_raw(&attr->ah_attr, cmd->base.dest.dgid);
+ } else {
+ rdma_ah_set_ah_flags(&attr->ah_attr, 0);
+ }
+ rdma_ah_set_dlid(&attr->ah_attr, cmd->base.dest.dlid);
+ rdma_ah_set_sl(&attr->ah_attr, cmd->base.dest.sl);
+ rdma_ah_set_path_bits(&attr->ah_attr, cmd->base.dest.src_path_bits);
+ rdma_ah_set_static_rate(&attr->ah_attr, cmd->base.dest.static_rate);
+ rdma_ah_set_port_num(&attr->ah_attr,
+ cmd->base.dest.port_num);
+
+ attr->alt_ah_attr.type = rdma_ah_find_type(qp->device,
+ cmd->base.alt_dest.port_num);
+ if (cmd->base.alt_dest.is_global) {
+ rdma_ah_set_grh(&attr->alt_ah_attr, NULL,
+ cmd->base.alt_dest.flow_label,
+ cmd->base.alt_dest.sgid_index,
+ cmd->base.alt_dest.hop_limit,
+ cmd->base.alt_dest.traffic_class);
+ rdma_ah_set_dgid_raw(&attr->alt_ah_attr,
+ cmd->base.alt_dest.dgid);
} else {
- ret = ib_modify_qp(qp, attr,
- modify_qp_mask(qp->qp_type,
- cmd->base.attr_mask));
+ rdma_ah_set_ah_flags(&attr->alt_ah_attr, 0);
}
-release_qp:
- put_qp_read(qp);
+ rdma_ah_set_dlid(&attr->alt_ah_attr, cmd->base.alt_dest.dlid);
+ rdma_ah_set_sl(&attr->alt_ah_attr, cmd->base.alt_dest.sl);
+ rdma_ah_set_path_bits(&attr->alt_ah_attr,
+ cmd->base.alt_dest.src_path_bits);
+ rdma_ah_set_static_rate(&attr->alt_ah_attr,
+ cmd->base.alt_dest.static_rate);
+ rdma_ah_set_port_num(&attr->alt_ah_attr,
+ cmd->base.alt_dest.port_num);
+
+ ret = ib_modify_qp_with_udata(qp, attr,
+ modify_qp_mask(qp->qp_type,
+ cmd->base.attr_mask),
+ udata);
+release_qp:
+ uobj_put_obj_read(qp);
out:
kfree(attr);
@@ -2489,7 +2087,6 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
struct ib_uverbs_destroy_qp cmd;
struct ib_uverbs_destroy_qp_resp resp;
struct ib_uobject *uobj;
- struct ib_qp *qp;
struct ib_uqp_object *obj;
int ret = -EINVAL;
@@ -2498,40 +2095,26 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
memset(&resp, 0, sizeof resp);
- uobj = idr_write_uobj(&ib_uverbs_qp_idr, cmd.qp_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
- qp = uobj->object;
- obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
-
- if (!list_empty(&obj->mcast_list)) {
- put_uobj_write(uobj);
- return -EBUSY;
- }
-
- ret = ib_destroy_qp(qp);
- if (!ret)
- uobj->live = 0;
+ uobj = uobj_get_write(uobj_get_type(qp), cmd.qp_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- put_uobj_write(uobj);
+ obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
+ /*
+ * Make sure we don't free the memory in remove_commit as we still
+ * need the uobject memory to create the response.
+ */
+ uverbs_uobject_get(uobj);
- if (ret)
+ ret = uobj_remove_commit(uobj);
+ if (ret) {
+ uverbs_uobject_put(uobj);
return ret;
-
- if (obj->uxrcd)
- atomic_dec(&obj->uxrcd->refcnt);
-
- idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- ib_uverbs_release_uevent(file, &obj->uevent);
+ }
resp.events_reported = obj->uevent.events_reported;
-
- put_uobj(uobj);
+ uverbs_uobject_put(uobj);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
@@ -2542,9 +2125,13 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
static void *alloc_wr(size_t wr_size, __u32 num_sge)
{
+ if (num_sge >= (U32_MAX - ALIGN(wr_size, sizeof (struct ib_sge))) /
+ sizeof (struct ib_sge))
+ return NULL;
+
return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) +
num_sge * sizeof (struct ib_sge), GFP_KERNEL);
-};
+}
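
The new bound in alloc_wr() (and the matching check added to ib_uverbs_unmarshall_recv() below) rejects num_sge values whose size computation would wrap before kmalloc() ever sees it. A user-space analogue of the same guard, with a stand-in for struct ib_sge, purely for illustration:

#include <stdint.h>
#include <stdlib.h>

/* Stand-in with the same 16-byte layout intent as struct ib_sge. */
struct sge_example { uint64_t addr; uint32_t length; uint32_t lkey; };

#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((size_t)(a) - 1))

/*
 * Illustrative analogue of the hardened alloc_wr(): refuse any num_sge for
 * which aligned_wr_size + num_sge * sizeof(sge) would overflow a 32-bit
 * size, instead of letting the multiplication wrap and under-allocate.
 */
static void *alloc_wr_checked(size_t wr_size, uint32_t num_sge)
{
	size_t aligned = ALIGN_UP(wr_size, sizeof(struct sge_example));

	if (num_sge >= (UINT32_MAX - aligned) / sizeof(struct sge_example))
		return NULL;	/* e.g. num_sge = 0x10000000 wraps with 16-byte SGEs */

	return malloc(aligned + (size_t)num_sge * sizeof(struct sge_example));
}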
ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
@@ -2575,7 +2162,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
if (!user_wr)
return -ENOMEM;
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp)
goto out;
@@ -2611,7 +2198,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
goto out_put;
}
- ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext);
+ ud->ah = uobj_get_obj_read(ah, user_wr->wr.ud.ah,
+ file->ucontext);
if (!ud->ah) {
kfree(ud);
ret = -EINVAL;
@@ -2718,11 +2306,11 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
ret = -EFAULT;
out_put:
- put_qp_read(qp);
+ uobj_put_obj_read(qp);
while (wr) {
if (is_ud && ud_wr(wr)->ah)
- put_ah_read(ud_wr(wr)->ah);
+ uobj_put_obj_read(ud_wr(wr)->ah);
next = wr->next;
kfree(wr);
wr = next;
@@ -2771,6 +2359,13 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
goto err;
}
+ if (user_wr->num_sge >=
+ (U32_MAX - ALIGN(sizeof *next, sizeof (struct ib_sge))) /
+ sizeof (struct ib_sge)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
user_wr->num_sge * sizeof (struct ib_sge),
GFP_KERNEL);
@@ -2839,21 +2434,21 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
if (IS_ERR(wr))
return PTR_ERR(wr);
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp)
goto out;
resp.bad_wr = 0;
ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr);
- put_qp_read(qp);
-
- if (ret)
+ uobj_put_obj_read(qp);
+ if (ret) {
for (next = wr; next; next = next->next) {
++resp.bad_wr;
if (next == bad_wr)
break;
}
+ }
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
@@ -2889,14 +2484,14 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
if (IS_ERR(wr))
return PTR_ERR(wr);
- srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
if (!srq)
goto out;
resp.bad_wr = 0;
ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
- put_srq_read(srq);
+ uobj_put_obj_read(srq);
if (ret)
for (next = wr; next; next = next->next) {
@@ -2929,9 +2524,10 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_ah *ah;
- struct ib_ah_attr attr;
+ struct rdma_ah_attr attr;
int ret;
struct ib_udata udata;
+ u8 *dmac;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -2939,35 +2535,42 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
+ if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num))
+ return -EINVAL;
+
INIT_UDATA(&udata, buf + sizeof(cmd),
(unsigned long)cmd.response + sizeof(resp),
in_len - sizeof(cmd), out_len - sizeof(resp));
- uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
-
- init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class);
- down_write(&uobj->mutex);
+ uobj = uobj_alloc(uobj_get_type(ah), file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err;
}
- attr.dlid = cmd.attr.dlid;
- attr.sl = cmd.attr.sl;
- attr.src_path_bits = cmd.attr.src_path_bits;
- attr.static_rate = cmd.attr.static_rate;
- attr.ah_flags = cmd.attr.is_global ? IB_AH_GRH : 0;
- attr.port_num = cmd.attr.port_num;
- attr.grh.flow_label = cmd.attr.grh.flow_label;
- attr.grh.sgid_index = cmd.attr.grh.sgid_index;
- attr.grh.hop_limit = cmd.attr.grh.hop_limit;
- attr.grh.traffic_class = cmd.attr.grh.traffic_class;
- memset(&attr.dmac, 0, sizeof(attr.dmac));
- memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
+ attr.type = rdma_ah_find_type(ib_dev, cmd.attr.port_num);
+ rdma_ah_set_dlid(&attr, cmd.attr.dlid);
+ rdma_ah_set_sl(&attr, cmd.attr.sl);
+ rdma_ah_set_path_bits(&attr, cmd.attr.src_path_bits);
+ rdma_ah_set_static_rate(&attr, cmd.attr.static_rate);
+ rdma_ah_set_port_num(&attr, cmd.attr.port_num);
+
+ if (cmd.attr.is_global) {
+ rdma_ah_set_grh(&attr, NULL, cmd.attr.grh.flow_label,
+ cmd.attr.grh.sgid_index,
+ cmd.attr.grh.hop_limit,
+ cmd.attr.grh.traffic_class);
+ rdma_ah_set_dgid_raw(&attr, cmd.attr.grh.dgid);
+ } else {
+ rdma_ah_set_ah_flags(&attr, 0);
+ }
+ dmac = rdma_ah_retrieve_dmac(&attr);
+ if (dmac)
+ memset(dmac, 0, ETH_ALEN);
ah = pd->device->create_ah(pd, &attr, &udata);
@@ -2980,12 +2583,9 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
ah->pd = pd;
atomic_inc(&pd->usecnt);
ah->uobject = uobj;
+ uobj->user_handle = cmd.user_handle;
uobj->object = ah;
- ret = idr_add_uobj(&ib_uverbs_ah_idr, uobj);
- if (ret)
- goto err_destroy;
-
resp.ah_handle = uobj->id;
if (copy_to_user((void __user *) (unsigned long) cmd.response,
@@ -2994,29 +2594,19 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
goto err_copy;
}
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->ah_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_put_obj_read(pd);
+ uobj_alloc_commit(uobj);
return in_len;
err_copy:
- idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
-
-err_destroy:
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
err_put:
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
err:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
return ret;
}
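
The same field-by-field conversion to the rdma_ah_* accessors appears in modify_qp and query_qp above. A hedged sketch of building an rdma_ah_attr with those helpers (the function name and values are illustrative; it assumes the accessor API introduced alongside this series):

/* Illustrative only: populate an rdma_ah_attr via the new accessors. */
static void example_fill_ah_attr(struct ib_device *dev, u8 port_num,
				 const void *dgid_raw,
				 struct rdma_ah_attr *attr)
{
	memset(attr, 0, sizeof(*attr));
	attr->type = rdma_ah_find_type(dev, port_num);	/* IB vs. RoCE flavour */

	rdma_ah_set_dlid(attr, 0x1234);		/* arbitrary example LID */
	rdma_ah_set_sl(attr, 0);
	rdma_ah_set_path_bits(attr, 0);
	rdma_ah_set_static_rate(attr, 0);
	rdma_ah_set_port_num(attr, port_num);

	/* Setting a GRH also marks the address as global (IB_AH_GRH). */
	rdma_ah_set_grh(attr, NULL, /*flow_label*/ 0, /*sgid_index*/ 0,
			/*hop_limit*/ 64, /*traffic_class*/ 0);
	rdma_ah_set_dgid_raw(attr, dgid_raw);
}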
@@ -3025,36 +2615,19 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
const char __user *buf, int in_len, int out_len)
{
struct ib_uverbs_destroy_ah cmd;
- struct ib_ah *ah;
struct ib_uobject *uobj;
int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_ah_idr, cmd.ah_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
- ah = uobj->object;
-
- ret = ib_destroy_ah(ah);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
-
- if (ret)
- return ret;
-
- idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
+ uobj = uobj_get_write(uobj_get_type(ah), cmd.ah_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return in_len;
+ ret = uobj_remove_commit(uobj);
+ return ret ?: in_len;
}
ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
@@ -3071,12 +2644,13 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- qp = idr_write_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp)
return -EINVAL;
obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+ mutex_lock(&obj->mcast_lock);
list_for_each_entry(mcast, &obj->mcast_list, list)
if (cmd.mlid == mcast->lid &&
!memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
@@ -3100,7 +2674,8 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
kfree(mcast);
out_put:
- put_qp_write(qp);
+ mutex_unlock(&obj->mcast_lock);
+ uobj_put_obj_read(qp);
return ret ? ret : in_len;
}
@@ -3115,34 +2690,66 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
struct ib_qp *qp;
struct ib_uverbs_mcast_entry *mcast;
int ret = -EINVAL;
+ bool found = false;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- qp = idr_write_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp)
return -EINVAL;
- ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid);
- if (ret)
- goto out_put;
-
obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+ mutex_lock(&obj->mcast_lock);
list_for_each_entry(mcast, &obj->mcast_list, list)
if (cmd.mlid == mcast->lid &&
!memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
list_del(&mcast->list);
kfree(mcast);
+ found = true;
break;
}
-out_put:
- put_qp_write(qp);
+ if (!found) {
+ ret = -EINVAL;
+ goto out_put;
+ }
+ ret = ib_detach_mcast(qp, (union ib_gid *)cmd.gid, cmd.mlid);
+
+out_put:
+ mutex_unlock(&obj->mcast_lock);
+ uobj_put_obj_read(qp);
return ret ? ret : in_len;
}
+static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
+{
+ ib_spec->type = kern_spec->type;
+ switch (ib_spec->type) {
+ case IB_FLOW_SPEC_ACTION_TAG:
+ if (kern_spec->flow_tag.size !=
+ sizeof(struct ib_uverbs_flow_spec_action_tag))
+ return -EINVAL;
+
+ ib_spec->flow_tag.size = sizeof(struct ib_flow_spec_action_tag);
+ ib_spec->flow_tag.tag_id = kern_spec->flow_tag.tag_id;
+ break;
+ case IB_FLOW_SPEC_ACTION_DROP:
+ if (kern_spec->drop.size !=
+ sizeof(struct ib_uverbs_flow_spec_action_drop))
+ return -EINVAL;
+
+ ib_spec->drop.size = sizeof(struct ib_flow_spec_action_drop);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec)
{
/* Returns user space filter size, includes padding */
@@ -3167,8 +2774,8 @@ static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size,
return kern_filter_size;
}
-static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
- union ib_flow_spec *ib_spec)
+static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
{
ssize_t actual_filter_sz;
ssize_t kern_filter_sz;
@@ -3263,6 +2870,18 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
return 0;
}
+static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
+{
+ if (kern_spec->reserved)
+ return -EINVAL;
+
+ if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
+ return kern_spec_to_ib_spec_action(kern_spec, ib_spec);
+ else
+ return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
+}
+
int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
struct ib_udata *ucore,
@@ -3300,20 +2919,18 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EOPNOTSUPP;
- obj = kmalloc(sizeof(*obj), GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ obj = (struct ib_uwq_object *)uobj_alloc(uobj_get_type(wq),
+ file->ucontext);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
- init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext,
- &wq_lock_class);
- down_write(&obj->uevent.uobject.mutex);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
if (!pd) {
err = -EINVAL;
goto err_uobj;
}
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
if (!cq) {
err = -EINVAL;
goto err_put_pd;
@@ -3325,6 +2942,9 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
wq_init_attr.wq_context = file;
wq_init_attr.wq_type = cmd.wq_type;
wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
+ if (ucore->inlen >= (offsetof(typeof(cmd), create_flags) +
+ sizeof(cmd.create_flags)))
+ wq_init_attr.create_flags = cmd.create_flags;
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
@@ -3345,9 +2965,6 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
atomic_inc(&cq->usecnt);
wq->uobject = &obj->uevent.uobject;
obj->uevent.uobject.object = wq;
- err = idr_add_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject);
- if (err)
- goto destroy_wq;
memset(&resp, 0, sizeof(resp));
resp.wq_handle = obj->uevent.uobject.id;
@@ -3360,27 +2977,19 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
if (err)
goto err_copy;
- put_pd_read(pd);
- put_cq_read(cq);
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->wq_list);
- mutex_unlock(&file->mutex);
-
- obj->uevent.uobject.live = 1;
- up_write(&obj->uevent.uobject.mutex);
+ uobj_put_obj_read(pd);
+ uobj_put_obj_read(cq);
+ uobj_alloc_commit(&obj->uevent.uobject);
return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject);
-destroy_wq:
ib_destroy_wq(wq);
err_put_cq:
- put_cq_read(cq);
+ uobj_put_obj_read(cq);
err_put_pd:
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
err_uobj:
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject);
return err;
}
@@ -3392,7 +3001,6 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
{
struct ib_uverbs_ex_destroy_wq cmd = {};
struct ib_uverbs_ex_destroy_wq_resp resp = {};
- struct ib_wq *wq;
struct ib_uobject *uobj;
struct ib_uwq_object *obj;
size_t required_cmd_sz;
@@ -3421,36 +3029,25 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
return -EOPNOTSUPP;
resp.response_length = required_resp_len;
- uobj = idr_write_uobj(&ib_uverbs_wq_idr, cmd.wq_handle,
- file->ucontext);
- if (!uobj)
- return -EINVAL;
+ uobj = uobj_get_write(uobj_get_type(wq), cmd.wq_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- wq = uobj->object;
obj = container_of(uobj, struct ib_uwq_object, uevent.uobject);
- ret = ib_destroy_wq(wq);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
- if (ret)
- return ret;
-
- idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
+ /*
+ * Make sure we don't free the memory in remove_commit as we still
+ * need the uobject memory to create the response.
+ */
+ uverbs_uobject_get(uobj);
- ib_uverbs_release_uevent(file, &obj->uevent);
+ ret = uobj_remove_commit(uobj);
resp.events_reported = obj->uevent.events_reported;
- put_uobj(uobj);
-
- ret = ib_copy_to_udata(ucore, &resp, resp.response_length);
+ uverbs_uobject_put(uobj);
if (ret)
return ret;
- return 0;
+ return ib_copy_to_udata(ucore, &resp, resp.response_length);
}
int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
@@ -3480,17 +3077,21 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
if (!cmd.attr_mask)
return -EINVAL;
- if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE))
+ if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
return -EINVAL;
- wq = idr_read_wq(cmd.wq_handle, file->ucontext);
+ wq = uobj_get_obj_read(wq, cmd.wq_handle, file->ucontext);
if (!wq)
return -EINVAL;
wq_attr.curr_wq_state = cmd.curr_wq_state;
wq_attr.wq_state = cmd.wq_state;
+ if (cmd.attr_mask & IB_WQ_FLAGS) {
+ wq_attr.flags = cmd.flags;
+ wq_attr.flags_mask = cmd.flags_mask;
+ }
ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
- put_wq_read(wq);
+ uobj_put_obj_read(wq);
return ret;
}
@@ -3568,7 +3169,8 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
num_read_wqs++) {
- wq = idr_read_wq(wqs_handles[num_read_wqs], file->ucontext);
+ wq = uobj_get_obj_read(wq, wqs_handles[num_read_wqs],
+ file->ucontext);
if (!wq) {
err = -EINVAL;
goto put_wqs;
@@ -3577,14 +3179,12 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
wqs[num_read_wqs] = wq;
}
- uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
- if (!uobj) {
- err = -ENOMEM;
+ uobj = uobj_alloc(uobj_get_type(rwq_ind_table), file->ucontext);
+ if (IS_ERR(uobj)) {
+ err = PTR_ERR(uobj);
goto put_wqs;
}
- init_uobj(uobj, 0, file->ucontext, &rwq_ind_table_lock_class);
- down_write(&uobj->mutex);
init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
init_attr.ind_tbl = wqs;
rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw);
@@ -3604,10 +3204,6 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
for (i = 0; i < num_wq_handles; i++)
atomic_inc(&wqs[i]->usecnt);
- err = idr_add_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
- if (err)
- goto destroy_ind_tbl;
-
resp.ind_tbl_handle = uobj->id;
resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num;
resp.response_length = required_resp_len;
@@ -3620,26 +3216,18 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
kfree(wqs_handles);
for (j = 0; j < num_read_wqs; j++)
- put_wq_read(wqs[j]);
-
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->rwq_ind_tbl_list);
- mutex_unlock(&file->mutex);
+ uobj_put_obj_read(wqs[j]);
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_alloc_commit(uobj);
return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
-destroy_ind_tbl:
ib_destroy_rwq_ind_table(rwq_ind_tbl);
err_uobj:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
put_wqs:
for (j = 0; j < num_read_wqs; j++)
- put_wq_read(wqs[j]);
+ uobj_put_obj_read(wqs[j]);
err_free:
kfree(wqs_handles);
kfree(wqs);
@@ -3652,10 +3240,8 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
struct ib_udata *uhw)
{
struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {};
- struct ib_rwq_ind_table *rwq_ind_tbl;
struct ib_uobject *uobj;
int ret;
- struct ib_wq **ind_tbl;
size_t required_cmd_sz;
required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle);
@@ -3675,31 +3261,12 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EOPNOTSUPP;
- uobj = idr_write_uobj(&ib_uverbs_rwq_ind_tbl_idr, cmd.ind_tbl_handle,
- file->ucontext);
- if (!uobj)
- return -EINVAL;
- rwq_ind_tbl = uobj->object;
- ind_tbl = rwq_ind_tbl->ind_tbl;
-
- ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
+ uobj = uobj_get_write(uobj_get_type(rwq_ind_table), cmd.ind_tbl_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- if (ret)
- return ret;
-
- idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
- kfree(ind_tbl);
- return ret;
+ return uobj_remove_commit(uobj);
}
int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
@@ -3773,15 +3340,13 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
kern_flow_attr = &cmd.flow_attr;
}
- uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
- if (!uobj) {
- err = -ENOMEM;
+ uobj = uobj_alloc(uobj_get_type(flow), file->ucontext);
+ if (IS_ERR(uobj)) {
+ err = PTR_ERR(uobj);
goto err_free_attr;
}
- init_uobj(uobj, 0, file->ucontext, &rule_lock_class);
- down_write(&uobj->mutex);
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp) {
err = -EINVAL;
goto err_uobj;
@@ -3830,10 +3395,6 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
flow_id->uobject = uobj;
uobj->object = flow_id;
- err = idr_add_uobj(&ib_uverbs_rule_idr, uobj);
- if (err)
- goto destroy_flow;
-
memset(&resp, 0, sizeof(resp));
resp.flow_handle = uobj->id;
@@ -3842,28 +3403,20 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
if (err)
goto err_copy;
- put_qp_read(qp);
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->rule_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_put_obj_read(qp);
+ uobj_alloc_commit(uobj);
kfree(flow_attr);
if (cmd.flow_attr.num_of_specs)
kfree(kern_flow_attr);
return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
-destroy_flow:
ib_destroy_flow(flow_id);
err_free:
kfree(flow_attr);
err_put:
- put_qp_read(qp);
+ uobj_put_obj_read(qp);
err_uobj:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
err_free_attr:
if (cmd.flow_attr.num_of_specs)
kfree(kern_flow_attr);
@@ -3876,7 +3429,6 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
struct ib_udata *uhw)
{
struct ib_uverbs_destroy_flow cmd;
- struct ib_flow *flow_id;
struct ib_uobject *uobj;
int ret;
@@ -3890,26 +3442,12 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EINVAL;
- uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle,
- file->ucontext);
- if (!uobj)
- return -EINVAL;
- flow_id = uobj->object;
-
- ret = ib_destroy_flow(flow_id);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
-
- idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
+ uobj = uobj_get_write(uobj_get_type(flow), cmd.flow_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
+ ret = uobj_remove_commit(uobj);
return ret;
}
@@ -3926,31 +3464,37 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
struct ib_srq_init_attr attr;
int ret;
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
-
- init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class);
- down_write(&obj->uevent.uobject.mutex);
+ obj = (struct ib_usrq_object *)uobj_alloc(uobj_get_type(srq),
+ file->ucontext);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
if (cmd->srq_type == IB_SRQT_XRC) {
- attr.ext.xrc.xrcd = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj);
- if (!attr.ext.xrc.xrcd) {
+ xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->xrcd_handle,
+ file->ucontext);
+ if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
goto err;
}
+ attr.ext.xrc.xrcd = (struct ib_xrcd *)xrcd_uobj->object;
+ if (!attr.ext.xrc.xrcd) {
+ ret = -EINVAL;
+ goto err_put_xrcd;
+ }
+
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
atomic_inc(&obj->uxrcd->refcnt);
- attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0);
+ attr.ext.xrc.cq = uobj_get_obj_read(cq, cmd->cq_handle,
+ file->ucontext);
if (!attr.ext.xrc.cq) {
ret = -EINVAL;
goto err_put_xrcd;
}
}
- pd = idr_read_pd(cmd->pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd->pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_put_cq;
@@ -3990,9 +3534,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
atomic_set(&srq->usecnt, 0);
obj->uevent.uobject.object = srq;
- ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
- if (ret)
- goto err_destroy;
+ obj->uevent.uobject.user_handle = cmd->user_handle;
memset(&resp, 0, sizeof resp);
resp.srq_handle = obj->uevent.uobject.id;
@@ -4008,42 +3550,32 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
}
if (cmd->srq_type == IB_SRQT_XRC) {
- put_uobj_read(xrcd_uobj);
- put_cq_read(attr.ext.xrc.cq);
+ uobj_put_read(xrcd_uobj);
+ uobj_put_obj_read(attr.ext.xrc.cq);
}
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list);
- mutex_unlock(&file->mutex);
-
- obj->uevent.uobject.live = 1;
-
- up_write(&obj->uevent.uobject.mutex);
+ uobj_put_obj_read(pd);
+ uobj_alloc_commit(&obj->uevent.uobject);
return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
-
-err_destroy:
ib_destroy_srq(srq);
err_put:
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
err_put_cq:
if (cmd->srq_type == IB_SRQT_XRC)
- put_cq_read(attr.ext.xrc.cq);
+ uobj_put_obj_read(attr.ext.xrc.cq);
err_put_xrcd:
if (cmd->srq_type == IB_SRQT_XRC) {
atomic_dec(&obj->uxrcd->refcnt);
- put_uobj_read(xrcd_uobj);
+ uobj_put_read(xrcd_uobj);
}
err:
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject);
return ret;
}
@@ -4128,7 +3660,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
out_len);
- srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
if (!srq)
return -EINVAL;
@@ -4137,7 +3669,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata);
- put_srq_read(srq);
+ uobj_put_obj_read(srq);
return ret ? ret : in_len;
}
@@ -4159,13 +3691,13 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
if (!srq)
return -EINVAL;
ret = ib_query_srq(srq, &attr);
- put_srq_read(srq);
+ uobj_put_obj_read(srq);
if (ret)
return ret;
@@ -4191,54 +3723,38 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
struct ib_uverbs_destroy_srq cmd;
struct ib_uverbs_destroy_srq_resp resp;
struct ib_uobject *uobj;
- struct ib_srq *srq;
struct ib_uevent_object *obj;
int ret = -EINVAL;
- struct ib_usrq_object *us;
- enum ib_srq_type srq_type;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_srq_idr, cmd.srq_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
- srq = uobj->object;
- obj = container_of(uobj, struct ib_uevent_object, uobject);
- srq_type = srq->srq_type;
+ uobj = uobj_get_write(uobj_get_type(srq), cmd.srq_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- ret = ib_destroy_srq(srq);
- if (!ret)
- uobj->live = 0;
+ obj = container_of(uobj, struct ib_uevent_object, uobject);
+ /*
+ * Make sure we don't free the memory in remove_commit as we still
+ * need the uobject memory to create the response.
+ */
+ uverbs_uobject_get(uobj);
- put_uobj_write(uobj);
+ memset(&resp, 0, sizeof(resp));
- if (ret)
+ ret = uobj_remove_commit(uobj);
+ if (ret) {
+ uverbs_uobject_put(uobj);
return ret;
-
- if (srq_type == IB_SRQT_XRC) {
- us = container_of(obj, struct ib_usrq_object, uevent);
- atomic_dec(&us->uxrcd->refcnt);
}
-
- idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- ib_uverbs_release_uevent(file, obj);
-
- memset(&resp, 0, sizeof resp);
resp.events_reported = obj->events_reported;
+ uverbs_uobject_put(uobj);
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp)))
+ return -EFAULT;
- put_uobj(uobj);
-
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- ret = -EFAULT;
-
- return ret ? ret : in_len;
+ return in_len;
}
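
Where a destroy path still reports per-object counters to user space (destroy_cq, destroy_qp, destroy_wq and destroy_srq above), an extra reference is taken before the removal commits so the embedding structure stays readable for the response. A condensed, hypothetical sketch of that pattern:

static int example_destroy_with_stats(struct ib_uobject *uobj,
				      u32 *events_reported)
{
	struct ib_uevent_object *obj =
		container_of(uobj, struct ib_uevent_object, uobject);
	int ret;

	uverbs_uobject_get(uobj);	/* pin the memory across remove_commit */

	ret = uobj_remove_commit(uobj);	/* destroys the object, drops the handle */
	if (ret) {
		uverbs_uobject_put(uobj);
		return ret;
	}

	*events_reported = obj->events_reported; /* still valid: we hold a ref */
	uverbs_uobject_put(uobj);		 /* now the memory may be freed */
	return 0;
}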
int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
@@ -4323,6 +3839,12 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
resp.max_wq_type_rq = attr.max_wq_type_rq;
resp.response_length += sizeof(resp.max_wq_type_rq);
+
+ if (ucore->outlen < resp.response_length + sizeof(resp.raw_packet_caps))
+ goto end;
+
+ resp.raw_packet_caps = attr.raw_packet_caps;
+ resp.response_length += sizeof(resp.raw_packet_caps);
end:
err = ib_copy_to_udata(ucore, &resp, resp.response_length);
return err;
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index b3f95d453fba..3d2609608f58 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -51,6 +51,8 @@
#include <rdma/ib.h>
#include "uverbs.h"
+#include "core_priv.h"
+#include "rdma_core.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace verbs access");
@@ -66,19 +68,6 @@ enum {
static struct class *uverbs_class;
-DEFINE_SPINLOCK(ib_uverbs_idr_lock);
-DEFINE_IDR(ib_uverbs_pd_idr);
-DEFINE_IDR(ib_uverbs_mr_idr);
-DEFINE_IDR(ib_uverbs_mw_idr);
-DEFINE_IDR(ib_uverbs_ah_idr);
-DEFINE_IDR(ib_uverbs_cq_idr);
-DEFINE_IDR(ib_uverbs_qp_idr);
-DEFINE_IDR(ib_uverbs_srq_idr);
-DEFINE_IDR(ib_uverbs_xrcd_idr);
-DEFINE_IDR(ib_uverbs_rule_idr);
-DEFINE_IDR(ib_uverbs_wq_idr);
-DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr);
-
static DEFINE_SPINLOCK(map_lock);
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
@@ -167,37 +156,37 @@ static struct kobj_type ib_uverbs_dev_ktype = {
.release = ib_uverbs_release_dev,
};
-static void ib_uverbs_release_event_file(struct kref *ref)
+static void ib_uverbs_release_async_event_file(struct kref *ref)
{
- struct ib_uverbs_event_file *file =
- container_of(ref, struct ib_uverbs_event_file, ref);
+ struct ib_uverbs_async_event_file *file =
+ container_of(ref, struct ib_uverbs_async_event_file, ref);
kfree(file);
}
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
- struct ib_uverbs_event_file *ev_file,
+ struct ib_uverbs_completion_event_file *ev_file,
struct ib_ucq_object *uobj)
{
struct ib_uverbs_event *evt, *tmp;
if (ev_file) {
- spin_lock_irq(&ev_file->lock);
+ spin_lock_irq(&ev_file->ev_queue.lock);
list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
- spin_unlock_irq(&ev_file->lock);
+ spin_unlock_irq(&ev_file->ev_queue.lock);
- kref_put(&ev_file->ref, ib_uverbs_release_event_file);
+ uverbs_uobject_put(&ev_file->uobj_file.uobj);
}
- spin_lock_irq(&file->async_file->lock);
+ spin_lock_irq(&file->async_file->ev_queue.lock);
list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
- spin_unlock_irq(&file->async_file->lock);
+ spin_unlock_irq(&file->async_file->ev_queue.lock);
}
void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
@@ -205,16 +194,16 @@ void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
{
struct ib_uverbs_event *evt, *tmp;
- spin_lock_irq(&file->async_file->lock);
+ spin_lock_irq(&file->async_file->ev_queue.lock);
list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
- spin_unlock_irq(&file->async_file->lock);
+ spin_unlock_irq(&file->async_file->ev_queue.lock);
}
-static void ib_uverbs_detach_umcast(struct ib_qp *qp,
- struct ib_uqp_object *uobj)
+void ib_uverbs_detach_umcast(struct ib_qp *qp,
+ struct ib_uqp_object *uobj)
{
struct ib_uverbs_mcast_entry *mcast, *tmp;
@@ -226,124 +215,16 @@ static void ib_uverbs_detach_umcast(struct ib_qp *qp,
}
static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
- struct ib_ucontext *context)
+ struct ib_ucontext *context,
+ bool device_removed)
{
- struct ib_uobject *uobj, *tmp;
-
context->closing = 1;
-
- list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
- struct ib_ah *ah = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
- ib_destroy_ah(ah);
- kfree(uobj);
- }
-
- /* Remove MWs before QPs, in order to support type 2A MWs. */
- list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) {
- struct ib_mw *mw = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
- uverbs_dealloc_mw(mw);
- kfree(uobj);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) {
- struct ib_flow *flow_id = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
- ib_destroy_flow(flow_id);
- kfree(uobj);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
- struct ib_qp *qp = uobj->object;
- struct ib_uqp_object *uqp =
- container_of(uobj, struct ib_uqp_object, uevent.uobject);
-
- idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
- if (qp == qp->real_qp)
- ib_uverbs_detach_umcast(qp, uqp);
- ib_destroy_qp(qp);
- ib_uverbs_release_uevent(file, &uqp->uevent);
- kfree(uqp);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) {
- struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object;
- struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
-
- idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
- ib_destroy_rwq_ind_table(rwq_ind_tbl);
- kfree(ind_tbl);
- kfree(uobj);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) {
- struct ib_wq *wq = uobj->object;
- struct ib_uwq_object *uwq =
- container_of(uobj, struct ib_uwq_object, uevent.uobject);
-
- idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
- ib_destroy_wq(wq);
- ib_uverbs_release_uevent(file, &uwq->uevent);
- kfree(uwq);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
- struct ib_srq *srq = uobj->object;
- struct ib_uevent_object *uevent =
- container_of(uobj, struct ib_uevent_object, uobject);
-
- idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
- ib_destroy_srq(srq);
- ib_uverbs_release_uevent(file, uevent);
- kfree(uevent);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
- struct ib_cq *cq = uobj->object;
- struct ib_uverbs_event_file *ev_file = cq->cq_context;
- struct ib_ucq_object *ucq =
- container_of(uobj, struct ib_ucq_object, uobject);
-
- idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
- ib_destroy_cq(cq);
- ib_uverbs_release_ucq(file, ev_file, ucq);
- kfree(ucq);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
- struct ib_mr *mr = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
- ib_dereg_mr(mr);
- kfree(uobj);
- }
-
- mutex_lock(&file->device->xrcd_tree_mutex);
- list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
- struct ib_xrcd *xrcd = uobj->object;
- struct ib_uxrcd_object *uxrcd =
- container_of(uobj, struct ib_uxrcd_object, uobject);
-
- idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
- ib_uverbs_dealloc_xrcd(file->device, xrcd);
- kfree(uxrcd);
- }
- mutex_unlock(&file->device->xrcd_tree_mutex);
-
- list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
- struct ib_pd *pd = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
- ib_dealloc_pd(pd);
- kfree(uobj);
- }
-
+ uverbs_cleanup_ucontext(context, device_removed);
put_pid(context->tgid);
+ ib_rdmacg_uncharge(&context->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_HANDLE);
+
return context->device->dealloc_ucontext(context);
}
@@ -352,7 +233,7 @@ static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
complete(&dev->comp);
}
-static void ib_uverbs_release_file(struct kref *ref)
+void ib_uverbs_release_file(struct kref *ref)
{
struct ib_uverbs_file *file =
container_of(ref, struct ib_uverbs_file, ref);
@@ -372,58 +253,54 @@ static void ib_uverbs_release_file(struct kref *ref)
kfree(file);
}
-static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
- size_t count, loff_t *pos)
+static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
+ struct ib_uverbs_file *uverbs_file,
+ struct file *filp, char __user *buf,
+ size_t count, loff_t *pos,
+ size_t eventsz)
{
- struct ib_uverbs_event_file *file = filp->private_data;
struct ib_uverbs_event *event;
- int eventsz;
int ret = 0;
- spin_lock_irq(&file->lock);
+ spin_lock_irq(&ev_queue->lock);
- while (list_empty(&file->event_list)) {
- spin_unlock_irq(&file->lock);
+ while (list_empty(&ev_queue->event_list)) {
+ spin_unlock_irq(&ev_queue->lock);
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
- if (wait_event_interruptible(file->poll_wait,
- (!list_empty(&file->event_list) ||
+ if (wait_event_interruptible(ev_queue->poll_wait,
+ (!list_empty(&ev_queue->event_list) ||
/* The barriers built into wait_event_interruptible()
* and wake_up() guarantee this will see the null set
* without using RCU
*/
- !file->uverbs_file->device->ib_dev)))
+ !uverbs_file->device->ib_dev)))
return -ERESTARTSYS;
/* If device was disassociated and no event exists set an error */
- if (list_empty(&file->event_list) &&
- !file->uverbs_file->device->ib_dev)
+ if (list_empty(&ev_queue->event_list) &&
+ !uverbs_file->device->ib_dev)
return -EIO;
- spin_lock_irq(&file->lock);
+ spin_lock_irq(&ev_queue->lock);
}
- event = list_entry(file->event_list.next, struct ib_uverbs_event, list);
-
- if (file->is_async)
- eventsz = sizeof (struct ib_uverbs_async_event_desc);
- else
- eventsz = sizeof (struct ib_uverbs_comp_event_desc);
+ event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);
if (eventsz > count) {
ret = -EINVAL;
event = NULL;
} else {
- list_del(file->event_list.next);
+ list_del(ev_queue->event_list.next);
if (event->counter) {
++(*event->counter);
list_del(&event->obj_list);
}
}
- spin_unlock_irq(&file->lock);
+ spin_unlock_irq(&ev_queue->lock);
if (event) {
if (copy_to_user(buf, event, eventsz))
@@ -437,87 +314,158 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
return ret;
}
-static unsigned int ib_uverbs_event_poll(struct file *filp,
+static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct ib_uverbs_async_event_file *file = filp->private_data;
+
+ return ib_uverbs_event_read(&file->ev_queue, file->uverbs_file, filp,
+ buf, count, pos,
+ sizeof(struct ib_uverbs_async_event_desc));
+}
+
+static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct ib_uverbs_completion_event_file *comp_ev_file =
+ filp->private_data;
+
+ return ib_uverbs_event_read(&comp_ev_file->ev_queue,
+ comp_ev_file->uobj_file.ufile, filp,
+ buf, count, pos,
+ sizeof(struct ib_uverbs_comp_event_desc));
+}
+
+static unsigned int ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
+ struct file *filp,
struct poll_table_struct *wait)
{
unsigned int pollflags = 0;
- struct ib_uverbs_event_file *file = filp->private_data;
- poll_wait(filp, &file->poll_wait, wait);
+ poll_wait(filp, &ev_queue->poll_wait, wait);
- spin_lock_irq(&file->lock);
- if (!list_empty(&file->event_list))
+ spin_lock_irq(&ev_queue->lock);
+ if (!list_empty(&ev_queue->event_list))
pollflags = POLLIN | POLLRDNORM;
- spin_unlock_irq(&file->lock);
+ spin_unlock_irq(&ev_queue->lock);
return pollflags;
}
-static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
+static unsigned int ib_uverbs_async_event_poll(struct file *filp,
+ struct poll_table_struct *wait)
+{
+ return ib_uverbs_event_poll(filp->private_data, filp, wait);
+}
+
+static unsigned int ib_uverbs_comp_event_poll(struct file *filp,
+ struct poll_table_struct *wait)
{
- struct ib_uverbs_event_file *file = filp->private_data;
+ struct ib_uverbs_completion_event_file *comp_ev_file =
+ filp->private_data;
- return fasync_helper(fd, filp, on, &file->async_queue);
+ return ib_uverbs_event_poll(&comp_ev_file->ev_queue, filp, wait);
}
-static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
+static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on)
{
- struct ib_uverbs_event_file *file = filp->private_data;
+ struct ib_uverbs_event_queue *ev_queue = filp->private_data;
+
+ return fasync_helper(fd, filp, on, &ev_queue->async_queue);
+}
+
+static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
+{
+ struct ib_uverbs_completion_event_file *comp_ev_file =
+ filp->private_data;
+
+ return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue);
+}
+
+static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp)
+{
+ struct ib_uverbs_async_event_file *file = filp->private_data;
+ struct ib_uverbs_file *uverbs_file = file->uverbs_file;
struct ib_uverbs_event *entry, *tmp;
int closed_already = 0;
- mutex_lock(&file->uverbs_file->device->lists_mutex);
- spin_lock_irq(&file->lock);
- closed_already = file->is_closed;
- file->is_closed = 1;
- list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
+ mutex_lock(&uverbs_file->device->lists_mutex);
+ spin_lock_irq(&file->ev_queue.lock);
+ closed_already = file->ev_queue.is_closed;
+ file->ev_queue.is_closed = 1;
+ list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
if (entry->counter)
list_del(&entry->obj_list);
kfree(entry);
}
- spin_unlock_irq(&file->lock);
+ spin_unlock_irq(&file->ev_queue.lock);
if (!closed_already) {
list_del(&file->list);
- if (file->is_async)
- ib_unregister_event_handler(&file->uverbs_file->
- event_handler);
+ ib_unregister_event_handler(&uverbs_file->event_handler);
}
- mutex_unlock(&file->uverbs_file->device->lists_mutex);
+ mutex_unlock(&uverbs_file->device->lists_mutex);
- kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
- kref_put(&file->ref, ib_uverbs_release_event_file);
+ kref_put(&uverbs_file->ref, ib_uverbs_release_file);
+ kref_put(&file->ref, ib_uverbs_release_async_event_file);
return 0;
}
-static const struct file_operations uverbs_event_fops = {
+static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp)
+{
+ struct ib_uverbs_completion_event_file *file = filp->private_data;
+ struct ib_uverbs_event *entry, *tmp;
+
+ spin_lock_irq(&file->ev_queue.lock);
+ list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
+ if (entry->counter)
+ list_del(&entry->obj_list);
+ kfree(entry);
+ }
+ spin_unlock_irq(&file->ev_queue.lock);
+
+ uverbs_close_fd(filp);
+
+ return 0;
+}
+
+const struct file_operations uverbs_event_fops = {
.owner = THIS_MODULE,
- .read = ib_uverbs_event_read,
- .poll = ib_uverbs_event_poll,
- .release = ib_uverbs_event_close,
- .fasync = ib_uverbs_event_fasync,
+ .read = ib_uverbs_comp_event_read,
+ .poll = ib_uverbs_comp_event_poll,
+ .release = ib_uverbs_comp_event_close,
+ .fasync = ib_uverbs_comp_event_fasync,
+ .llseek = no_llseek,
+};
+
+static const struct file_operations uverbs_async_event_fops = {
+ .owner = THIS_MODULE,
+ .read = ib_uverbs_async_event_read,
+ .poll = ib_uverbs_async_event_poll,
+ .release = ib_uverbs_async_event_close,
+ .fasync = ib_uverbs_async_event_fasync,
.llseek = no_llseek,
};
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
{
- struct ib_uverbs_event_file *file = cq_context;
+ struct ib_uverbs_event_queue *ev_queue = cq_context;
struct ib_ucq_object *uobj;
struct ib_uverbs_event *entry;
unsigned long flags;
- if (!file)
+ if (!ev_queue)
return;
- spin_lock_irqsave(&file->lock, flags);
- if (file->is_closed) {
- spin_unlock_irqrestore(&file->lock, flags);
+ spin_lock_irqsave(&ev_queue->lock, flags);
+ if (ev_queue->is_closed) {
+ spin_unlock_irqrestore(&ev_queue->lock, flags);
return;
}
entry = kmalloc(sizeof *entry, GFP_ATOMIC);
if (!entry) {
- spin_unlock_irqrestore(&file->lock, flags);
+ spin_unlock_irqrestore(&ev_queue->lock, flags);
return;
}
@@ -526,12 +474,12 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
entry->desc.comp.cq_handle = cq->uobject->user_handle;
entry->counter = &uobj->comp_events_reported;
- list_add_tail(&entry->list, &file->event_list);
+ list_add_tail(&entry->list, &ev_queue->event_list);
list_add_tail(&entry->obj_list, &uobj->comp_list);
- spin_unlock_irqrestore(&file->lock, flags);
+ spin_unlock_irqrestore(&ev_queue->lock, flags);
- wake_up_interruptible(&file->poll_wait);
- kill_fasync(&file->async_queue, SIGIO, POLL_IN);
+ wake_up_interruptible(&ev_queue->poll_wait);
+ kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN);
}
static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
@@ -542,15 +490,15 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
struct ib_uverbs_event *entry;
unsigned long flags;
- spin_lock_irqsave(&file->async_file->lock, flags);
- if (file->async_file->is_closed) {
- spin_unlock_irqrestore(&file->async_file->lock, flags);
+ spin_lock_irqsave(&file->async_file->ev_queue.lock, flags);
+ if (file->async_file->ev_queue.is_closed) {
+ spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
return;
}
entry = kmalloc(sizeof *entry, GFP_ATOMIC);
if (!entry) {
- spin_unlock_irqrestore(&file->async_file->lock, flags);
+ spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
return;
}
@@ -559,13 +507,13 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
entry->desc.async.reserved = 0;
entry->counter = counter;
- list_add_tail(&entry->list, &file->async_file->event_list);
+ list_add_tail(&entry->list, &file->async_file->ev_queue.event_list);
if (obj_list)
list_add_tail(&entry->obj_list, obj_list);
- spin_unlock_irqrestore(&file->async_file->lock, flags);
+ spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
- wake_up_interruptible(&file->async_file->poll_wait);
- kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
+ wake_up_interruptible(&file->async_file->ev_queue.poll_wait);
+ kill_fasync(&file->async_file->ev_queue.async_queue, SIGIO, POLL_IN);
}
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
@@ -583,7 +531,7 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
struct ib_uevent_object *uobj;
/* for XRC target qp's, check that qp is live */
- if (!event->element.qp->uobject || !event->element.qp->uobject->live)
+ if (!event->element.qp->uobject)
return;
uobj = container_of(event->element.qp->uobject,
@@ -628,15 +576,23 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler,
void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file)
{
- kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
+ kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file);
file->async_file = NULL;
}
-struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
- struct ib_device *ib_dev,
- int is_async)
+void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue)
+{
+ spin_lock_init(&ev_queue->lock);
+ INIT_LIST_HEAD(&ev_queue->event_list);
+ init_waitqueue_head(&ev_queue->poll_wait);
+ ev_queue->is_closed = 0;
+ ev_queue->async_queue = NULL;
+}
+
+struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
+ struct ib_device *ib_dev)
{
- struct ib_uverbs_event_file *ev_file;
+ struct ib_uverbs_async_event_file *ev_file;
struct file *filp;
int ret;
@@ -644,16 +600,11 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
if (!ev_file)
return ERR_PTR(-ENOMEM);
- kref_init(&ev_file->ref);
- spin_lock_init(&ev_file->lock);
- INIT_LIST_HEAD(&ev_file->event_list);
- init_waitqueue_head(&ev_file->poll_wait);
+ ib_uverbs_init_event_queue(&ev_file->ev_queue);
ev_file->uverbs_file = uverbs_file;
kref_get(&ev_file->uverbs_file->ref);
- ev_file->async_queue = NULL;
- ev_file->is_closed = 0;
-
- filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops,
+ kref_init(&ev_file->ref);
+ filp = anon_inode_getfile("[infinibandevent]", &uverbs_async_event_fops,
ev_file, O_RDONLY);
if (IS_ERR(filp))
goto err_put_refs;
@@ -663,64 +614,33 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
&uverbs_file->device->uverbs_events_file_list);
mutex_unlock(&uverbs_file->device->lists_mutex);
- if (is_async) {
- WARN_ON(uverbs_file->async_file);
- uverbs_file->async_file = ev_file;
- kref_get(&uverbs_file->async_file->ref);
- INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
- ib_dev,
- ib_uverbs_event_handler);
- ret = ib_register_event_handler(&uverbs_file->event_handler);
- if (ret)
- goto err_put_file;
-
- /* At that point async file stuff was fully set */
- ev_file->is_async = 1;
- }
+ WARN_ON(uverbs_file->async_file);
+ uverbs_file->async_file = ev_file;
+ kref_get(&uverbs_file->async_file->ref);
+ INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
+ ib_dev,
+ ib_uverbs_event_handler);
+ ret = ib_register_event_handler(&uverbs_file->event_handler);
+ if (ret)
+ goto err_put_file;
+
+ /* At that point, the async file was fully set up */
return filp;
err_put_file:
fput(filp);
- kref_put(&uverbs_file->async_file->ref, ib_uverbs_release_event_file);
+ kref_put(&uverbs_file->async_file->ref,
+ ib_uverbs_release_async_event_file);
uverbs_file->async_file = NULL;
return ERR_PTR(ret);
err_put_refs:
kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file);
- kref_put(&ev_file->ref, ib_uverbs_release_event_file);
+ kref_put(&ev_file->ref, ib_uverbs_release_async_event_file);
return filp;
}
-/*
- * Look up a completion event file by FD. If lookup is successful,
- * takes a ref to the event file struct that it returns; if
- * unsuccessful, returns NULL.
- */
-struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
-{
- struct ib_uverbs_event_file *ev_file = NULL;
- struct fd f = fdget(fd);
-
- if (!f.file)
- return NULL;
-
- if (f.file->f_op != &uverbs_event_fops)
- goto out;
-
- ev_file = f.file->private_data;
- if (ev_file->is_async) {
- ev_file = NULL;
- goto out;
- }
-
- kref_get(&ev_file->ref);
-
-out:
- fdput(f);
- return ev_file;
-}
-
static int verify_command_mask(struct ib_device *ib_dev, __u32 command)
{
u64 mask;
@@ -966,6 +886,8 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
}
file->device = dev;
+ spin_lock_init(&file->idr_lock);
+ idr_init(&file->idr);
file->ucontext = NULL;
file->async_file = NULL;
kref_init(&file->ref);
@@ -999,10 +921,11 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
mutex_lock(&file->cleanup_mutex);
if (file->ucontext) {
- ib_uverbs_cleanup_ucontext(file, file->ucontext);
+ ib_uverbs_cleanup_ucontext(file, file->ucontext, false);
file->ucontext = NULL;
}
mutex_unlock(&file->cleanup_mutex);
+ idr_destroy(&file->idr);
mutex_lock(&file->device->lists_mutex);
if (!file->is_closed) {
@@ -1012,7 +935,8 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
mutex_unlock(&file->device->lists_mutex);
if (file->async_file)
- kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
+ kref_put(&file->async_file->ref,
+ ib_uverbs_release_async_event_file);
kref_put(&file->ref, ib_uverbs_release_file);
kobject_put(&dev->kobj);
@@ -1169,12 +1093,12 @@ static void ib_uverbs_add_one(struct ib_device *device)
cdev_init(&uverbs_dev->cdev, NULL);
uverbs_dev->cdev.owner = THIS_MODULE;
uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
- uverbs_dev->cdev.kobj.parent = &uverbs_dev->kobj;
+ cdev_set_parent(&uverbs_dev->cdev, &uverbs_dev->kobj);
kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
if (cdev_add(&uverbs_dev->cdev, base, 1))
goto err_cdev;
- uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
+ uverbs_dev->dev = device_create(uverbs_class, device->dev.parent,
uverbs_dev->cdev.dev, uverbs_dev,
"uverbs%d", uverbs_dev->devnum);
if (IS_ERR(uverbs_dev->dev))
@@ -1211,7 +1135,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
struct ib_device *ib_dev)
{
struct ib_uverbs_file *file;
- struct ib_uverbs_event_file *event_file;
+ struct ib_uverbs_async_event_file *event_file;
struct ib_event event;
/* Pending running commands to terminate */
@@ -1248,7 +1172,9 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
* (e.g mmput).
*/
ib_dev->disassociate_ucontext(ucontext);
- ib_uverbs_cleanup_ucontext(file, ucontext);
+ mutex_lock(&file->cleanup_mutex);
+ ib_uverbs_cleanup_ucontext(file, ucontext, true);
+ mutex_unlock(&file->cleanup_mutex);
}
mutex_lock(&uverbs_dev->lists_mutex);
@@ -1258,21 +1184,20 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
event_file = list_first_entry(&uverbs_dev->
uverbs_events_file_list,
- struct ib_uverbs_event_file,
+ struct ib_uverbs_async_event_file,
list);
- spin_lock_irq(&event_file->lock);
- event_file->is_closed = 1;
- spin_unlock_irq(&event_file->lock);
+ spin_lock_irq(&event_file->ev_queue.lock);
+ event_file->ev_queue.is_closed = 1;
+ spin_unlock_irq(&event_file->ev_queue.lock);
list_del(&event_file->list);
- if (event_file->is_async) {
- ib_unregister_event_handler(&event_file->uverbs_file->
- event_handler);
- event_file->uverbs_file->event_handler.device = NULL;
- }
+ ib_unregister_event_handler(
+ &event_file->uverbs_file->event_handler);
+ event_file->uverbs_file->event_handler.device =
+ NULL;
- wake_up_interruptible(&event_file->poll_wait);
- kill_fasync(&event_file->async_queue, SIGIO, POLL_IN);
+ wake_up_interruptible(&event_file->ev_queue.poll_wait);
+ kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN);
}
mutex_unlock(&uverbs_dev->lists_mutex);
}
@@ -1376,13 +1301,6 @@ static void __exit ib_uverbs_cleanup(void)
unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
if (overflow_maj)
unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);
- idr_destroy(&ib_uverbs_pd_idr);
- idr_destroy(&ib_uverbs_mr_idr);
- idr_destroy(&ib_uverbs_mw_idr);
- idr_destroy(&ib_uverbs_ah_idr);
- idr_destroy(&ib_uverbs_cq_idr);
- idr_destroy(&ib_uverbs_qp_idr);
- idr_destroy(&ib_uverbs_srq_idr);
}
module_init(ib_uverbs_init);
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index af020f80d50f..94fd989c9060 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -34,20 +34,25 @@
#include <rdma/ib_marshall.h>
void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
- struct ib_ah_attr *src)
+ struct rdma_ah_attr *src)
{
- memcpy(dst->grh.dgid, src->grh.dgid.raw, sizeof src->grh.dgid);
- dst->grh.flow_label = src->grh.flow_label;
- dst->grh.sgid_index = src->grh.sgid_index;
- dst->grh.hop_limit = src->grh.hop_limit;
- dst->grh.traffic_class = src->grh.traffic_class;
memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved));
- dst->dlid = src->dlid;
- dst->sl = src->sl;
- dst->src_path_bits = src->src_path_bits;
- dst->static_rate = src->static_rate;
- dst->is_global = src->ah_flags & IB_AH_GRH ? 1 : 0;
- dst->port_num = src->port_num;
+ dst->dlid = rdma_ah_get_dlid(src);
+ dst->sl = rdma_ah_get_sl(src);
+ dst->src_path_bits = rdma_ah_get_path_bits(src);
+ dst->static_rate = rdma_ah_get_static_rate(src);
+ dst->is_global = rdma_ah_get_ah_flags(src) &
+ IB_AH_GRH ? 1 : 0;
+ if (dst->is_global) {
+ const struct ib_global_route *grh = rdma_ah_read_grh(src);
+
+ memcpy(dst->grh.dgid, grh->dgid.raw, sizeof(grh->dgid));
+ dst->grh.flow_label = grh->flow_label;
+ dst->grh.sgid_index = grh->sgid_index;
+ dst->grh.hop_limit = grh->hop_limit;
+ dst->grh.traffic_class = grh->traffic_class;
+ }
+ dst->port_num = rdma_ah_get_port_num(src);
dst->reserved = 0;
}
EXPORT_SYMBOL(ib_copy_ah_attr_to_user);
@@ -91,15 +96,15 @@ void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
}
EXPORT_SYMBOL(ib_copy_qp_attr_to_user);
-void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
- struct ib_sa_path_rec *src)
+static void __ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
+ struct sa_path_rec *src)
{
- memcpy(dst->dgid, src->dgid.raw, sizeof src->dgid);
- memcpy(dst->sgid, src->sgid.raw, sizeof src->sgid);
+ memcpy(dst->dgid, src->dgid.raw, sizeof(src->dgid));
+ memcpy(dst->sgid, src->sgid.raw, sizeof(src->sgid));
- dst->dlid = src->dlid;
- dst->slid = src->slid;
- dst->raw_traffic = src->raw_traffic;
+ dst->dlid = htons(ntohl(sa_path_get_dlid(src)));
+ dst->slid = htons(ntohl(sa_path_get_slid(src)));
+ dst->raw_traffic = sa_path_get_raw_traffic(src);
dst->flow_label = src->flow_label;
dst->hop_limit = src->hop_limit;
dst->traffic_class = src->traffic_class;
@@ -115,17 +120,43 @@ void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
dst->preference = src->preference;
dst->packet_life_time_selector = src->packet_life_time_selector;
}
+
+void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
+ struct sa_path_rec *src)
+{
+ struct sa_path_rec rec;
+
+ if (src->rec_type == SA_PATH_REC_TYPE_OPA) {
+ sa_convert_path_opa_to_ib(&rec, src);
+ __ib_copy_path_rec_to_user(dst, &rec);
+ return;
+ }
+ __ib_copy_path_rec_to_user(dst, src);
+}
EXPORT_SYMBOL(ib_copy_path_rec_to_user);
-void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
+void ib_copy_path_rec_from_user(struct sa_path_rec *dst,
struct ib_user_path_rec *src)
{
+ __be32 slid, dlid;
+
+ memset(dst, 0, sizeof(*dst));
+ if ((ib_is_opa_gid((union ib_gid *)src->sgid)) ||
+ (ib_is_opa_gid((union ib_gid *)src->dgid))) {
+ dst->rec_type = SA_PATH_REC_TYPE_OPA;
+ slid = htonl(opa_get_lid_from_gid((union ib_gid *)src->sgid));
+ dlid = htonl(opa_get_lid_from_gid((union ib_gid *)src->dgid));
+ } else {
+ dst->rec_type = SA_PATH_REC_TYPE_IB;
+ slid = htonl(ntohs(src->slid));
+ dlid = htonl(ntohs(src->dlid));
+ }
memcpy(dst->dgid.raw, src->dgid, sizeof dst->dgid);
memcpy(dst->sgid.raw, src->sgid, sizeof dst->sgid);
- dst->dlid = src->dlid;
- dst->slid = src->slid;
- dst->raw_traffic = src->raw_traffic;
+ sa_path_set_dlid(dst, dlid);
+ sa_path_set_slid(dst, slid);
+ sa_path_set_raw_traffic(dst, src->raw_traffic);
dst->flow_label = src->flow_label;
dst->hop_limit = src->hop_limit;
dst->traffic_class = src->traffic_class;
@@ -141,9 +172,9 @@ void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
dst->preference = src->preference;
dst->packet_life_time_selector = src->packet_life_time_selector;
- memset(dst->dmac, 0, sizeof(dst->dmac));
- dst->net = NULL;
- dst->ifindex = 0;
- dst->gid_type = IB_GID_TYPE_IB;
+ /* TODO: No need to set this */
+ sa_path_set_dmac_zero(dst);
+ sa_path_set_ndev(dst, NULL);
+ sa_path_set_ifindex(dst, 0);
}
EXPORT_SYMBOL(ib_copy_path_rec_from_user);
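The conversions above exist because sa_path_rec now stores LIDs as 32-bit big-endian values (wide enough for OPA), while ib_user_path_rec keeps the classic 16-bit big-endian LIDs, so copying in either direction is a width change done via host order: htons(ntohl(x)) narrows and htonl(ntohs(x)) widens. A small standalone illustration (the values and the main() harness are made up):

/* Hedged, self-contained demonstration of the 16-bit <-> 32-bit
 * big-endian LID round trip used by the copy helpers above. */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	uint16_t user_lid = htons(0xC001);          /* 16-bit LID, network order */
	uint32_t sa_lid   = htonl(ntohs(user_lid)); /* widen to 32-bit BE        */
	uint16_t back     = htons(ntohl(sa_lid));   /* narrow back to 16-bit BE  */

	printf("user 0x%04x -> sa 0x%08x -> user 0x%04x\n",
	       ntohs(user_lid), ntohl(sa_lid), ntohs(back));
	return 0;
}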
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
new file mode 100644
index 000000000000..ef293379f37a
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -0,0 +1,275 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <linux/bug.h>
+#include <linux/file.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int uverbs_free_ah(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ return rdma_destroy_ah((struct ib_ah *)uobject->object);
+}
+
+static int uverbs_free_flow(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ return ib_destroy_flow((struct ib_flow *)uobject->object);
+}
+
+static int uverbs_free_mw(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ return uverbs_dealloc_mw((struct ib_mw *)uobject->object);
+}
+
+static int uverbs_free_qp(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_qp *qp = uobject->object;
+ struct ib_uqp_object *uqp =
+ container_of(uobject, struct ib_uqp_object, uevent.uobject);
+ int ret;
+
+ if (why == RDMA_REMOVE_DESTROY) {
+ if (!list_empty(&uqp->mcast_list))
+ return -EBUSY;
+ } else if (qp == qp->real_qp) {
+ ib_uverbs_detach_umcast(qp, uqp);
+ }
+
+ ret = ib_destroy_qp(qp);
+ if (ret && why == RDMA_REMOVE_DESTROY)
+ return ret;
+
+ if (uqp->uxrcd)
+ atomic_dec(&uqp->uxrcd->refcnt);
+
+ ib_uverbs_release_uevent(uobject->context->ufile, &uqp->uevent);
+ return ret;
+}
+
+static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_rwq_ind_table *rwq_ind_tbl = uobject->object;
+ struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
+ int ret;
+
+ ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
+ if (!ret || why != RDMA_REMOVE_DESTROY)
+ kfree(ind_tbl);
+ return ret;
+}
+
+static int uverbs_free_wq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_wq *wq = uobject->object;
+ struct ib_uwq_object *uwq =
+ container_of(uobject, struct ib_uwq_object, uevent.uobject);
+ int ret;
+
+ ret = ib_destroy_wq(wq);
+ if (!ret || why != RDMA_REMOVE_DESTROY)
+ ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent);
+ return ret;
+}
+
+static int uverbs_free_srq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_srq *srq = uobject->object;
+ struct ib_uevent_object *uevent =
+ container_of(uobject, struct ib_uevent_object, uobject);
+ enum ib_srq_type srq_type = srq->srq_type;
+ int ret;
+
+ ret = ib_destroy_srq(srq);
+
+ if (ret && why == RDMA_REMOVE_DESTROY)
+ return ret;
+
+ if (srq_type == IB_SRQT_XRC) {
+ struct ib_usrq_object *us =
+ container_of(uevent, struct ib_usrq_object, uevent);
+
+ atomic_dec(&us->uxrcd->refcnt);
+ }
+
+ ib_uverbs_release_uevent(uobject->context->ufile, uevent);
+ return ret;
+}
+
+static int uverbs_free_cq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_cq *cq = uobject->object;
+ struct ib_uverbs_event_queue *ev_queue = cq->cq_context;
+ struct ib_ucq_object *ucq =
+ container_of(uobject, struct ib_ucq_object, uobject);
+ int ret;
+
+ ret = ib_destroy_cq(cq);
+ if (!ret || why != RDMA_REMOVE_DESTROY)
+ ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ?
+ container_of(ev_queue,
+ struct ib_uverbs_completion_event_file,
+ ev_queue) : NULL,
+ ucq);
+ return ret;
+}
+
+static int uverbs_free_mr(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ return ib_dereg_mr((struct ib_mr *)uobject->object);
+}
+
+static int uverbs_free_xrcd(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_xrcd *xrcd = uobject->object;
+ struct ib_uxrcd_object *uxrcd =
+ container_of(uobject, struct ib_uxrcd_object, uobject);
+ int ret;
+
+ mutex_lock(&uobject->context->ufile->device->xrcd_tree_mutex);
+ if (why == RDMA_REMOVE_DESTROY && atomic_read(&uxrcd->refcnt))
+ ret = -EBUSY;
+ else
+ ret = ib_uverbs_dealloc_xrcd(uobject->context->ufile->device,
+ xrcd, why);
+ mutex_unlock(&uobject->context->ufile->device->xrcd_tree_mutex);
+
+ return ret;
+}
+
+static int uverbs_free_pd(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_pd *pd = uobject->object;
+
+ if (why == RDMA_REMOVE_DESTROY && atomic_read(&pd->usecnt))
+ return -EBUSY;
+
+ ib_dealloc_pd((struct ib_pd *)uobject->object);
+ return 0;
+}
+
+static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_file,
+ enum rdma_remove_reason why)
+{
+ struct ib_uverbs_completion_event_file *comp_event_file =
+ container_of(uobj_file, struct ib_uverbs_completion_event_file,
+ uobj_file);
+ struct ib_uverbs_event_queue *event_queue = &comp_event_file->ev_queue;
+
+ spin_lock_irq(&event_queue->lock);
+ event_queue->is_closed = 1;
+ spin_unlock_irq(&event_queue->lock);
+
+ if (why == RDMA_REMOVE_DRIVER_REMOVE) {
+ wake_up_interruptible(&event_queue->poll_wait);
+ kill_fasync(&event_queue->async_queue, SIGIO, POLL_IN);
+ }
+ return 0;
+};
+
+const struct uverbs_obj_fd_type uverbs_type_attrs_comp_channel = {
+ .type = UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file), 0),
+ .context_closed = uverbs_hot_unplug_completion_event_file,
+ .fops = &uverbs_event_fops,
+ .name = "[infinibandevent]",
+ .flags = O_RDONLY,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_cq = {
+ .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0),
+ .destroy_object = uverbs_free_cq,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_qp = {
+ .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0),
+ .destroy_object = uverbs_free_qp,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_mw = {
+ .type = UVERBS_TYPE_ALLOC_IDR(0),
+ .destroy_object = uverbs_free_mw,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_mr = {
+ /* 1 is used in order to free the MR after all the MWs */
+ .type = UVERBS_TYPE_ALLOC_IDR(1),
+ .destroy_object = uverbs_free_mr,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_srq = {
+ .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0),
+ .destroy_object = uverbs_free_srq,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_ah = {
+ .type = UVERBS_TYPE_ALLOC_IDR(0),
+ .destroy_object = uverbs_free_ah,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_flow = {
+ .type = UVERBS_TYPE_ALLOC_IDR(0),
+ .destroy_object = uverbs_free_flow,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_wq = {
+ .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0),
+ .destroy_object = uverbs_free_wq,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_rwq_ind_table = {
+ .type = UVERBS_TYPE_ALLOC_IDR(0),
+ .destroy_object = uverbs_free_rwq_ind_tbl,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_xrcd = {
+ .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0),
+ .destroy_object = uverbs_free_xrcd,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_pd = {
+ /* 2 is used in order to free the PD after MRs */
+ .type = UVERBS_TYPE_ALLOC_IDR(2),
+ .destroy_object = uverbs_free_pd,
+};
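Each uverbs_obj_idr_type above pairs an allocation size with a destruction-order key: 0 for most objects, 1 for MRs so they are freed after type-2A MWs, and 2 for PDs so they go last. A hedged sketch of how an additional object type could be declared with the same macros; the object name and its free callback are hypothetical, not part of the patch:

/* Hypothetical extra type, destroyed with order key 0 (before MRs and
 * PDs). uverbs_free_foo() is assumed to have the same signature as the
 * uverbs_free_*() helpers above. */
static int uverbs_free_foo(struct ib_uobject *uobject,
			   enum rdma_remove_reason why)
{
	/* release whatever driver object uobject->object points at */
	return 0;
}

const struct uverbs_obj_idr_type uverbs_type_attrs_foo = {
	.type = UVERBS_TYPE_ALLOC_IDR(0),
	.destroy_object = uverbs_free_foo,
};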
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 71580cc28c9e..fb98ed67d5bc 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -44,6 +44,7 @@
#include <linux/in.h>
#include <linux/in6.h>
#include <net/addrconf.h>
+#include <linux/security.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
@@ -311,7 +312,7 @@ EXPORT_SYMBOL(ib_dealloc_pd);
/* Address handles */
-struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr)
{
struct ib_ah *ah;
@@ -321,12 +322,13 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
ah->device = pd->device;
ah->pd = pd;
ah->uobject = NULL;
+ ah->type = ah_attr->type;
atomic_inc(&pd->usecnt);
}
return ah;
}
-EXPORT_SYMBOL(ib_create_ah);
+EXPORT_SYMBOL(rdma_create_ah);
int ib_get_rdma_header_version(const union rdma_network_hdr *hdr)
{
@@ -450,9 +452,22 @@ int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
}
EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
+/*
+ * This function creates an AH from the incoming packet.
+ * The incoming packet carries the dgid of the receiving node (the node
+ * on which this code is executing), while its sgid holds the GID of
+ * the sender.
+ *
+ * When resolving the destination MAC address, the received dgid is
+ * used as the sgid and the received sgid is used as the dgid, because
+ * the received sgid identifies the destination that must be responded
+ * to.
+ *
+ * This is why, in the call to rdma_addr_find_l2_eth_by_grh(), the
+ * positions of the dgid and sgid arguments do not match the order of
+ * the parameters.
+ */
int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
const struct ib_wc *wc, const struct ib_grh *grh,
- struct ib_ah_attr *ah_attr)
+ struct rdma_ah_attr *ah_attr)
{
u32 flow_class;
u16 gid_index;
@@ -464,6 +479,7 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
union ib_gid sgid;
memset(ah_attr, 0, sizeof *ah_attr);
+ ah_attr->type = rdma_ah_find_type(device, port_num);
if (rdma_cap_eth_ah(device, port_num)) {
if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE)
net_type = wc->network_hdr_type;
@@ -494,7 +510,7 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
return -ENODEV;
ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid,
- ah_attr->dmac,
+ ah_attr->roce.dmac,
wc->wc_flags & IB_WC_WITH_VLAN ?
NULL : &vlan_id,
&if_index, &hoplimit);
@@ -504,11 +520,6 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
}
resolved_dev = dev_get_by_index(&init_net, if_index);
- if (resolved_dev->flags & IFF_LOOPBACK) {
- dev_put(resolved_dev);
- resolved_dev = idev;
- dev_hold(resolved_dev);
- }
rcu_read_lock();
if (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev,
resolved_dev))
@@ -525,15 +536,12 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
return ret;
}
- ah_attr->dlid = wc->slid;
- ah_attr->sl = wc->sl;
- ah_attr->src_path_bits = wc->dlid_path_bits;
- ah_attr->port_num = port_num;
+ rdma_ah_set_dlid(ah_attr, wc->slid);
+ rdma_ah_set_sl(ah_attr, wc->sl);
+ rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits);
+ rdma_ah_set_port_num(ah_attr, port_num);
if (wc->wc_flags & IB_WC_GRH) {
- ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->grh.dgid = sgid;
-
if (!rdma_cap_eth_ah(device, port_num)) {
if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
ret = ib_find_cached_gid_by_port(device, &dgid,
@@ -547,11 +555,12 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
}
}
- ah_attr->grh.sgid_index = (u8) gid_index;
flow_class = be32_to_cpu(grh->version_tclass_flow);
- ah_attr->grh.flow_label = flow_class & 0xFFFFF;
- ah_attr->grh.hop_limit = hoplimit;
- ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF;
+ rdma_ah_set_grh(ah_attr, &sgid,
+ flow_class & 0xFFFFF,
+ (u8)gid_index, hoplimit,
+ (flow_class >> 20) & 0xFF);
+
}
return 0;
}
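A hedged usage sketch of the behaviour described in the comment at the top of ib_init_ah_from_wc(): a receiver that wants to answer the sender of a work completion can build the address handle straight from the WC and GRH with ib_create_ah_from_wc() (also converted to rdma_ah_attr below), which performs the dgid/sgid swap internally. The wrapper is illustrative only:

/* Hedged sketch: build an AH that addresses the sender of a received
 * packet. pd, wc, grh and port_num are assumed to come from the
 * caller's receive path. */
static struct ib_ah *make_reply_ah(struct ib_pd *pd, const struct ib_wc *wc,
				   const struct ib_grh *grh, u8 port_num)
{
	/* The arriving dgid/sgid pair is swapped internally so the
	 * resulting AH points back at the original sender. */
	return ib_create_ah_from_wc(pd, wc, grh, port_num);
}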
@@ -560,34 +569,37 @@ EXPORT_SYMBOL(ib_init_ah_from_wc);
struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
const struct ib_grh *grh, u8 port_num)
{
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
int ret;
ret = ib_init_ah_from_wc(pd->device, port_num, wc, grh, &ah_attr);
if (ret)
return ERR_PTR(ret);
- return ib_create_ah(pd, &ah_attr);
+ return rdma_create_ah(pd, &ah_attr);
}
EXPORT_SYMBOL(ib_create_ah_from_wc);
-int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
{
+ if (ah->type != ah_attr->type)
+ return -EINVAL;
+
return ah->device->modify_ah ?
ah->device->modify_ah(ah, ah_attr) :
-ENOSYS;
}
-EXPORT_SYMBOL(ib_modify_ah);
+EXPORT_SYMBOL(rdma_modify_ah);
-int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
{
return ah->device->query_ah ?
ah->device->query_ah(ah, ah_attr) :
-ENOSYS;
}
-EXPORT_SYMBOL(ib_query_ah);
+EXPORT_SYMBOL(rdma_query_ah);
-int ib_destroy_ah(struct ib_ah *ah)
+int rdma_destroy_ah(struct ib_ah *ah)
{
struct ib_pd *pd;
int ret;
@@ -599,7 +611,7 @@ int ib_destroy_ah(struct ib_ah *ah)
return ret;
}
-EXPORT_SYMBOL(ib_destroy_ah);
+EXPORT_SYMBOL(rdma_destroy_ah);
/* Shared receive queues */
@@ -710,12 +722,20 @@ static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp,
{
struct ib_qp *qp;
unsigned long flags;
+ int err;
qp = kzalloc(sizeof *qp, GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
qp->real_qp = real_qp;
+ err = ib_open_shared_qp_security(qp, real_qp->device);
+ if (err) {
+ kfree(qp);
+ return ERR_PTR(err);
+ }
+
+ qp->real_qp = real_qp;
atomic_inc(&real_qp->usecnt);
qp->device = real_qp->device;
qp->event_handler = event_handler;
@@ -801,6 +821,12 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
if (IS_ERR(qp))
return qp;
+ ret = ib_create_qp_security(qp, device);
+ if (ret) {
+ ib_destroy_qp(qp);
+ return ERR_PTR(ret);
+ }
+
qp->device = device;
qp->real_qp = qp;
qp->uobject = NULL;
@@ -869,6 +895,7 @@ static const struct {
} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
[IB_QPS_RESET] = {
[IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
[IB_QPS_INIT] = {
.valid = 1,
.req_param = {
@@ -1201,20 +1228,22 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
EXPORT_SYMBOL(ib_modify_qp_is_ok);
int ib_resolve_eth_dmac(struct ib_device *device,
- struct ib_ah_attr *ah_attr)
+ struct rdma_ah_attr *ah_attr)
{
int ret = 0;
+ struct ib_global_route *grh;
- if (ah_attr->port_num < rdma_start_port(device) ||
- ah_attr->port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, rdma_ah_get_port_num(ah_attr)))
return -EINVAL;
- if (!rdma_cap_eth_ah(device, ah_attr->port_num))
+ if (ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE)
return 0;
- if (rdma_link_local_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
- rdma_get_ll_mac((struct in6_addr *)ah_attr->grh.dgid.raw,
- ah_attr->dmac);
+ grh = rdma_ah_retrieve_grh(ah_attr);
+
+ if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw)) {
+ rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
+ ah_attr->roce.dmac);
} else {
union ib_gid sgid;
struct ib_gid_attr sgid_attr;
@@ -1222,8 +1251,8 @@ int ib_resolve_eth_dmac(struct ib_device *device,
int hop_limit;
ret = ib_query_gid(device,
- ah_attr->port_num,
- ah_attr->grh.sgid_index,
+ rdma_ah_get_port_num(ah_attr),
+ grh->sgid_index,
&sgid, &sgid_attr);
if (ret || !sgid_attr.ndev) {
@@ -1234,34 +1263,50 @@ int ib_resolve_eth_dmac(struct ib_device *device,
ifindex = sgid_attr.ndev->ifindex;
- ret = rdma_addr_find_l2_eth_by_grh(&sgid,
- &ah_attr->grh.dgid,
- ah_attr->dmac,
- NULL, &ifindex, &hop_limit);
+ ret =
+ rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid,
+ ah_attr->roce.dmac,
+ NULL, &ifindex, &hop_limit);
dev_put(sgid_attr.ndev);
- ah_attr->grh.hop_limit = hop_limit;
+ grh->hop_limit = hop_limit;
}
out:
return ret;
}
EXPORT_SYMBOL(ib_resolve_eth_dmac);
-int ib_modify_qp(struct ib_qp *qp,
- struct ib_qp_attr *qp_attr,
- int qp_attr_mask)
+/**
+ * ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
+ * @qp: The QP to modify.
+ * @attr: On input, specifies the QP attributes to modify. On output,
+ * the current values of selected QP attributes are returned.
+ * @attr_mask: A bit-mask used to specify which attributes of the QP
+ * are being modified.
+ * @udata: pointer to the user's input/output buffer information
+ *
+ * Returns 0 on success, or an appropriate error code on failure.
+ */
+int ib_modify_qp_with_udata(struct ib_qp *qp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
{
+ int ret;
- if (qp_attr_mask & IB_QP_AV) {
- int ret;
-
- ret = ib_resolve_eth_dmac(qp->device, &qp_attr->ah_attr);
+ if (attr_mask & IB_QP_AV) {
+ ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
if (ret)
return ret;
}
+ return ib_security_modify_qp(qp, attr, attr_mask, udata);
+}
+EXPORT_SYMBOL(ib_modify_qp_with_udata);
- return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
+int ib_modify_qp(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr,
+ int qp_attr_mask)
+{
+ return ib_modify_qp_with_udata(qp, qp_attr, qp_attr_mask, NULL);
}
EXPORT_SYMBOL(ib_modify_qp);
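A hedged sketch of how the two entry points above relate: callers that carry user data (the uverbs command path) forward it so that both the security hook and, for IB_QP_AV, the DMAC resolution run with that context, while kernel-only callers keep using ib_modify_qp(), which now simply passes a NULL udata. The dispatcher below is illustrative, not part of the patch:

/* Hedged sketch: qp, attr, attr_mask and udata are assumed to come from
 * the surrounding command handler. */
static int modify_qp_example(struct ib_qp *qp, struct ib_qp_attr *attr,
			     int attr_mask, struct ib_udata *udata)
{
	if (udata)
		return ib_modify_qp_with_udata(qp, attr, attr_mask, udata);
	return ib_modify_qp(qp, attr, attr_mask);
}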
@@ -1290,6 +1335,7 @@ int ib_close_qp(struct ib_qp *qp)
spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
atomic_dec(&real_qp->usecnt);
+ ib_close_shared_qp_security(qp->qp_sec);
kfree(qp);
return 0;
@@ -1330,6 +1376,7 @@ int ib_destroy_qp(struct ib_qp *qp)
struct ib_cq *scq, *rcq;
struct ib_srq *srq;
struct ib_rwq_ind_table *ind_tbl;
+ struct ib_qp_security *sec;
int ret;
WARN_ON_ONCE(qp->mrs_used > 0);
@@ -1345,6 +1392,9 @@ int ib_destroy_qp(struct ib_qp *qp)
rcq = qp->recv_cq;
srq = qp->srq;
ind_tbl = qp->rwq_ind_tbl;
+ sec = qp->qp_sec;
+ if (sec)
+ ib_destroy_qp_security_begin(sec);
if (!qp->uobject)
rdma_rw_cleanup_mrs(qp);
@@ -1361,6 +1411,11 @@ int ib_destroy_qp(struct ib_qp *qp)
atomic_dec(&srq->usecnt);
if (ind_tbl)
atomic_dec(&ind_tbl->usecnt);
+ if (sec)
+ ib_destroy_qp_security_end(sec);
+ } else {
+ if (sec)
+ ib_destroy_qp_security_abort(sec);
}
return ret;
@@ -1520,7 +1575,9 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
if (!qp->device->attach_mcast)
return -ENOSYS;
- if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
+ if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD ||
+ lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
+ lid == be16_to_cpu(IB_LID_PERMISSIVE))
return -EINVAL;
ret = qp->device->attach_mcast(qp, gid, lid);
@@ -1536,7 +1593,9 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
if (!qp->device->detach_mcast)
return -ENOSYS;
- if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
+ if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD ||
+ lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
+ lid == be16_to_cpu(IB_LID_PERMISSIVE))
return -EINVAL;
ret = qp->device->detach_mcast(qp, gid, lid);
@@ -1949,17 +2008,12 @@ static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
*/
static void __ib_drain_sq(struct ib_qp *qp)
{
+ struct ib_cq *cq = qp->send_cq;
struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
struct ib_drain_cqe sdrain;
struct ib_send_wr swr = {}, *bad_swr;
int ret;
- if (qp->send_cq->poll_ctx == IB_POLL_DIRECT) {
- WARN_ONCE(qp->send_cq->poll_ctx == IB_POLL_DIRECT,
- "IB_POLL_DIRECT poll_ctx not supported for drain\n");
- return;
- }
-
swr.wr_cqe = &sdrain.cqe;
sdrain.cqe.done = ib_drain_qp_done;
init_completion(&sdrain.done);
@@ -1976,7 +2030,11 @@ static void __ib_drain_sq(struct ib_qp *qp)
return;
}
- wait_for_completion(&sdrain.done);
+ if (cq->poll_ctx == IB_POLL_DIRECT)
+ while (wait_for_completion_timeout(&sdrain.done, HZ / 10) <= 0)
+ ib_process_cq_direct(cq, -1);
+ else
+ wait_for_completion(&sdrain.done);
}
/*
@@ -1984,17 +2042,12 @@ static void __ib_drain_sq(struct ib_qp *qp)
*/
static void __ib_drain_rq(struct ib_qp *qp)
{
+ struct ib_cq *cq = qp->recv_cq;
struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
struct ib_drain_cqe rdrain;
struct ib_recv_wr rwr = {}, *bad_rwr;
int ret;
- if (qp->recv_cq->poll_ctx == IB_POLL_DIRECT) {
- WARN_ONCE(qp->recv_cq->poll_ctx == IB_POLL_DIRECT,
- "IB_POLL_DIRECT poll_ctx not supported for drain\n");
- return;
- }
-
rwr.wr_cqe = &rdrain.cqe;
rdrain.cqe.done = ib_drain_qp_done;
init_completion(&rdrain.done);
@@ -2011,7 +2064,11 @@ static void __ib_drain_rq(struct ib_qp *qp)
return;
}
- wait_for_completion(&rdrain.done);
+ if (cq->poll_ctx == IB_POLL_DIRECT)
+ while (wait_for_completion_timeout(&rdrain.done, HZ / 10) <= 0)
+ ib_process_cq_direct(cq, -1);
+ else
+ wait_for_completion(&rdrain.done);
}
/**
@@ -2028,8 +2085,7 @@ static void __ib_drain_rq(struct ib_qp *qp)
* ensure there is room in the CQ and SQ for the drain work request and
* completion.
*
- * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
- * IB_POLL_DIRECT.
+ * allocate the CQ using ib_alloc_cq().
*
* ensure that there are no other contexts that are posting WRs concurrently.
* Otherwise the drain is not guaranteed.
@@ -2057,8 +2113,7 @@ EXPORT_SYMBOL(ib_drain_sq);
* ensure there is room in the CQ and RQ for the drain work request and
* completion.
*
- * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
- * IB_POLL_DIRECT.
+ * allocate the CQ using ib_alloc_cq().
*
* ensure that there are no other contexts that are posting WRs concurrently.
* Otherwise the drain is not guaranteed.
@@ -2082,8 +2137,7 @@ EXPORT_SYMBOL(ib_drain_rq);
* ensure there is room in the CQ(s), SQ, and RQ for drain work requests
* and completions.
*
- * allocate the CQs using ib_alloc_cq() and the CQ poll context cannot be
- * IB_POLL_DIRECT.
+ * allocate the CQs using ib_alloc_cq().
*
* ensure that there are no other contexts that are posting WRs concurrently.
* Otherwise the drain is not guaranteed.
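A hedged sketch of what the relaxed documentation above now permits: draining a QP whose CQs were allocated with IB_POLL_DIRECT. The drain helpers poll the CQ themselves while waiting for the drain completion, so no separate polling context is required; previously this case hit a WARN and returned early. The wrapper is illustrative only:

/* Hedged sketch: qp is assumed to have been created elsewhere with
 * send/recv CQs in IB_POLL_DIRECT mode. */
static void drain_direct_qp(struct ib_qp *qp)
{
	ib_drain_qp(qp);	/* now valid even for IB_POLL_DIRECT CQs */
}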
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index ed553de2ca12..34c93abf0fe0 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -12,3 +12,4 @@ obj-$(CONFIG_INFINIBAND_USNIC) += usnic/
obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/
obj-$(CONFIG_INFINIBAND_HNS) += hns/
obj-$(CONFIG_INFINIBAND_QEDR) += qedr/
+obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re/
diff --git a/drivers/infiniband/hw/bnxt_re/Kconfig b/drivers/infiniband/hw/bnxt_re/Kconfig
new file mode 100644
index 000000000000..19982a4a9bba
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/Kconfig
@@ -0,0 +1,9 @@
+config INFINIBAND_BNXT_RE
+ tristate "Broadcom Netxtreme HCA support"
+ depends on ETHERNET && NETDEVICES && PCI && INET && DCB
+ select NET_VENDOR_BROADCOM
+ select BNXT
+ ---help---
+ This driver supports Broadcom NetXtreme-E 10/25/40/50 gigabit
+ RoCE HCAs. To compile this driver as a module, choose M here:
+ the module will be called bnxt_re.
diff --git a/drivers/infiniband/hw/bnxt_re/Makefile b/drivers/infiniband/hw/bnxt_re/Makefile
new file mode 100644
index 000000000000..036f84efbc73
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/Makefile
@@ -0,0 +1,6 @@
+
+ccflags-y := -Idrivers/net/ethernet/broadcom/bnxt
+obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re.o
+bnxt_re-y := main.o ib_verbs.o \
+ qplib_res.o qplib_rcfw.o \
+ qplib_sp.o qplib_fp.o
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
new file mode 100644
index 000000000000..85527532c49d
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -0,0 +1,159 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Slow Path Operators (header)
+ *
+ */
+
+#ifndef __BNXT_RE_H__
+#define __BNXT_RE_H__
+#define ROCE_DRV_MODULE_NAME "bnxt_re"
+#define ROCE_DRV_MODULE_VERSION "1.0.0"
+
+#define BNXT_RE_DESC "Broadcom NetXtreme-C/E RoCE Driver"
+
+#define BNXT_RE_PAGE_SIZE_4K BIT(12)
+#define BNXT_RE_PAGE_SIZE_8K BIT(13)
+#define BNXT_RE_PAGE_SIZE_64K BIT(16)
+#define BNXT_RE_PAGE_SIZE_2M BIT(21)
+#define BNXT_RE_PAGE_SIZE_8M BIT(23)
+#define BNXT_RE_PAGE_SIZE_1G BIT(30)
+
+#define BNXT_RE_MAX_MR_SIZE BIT(30)
+
+#define BNXT_RE_MAX_QPC_COUNT (64 * 1024)
+#define BNXT_RE_MAX_MRW_COUNT (64 * 1024)
+#define BNXT_RE_MAX_SRQC_COUNT (64 * 1024)
+#define BNXT_RE_MAX_CQ_COUNT (64 * 1024)
+
+#define BNXT_RE_UD_QP_HW_STALL 0x400000
+
+#define BNXT_RE_RQ_WQE_THRESHOLD 32
+
+/*
+ * Setting the default ack delay value to 16, which means
+ * the default timeout is approx. 260 ms (4 usec * 2^(timeout))
+ */
+
+#define BNXT_RE_DEFAULT_ACK_DELAY 16
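A quick standalone check of the formula in the comment above, using the 4 usec approximation it quotes (the exact IBTA unit is 4.096 usec); the little program is illustrative only:

/* timeout ~= 4 usec * 2^ack_delay; the default of 16 gives ~262 ms,
 * i.e. the "approx. 260 ms" stated above. */
#include <stdio.h>

int main(void)
{
	unsigned int ack_delay = 16;	/* BNXT_RE_DEFAULT_ACK_DELAY */
	double usec = 4.0 * (1u << ack_delay);

	printf("ack_delay %u -> %.1f ms\n", ack_delay, usec / 1000.0);
	return 0;
}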
+
+struct bnxt_re_work {
+ struct work_struct work;
+ unsigned long event;
+ struct bnxt_re_dev *rdev;
+ struct net_device *vlan_dev;
+};
+
+struct bnxt_re_sqp_entries {
+ struct bnxt_qplib_sge sge;
+ u64 wrid;
+ /* For storing the actual qp1 cqe */
+ struct bnxt_qplib_cqe cqe;
+ struct bnxt_re_qp *qp1_qp;
+};
+
+#define BNXT_RE_MIN_MSIX 2
+#define BNXT_RE_MAX_MSIX 16
+#define BNXT_RE_AEQ_IDX 0
+#define BNXT_RE_NQ_IDX 1
+
+struct bnxt_re_dev {
+ struct ib_device ibdev;
+ struct list_head list;
+ unsigned long flags;
+#define BNXT_RE_FLAG_NETDEV_REGISTERED 0
+#define BNXT_RE_FLAG_IBDEV_REGISTERED 1
+#define BNXT_RE_FLAG_GOT_MSIX 2
+#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 8
+#define BNXT_RE_FLAG_QOS_WORK_REG 16
+ struct net_device *netdev;
+ unsigned int version, major, minor;
+ struct bnxt_en_dev *en_dev;
+ struct bnxt_msix_entry msix_entries[BNXT_RE_MAX_MSIX];
+ int num_msix;
+
+ int id;
+
+ struct delayed_work worker;
+ u8 cur_prio_map;
+
+ /* FP Notification Queue (CQ & SRQ) */
+ struct tasklet_struct nq_task;
+
+ /* RCFW Channel */
+ struct bnxt_qplib_rcfw rcfw;
+
+ /* NQ */
+ struct bnxt_qplib_nq nq;
+
+ /* Device Resources */
+ struct bnxt_qplib_dev_attr dev_attr;
+ struct bnxt_qplib_ctx qplib_ctx;
+ struct bnxt_qplib_res qplib_res;
+ struct bnxt_qplib_dpi dpi_privileged;
+
+ atomic_t qp_count;
+ struct mutex qp_lock; /* protect qp list */
+ struct list_head qp_list;
+
+ atomic_t cq_count;
+ atomic_t srq_count;
+ atomic_t mr_count;
+ atomic_t mw_count;
+ /* Max of 2 lossless traffic class supported per port */
+ u16 cosq[2];
+
+ /* QP for handling QP1 packets */
+ u32 sqp_id;
+ struct bnxt_re_qp *qp1_sqp;
+ struct bnxt_re_ah *sqp_ah;
+ struct bnxt_re_sqp_entries sqp_tbl[1024];
+};
+
+#define to_bnxt_re_dev(ptr, member) \
+ container_of((ptr), struct bnxt_re_dev, member)
+
+#define BNXT_RE_ROCE_V1_PACKET 0
+#define BNXT_RE_ROCEV2_IPV4_PACKET 2
+#define BNXT_RE_ROCEV2_IPV6_PACKET 3
+
+static inline struct device *rdev_to_dev(struct bnxt_re_dev *rdev)
+{
+ if (rdev)
+ return &rdev->ibdev.dev;
+ return NULL;
+}
+
+#endif
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
new file mode 100644
index 000000000000..f0e01b3ac711
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -0,0 +1,3452 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: IB Verbs interpreter
+ */
+
+#include <linux/interrupt.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/if_ether.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_cache.h>
+
+#include "bnxt_ulp.h"
+
+#include "roce_hsi.h"
+#include "qplib_res.h"
+#include "qplib_sp.h"
+#include "qplib_fp.h"
+#include "qplib_rcfw.h"
+
+#include "bnxt_re.h"
+#include "ib_verbs.h"
+#include <rdma/bnxt_re-abi.h>
+
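+/*
+ * Translate IB verbs access flags into the qplib/HW flag encoding used
+ * by the firmware interface (and back again in __to_ib_access_flags()).
+ */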
+static int __from_ib_access_flags(int iflags)
+{
+ int qflags = 0;
+
+ if (iflags & IB_ACCESS_LOCAL_WRITE)
+ qflags |= BNXT_QPLIB_ACCESS_LOCAL_WRITE;
+ if (iflags & IB_ACCESS_REMOTE_READ)
+ qflags |= BNXT_QPLIB_ACCESS_REMOTE_READ;
+ if (iflags & IB_ACCESS_REMOTE_WRITE)
+ qflags |= BNXT_QPLIB_ACCESS_REMOTE_WRITE;
+ if (iflags & IB_ACCESS_REMOTE_ATOMIC)
+ qflags |= BNXT_QPLIB_ACCESS_REMOTE_ATOMIC;
+ if (iflags & IB_ACCESS_MW_BIND)
+ qflags |= BNXT_QPLIB_ACCESS_MW_BIND;
+ if (iflags & IB_ZERO_BASED)
+ qflags |= BNXT_QPLIB_ACCESS_ZERO_BASED;
+ if (iflags & IB_ACCESS_ON_DEMAND)
+ qflags |= BNXT_QPLIB_ACCESS_ON_DEMAND;
+ return qflags;
+}
+
+static enum ib_access_flags __to_ib_access_flags(int qflags)
+{
+ enum ib_access_flags iflags = 0;
+
+ if (qflags & BNXT_QPLIB_ACCESS_LOCAL_WRITE)
+ iflags |= IB_ACCESS_LOCAL_WRITE;
+ if (qflags & BNXT_QPLIB_ACCESS_REMOTE_WRITE)
+ iflags |= IB_ACCESS_REMOTE_WRITE;
+ if (qflags & BNXT_QPLIB_ACCESS_REMOTE_READ)
+ iflags |= IB_ACCESS_REMOTE_READ;
+ if (qflags & BNXT_QPLIB_ACCESS_REMOTE_ATOMIC)
+ iflags |= IB_ACCESS_REMOTE_ATOMIC;
+ if (qflags & BNXT_QPLIB_ACCESS_MW_BIND)
+ iflags |= IB_ACCESS_MW_BIND;
+ if (qflags & BNXT_QPLIB_ACCESS_ZERO_BASED)
+ iflags |= IB_ZERO_BASED;
+ if (qflags & BNXT_QPLIB_ACCESS_ON_DEMAND)
+ iflags |= IB_ACCESS_ON_DEMAND;
+ return iflags;
+}
+
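+/*
+ * Copy an ib_sge array into the qplib SGE layout and return the total
+ * payload length in bytes.
+ */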
+static int bnxt_re_build_sgl(struct ib_sge *ib_sg_list,
+ struct bnxt_qplib_sge *sg_list, int num)
+{
+ int i, total = 0;
+
+ for (i = 0; i < num; i++) {
+ sg_list[i].addr = ib_sg_list[i].addr;
+ sg_list[i].lkey = ib_sg_list[i].lkey;
+ sg_list[i].size = ib_sg_list[i].length;
+ total += sg_list[i].size;
+ }
+ return total;
+}
+
+/* Device */
+struct net_device *bnxt_re_get_netdev(struct ib_device *ibdev, u8 port_num)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct net_device *netdev = NULL;
+
+ rcu_read_lock();
+ if (rdev)
+ netdev = rdev->netdev;
+ if (netdev)
+ dev_hold(netdev);
+
+ rcu_read_unlock();
+ return netdev;
+}
+
+int bnxt_re_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *ib_attr,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+
+ memset(ib_attr, 0, sizeof(*ib_attr));
+
+ ib_attr->fw_ver = (u64)(unsigned long)(dev_attr->fw_ver);
+ bnxt_qplib_get_guid(rdev->netdev->dev_addr,
+ (u8 *)&ib_attr->sys_image_guid);
+ ib_attr->max_mr_size = BNXT_RE_MAX_MR_SIZE;
+ ib_attr->page_size_cap = BNXT_RE_PAGE_SIZE_4K;
+
+ ib_attr->vendor_id = rdev->en_dev->pdev->vendor;
+ ib_attr->vendor_part_id = rdev->en_dev->pdev->device;
+ ib_attr->hw_ver = rdev->en_dev->pdev->subsystem_device;
+ ib_attr->max_qp = dev_attr->max_qp;
+ ib_attr->max_qp_wr = dev_attr->max_qp_wqes;
+ ib_attr->device_cap_flags =
+ IB_DEVICE_CURR_QP_STATE_MOD
+ | IB_DEVICE_RC_RNR_NAK_GEN
+ | IB_DEVICE_SHUTDOWN_PORT
+ | IB_DEVICE_SYS_IMAGE_GUID
+ | IB_DEVICE_LOCAL_DMA_LKEY
+ | IB_DEVICE_RESIZE_MAX_WR
+ | IB_DEVICE_PORT_ACTIVE_EVENT
+ | IB_DEVICE_N_NOTIFY_CQ
+ | IB_DEVICE_MEM_WINDOW
+ | IB_DEVICE_MEM_WINDOW_TYPE_2B
+ | IB_DEVICE_MEM_MGT_EXTENSIONS;
+ ib_attr->max_sge = dev_attr->max_qp_sges;
+ ib_attr->max_sge_rd = dev_attr->max_qp_sges;
+ ib_attr->max_cq = dev_attr->max_cq;
+ ib_attr->max_cqe = dev_attr->max_cq_wqes;
+ ib_attr->max_mr = dev_attr->max_mr;
+ ib_attr->max_pd = dev_attr->max_pd;
+ ib_attr->max_qp_rd_atom = dev_attr->max_qp_rd_atom;
+ ib_attr->max_qp_init_rd_atom = dev_attr->max_qp_init_rd_atom;
+ if (dev_attr->is_atomic) {
+ ib_attr->atomic_cap = IB_ATOMIC_HCA;
+ ib_attr->masked_atomic_cap = IB_ATOMIC_HCA;
+ }
+
+ ib_attr->max_ee_rd_atom = 0;
+ ib_attr->max_res_rd_atom = 0;
+ ib_attr->max_ee_init_rd_atom = 0;
+ ib_attr->max_ee = 0;
+ ib_attr->max_rdd = 0;
+ ib_attr->max_mw = dev_attr->max_mw;
+ ib_attr->max_raw_ipv6_qp = 0;
+ ib_attr->max_raw_ethy_qp = dev_attr->max_raw_ethy_qp;
+ ib_attr->max_mcast_grp = 0;
+ ib_attr->max_mcast_qp_attach = 0;
+ ib_attr->max_total_mcast_qp_attach = 0;
+ ib_attr->max_ah = dev_attr->max_ah;
+
+ ib_attr->max_fmr = 0;
+ ib_attr->max_map_per_fmr = 0;
+
+ ib_attr->max_srq = dev_attr->max_srq;
+ ib_attr->max_srq_wr = dev_attr->max_srq_wqes;
+ ib_attr->max_srq_sge = dev_attr->max_srq_sges;
+
+ ib_attr->max_fast_reg_page_list_len = MAX_PBL_LVL_1_PGS;
+
+ ib_attr->max_pkeys = 1;
+ ib_attr->local_ca_ack_delay = BNXT_RE_DEFAULT_ACK_DELAY;
+ return 0;
+}
+
+int bnxt_re_modify_device(struct ib_device *ibdev,
+ int device_modify_mask,
+ struct ib_device_modify *device_modify)
+{
+ switch (device_modify_mask) {
+ case IB_DEVICE_MODIFY_SYS_IMAGE_GUID:
+ /* Modify the GUID requires the modification of the GID table */
+ /* GUID should be made as READ-ONLY */
+ break;
+ case IB_DEVICE_MODIFY_NODE_DESC:
+ /* Node Desc should be made as READ-ONLY */
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
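+/*
+ * Map the ethtool link speed of the underlying netdev to an
+ * approximate IB speed/width pair (e.g. 40G is reported as 4x QDR).
+ * Unrecognised speeds fall back to 1x SDR; SPEED_50000 is currently
+ * left unset here.
+ */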
+static void __to_ib_speed_width(struct net_device *netdev, u8 *speed, u8 *width)
+{
+ struct ethtool_link_ksettings lksettings;
+ u32 espeed;
+
+ if (netdev->ethtool_ops && netdev->ethtool_ops->get_link_ksettings) {
+ memset(&lksettings, 0, sizeof(lksettings));
+ rtnl_lock();
+ netdev->ethtool_ops->get_link_ksettings(netdev, &lksettings);
+ rtnl_unlock();
+ espeed = lksettings.base.speed;
+ } else {
+ espeed = SPEED_UNKNOWN;
+ }
+ switch (espeed) {
+ case SPEED_1000:
+ *speed = IB_SPEED_SDR;
+ *width = IB_WIDTH_1X;
+ break;
+ case SPEED_10000:
+ *speed = IB_SPEED_QDR;
+ *width = IB_WIDTH_1X;
+ break;
+ case SPEED_20000:
+ *speed = IB_SPEED_DDR;
+ *width = IB_WIDTH_4X;
+ break;
+ case SPEED_25000:
+ *speed = IB_SPEED_EDR;
+ *width = IB_WIDTH_1X;
+ break;
+ case SPEED_40000:
+ *speed = IB_SPEED_QDR;
+ *width = IB_WIDTH_4X;
+ break;
+ case SPEED_50000:
+ break;
+ default:
+ *speed = IB_SPEED_SDR;
+ *width = IB_WIDTH_1X;
+ break;
+ }
+}
+
+/* Port */
+int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_attr *port_attr)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+
+ memset(port_attr, 0, sizeof(*port_attr));
+
+ if (netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev)) {
+ port_attr->state = IB_PORT_ACTIVE;
+ port_attr->phys_state = 5;
+ } else {
+ port_attr->state = IB_PORT_DOWN;
+ port_attr->phys_state = 3;
+ }
+ port_attr->max_mtu = IB_MTU_4096;
+ port_attr->active_mtu = iboe_get_mtu(rdev->netdev->mtu);
+ port_attr->gid_tbl_len = dev_attr->max_sgid;
+ port_attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
+ IB_PORT_DEVICE_MGMT_SUP |
+ IB_PORT_VENDOR_CLASS_SUP |
+ IB_PORT_IP_BASED_GIDS;
+
+ /* Max MSG size set to 2G for now */
+ port_attr->max_msg_sz = 0x80000000;
+ port_attr->bad_pkey_cntr = 0;
+ port_attr->qkey_viol_cntr = 0;
+ port_attr->pkey_tbl_len = dev_attr->max_pkey;
+ port_attr->lid = 0;
+ port_attr->sm_lid = 0;
+ port_attr->lmc = 0;
+ port_attr->max_vl_num = 4;
+ port_attr->sm_sl = 0;
+ port_attr->subnet_timeout = 0;
+ port_attr->init_type_reply = 0;
+ /* Call the underlying netdev's ethtool hooks to query the speed
+ * settings (this takes rtnl_lock), but _only_ if the device is
+ * registered with the IB stack, to avoid racing in the NETDEV_UNREG
+ * path.
+ */
+ if (test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags))
+ __to_ib_speed_width(rdev->netdev, &port_attr->active_speed,
+ &port_attr->active_width);
+ return 0;
+}
+
+int bnxt_re_modify_port(struct ib_device *ibdev, u8 port_num,
+ int port_modify_mask,
+ struct ib_port_modify *port_modify)
+{
+ switch (port_modify_mask) {
+ case IB_PORT_SHUTDOWN:
+ break;
+ case IB_PORT_INIT_TYPE:
+ break;
+ case IB_PORT_RESET_QKEY_CNTR:
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr port_attr;
+
+ if (bnxt_re_query_port(ibdev, port_num, &port_attr))
+ return -EINVAL;
+
+ immutable->pkey_tbl_len = port_attr.pkey_tbl_len;
+ immutable->gid_tbl_len = port_attr.gid_tbl_len;
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+ immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ return 0;
+}
+
+int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num,
+ u16 index, u16 *pkey)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+
+ /* Ignore port_num */
+
+ memset(pkey, 0, sizeof(*pkey));
+ return bnxt_qplib_get_pkey(&rdev->qplib_res,
+ &rdev->qplib_res.pkey_tbl, index, pkey);
+}
+
+int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num,
+ int index, union ib_gid *gid)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ int rc = 0;
+
+ /* Ignore port_num */
+ memset(gid, 0, sizeof(*gid));
+ rc = bnxt_qplib_get_sgid(&rdev->qplib_res,
+ &rdev->qplib_res.sgid_tbl, index,
+ (struct bnxt_qplib_gid *)gid);
+ return rc;
+}
+
+int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num,
+ unsigned int index, void **context)
+{
+ int rc = 0;
+ struct bnxt_re_gid_ctx *ctx, **ctx_tbl;
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
+
+ /* Delete the entry from the hardware */
+ ctx = *context;
+ if (!ctx)
+ return -EINVAL;
+
+ if (sgid_tbl && sgid_tbl->active) {
+ if (ctx->idx >= sgid_tbl->max)
+ return -EINVAL;
+ ctx->refcnt--;
+ if (!ctx->refcnt) {
+ rc = bnxt_qplib_del_sgid(sgid_tbl,
+ &sgid_tbl->tbl[ctx->idx],
+ true);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to remove GID: %#x", rc);
+ } else {
+ ctx_tbl = sgid_tbl->ctx;
+ ctx_tbl[ctx->idx] = NULL;
+ kfree(ctx);
+ }
+ }
+ } else {
+ return -EINVAL;
+ }
+ return rc;
+}
+
+int bnxt_re_add_gid(struct ib_device *ibdev, u8 port_num,
+ unsigned int index, const union ib_gid *gid,
+ const struct ib_gid_attr *attr, void **context)
+{
+ int rc;
+ u32 tbl_idx = 0;
+ u16 vlan_id = 0xFFFF;
+ struct bnxt_re_gid_ctx *ctx, **ctx_tbl;
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
+
+ if ((attr->ndev) && is_vlan_dev(attr->ndev))
+ vlan_id = vlan_dev_vlan_id(attr->ndev);
+
+ rc = bnxt_qplib_add_sgid(sgid_tbl, (struct bnxt_qplib_gid *)gid,
+ rdev->qplib_res.netdev->dev_addr,
+ vlan_id, true, &tbl_idx);
+ if (rc == -EALREADY) {
+ ctx_tbl = sgid_tbl->ctx;
+ ctx_tbl[tbl_idx]->refcnt++;
+ *context = ctx_tbl[tbl_idx];
+ return 0;
+ }
+
+ if (rc < 0) {
+ dev_err(rdev_to_dev(rdev), "Failed to add GID: %#x", rc);
+ return rc;
+ }
+
+ ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+ ctx_tbl = sgid_tbl->ctx;
+ ctx->idx = tbl_idx;
+ ctx->refcnt = 1;
+ ctx_tbl[tbl_idx] = ctx;
+
+ return rc;
+}
+
+enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
+ u8 port_num)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+#define BNXT_RE_FENCE_PBL_SIZE DIV_ROUND_UP(BNXT_RE_FENCE_BYTES, PAGE_SIZE)
+
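+/*
+ * Fence support (a sketch of the mechanism implemented below): each
+ * kernel PD carries a small DMA-mapped buffer that is registered as an
+ * MR together with a type-1 MW.  bnxt_re_bind_fence_mw() posts the
+ * pre-built BIND_MW WQE (flagged with UC_FENCE and carrying a freshly
+ * incremented rkey) on a QP, which is how fencing is realised for
+ * kernel consumers of this PD.
+ */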
+static void bnxt_re_create_fence_wqe(struct bnxt_re_pd *pd)
+{
+ struct bnxt_re_fence_data *fence = &pd->fence;
+ struct ib_mr *ib_mr = &fence->mr->ib_mr;
+ struct bnxt_qplib_swqe *wqe = &fence->bind_wqe;
+
+ memset(wqe, 0, sizeof(*wqe));
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_BIND_MW;
+ wqe->wr_id = BNXT_QPLIB_FENCE_WRID;
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+ wqe->bind.zero_based = false;
+ wqe->bind.parent_l_key = ib_mr->lkey;
+ wqe->bind.va = (u64)(unsigned long)fence->va;
+ wqe->bind.length = fence->size;
+ wqe->bind.access_cntl = __from_ib_access_flags(IB_ACCESS_REMOTE_READ);
+ wqe->bind.mw_type = SQ_BIND_MW_TYPE_TYPE1;
+
+ /* Save the initial rkey in fence structure for now;
+ * wqe->bind.r_key will be set at (re)bind time.
+ */
+ fence->bind_rkey = ib_inc_rkey(fence->mw->rkey);
+}
+
+static int bnxt_re_bind_fence_mw(struct bnxt_qplib_qp *qplib_qp)
+{
+ struct bnxt_re_qp *qp = container_of(qplib_qp, struct bnxt_re_qp,
+ qplib_qp);
+ struct ib_pd *ib_pd = qp->ib_qp.pd;
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_fence_data *fence = &pd->fence;
+ struct bnxt_qplib_swqe *fence_wqe = &fence->bind_wqe;
+ struct bnxt_qplib_swqe wqe;
+ int rc;
+
+ memcpy(&wqe, fence_wqe, sizeof(wqe));
+ wqe.bind.r_key = fence->bind_rkey;
+ fence->bind_rkey = ib_inc_rkey(fence->bind_rkey);
+
+ dev_dbg(rdev_to_dev(qp->rdev),
+ "Posting bind fence-WQE: rkey: %#x QP: %d PD: %p\n",
+ wqe.bind.r_key, qp->qplib_qp.id, pd);
+ rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe);
+ if (rc) {
+ dev_err(rdev_to_dev(qp->rdev), "Failed to bind fence-WQE\n");
+ return rc;
+ }
+ bnxt_qplib_post_send_db(&qp->qplib_qp);
+
+ return rc;
+}
+
+static void bnxt_re_destroy_fence_mr(struct bnxt_re_pd *pd)
+{
+ struct bnxt_re_fence_data *fence = &pd->fence;
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct device *dev = &rdev->en_dev->pdev->dev;
+ struct bnxt_re_mr *mr = fence->mr;
+
+ if (fence->mw) {
+ bnxt_re_dealloc_mw(fence->mw);
+ fence->mw = NULL;
+ }
+ if (mr) {
+ if (mr->ib_mr.rkey)
+ bnxt_qplib_dereg_mrw(&rdev->qplib_res, &mr->qplib_mr,
+ true);
+ if (mr->ib_mr.lkey)
+ bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+ kfree(mr);
+ fence->mr = NULL;
+ }
+ if (fence->dma_addr) {
+ dma_unmap_single(dev, fence->dma_addr, BNXT_RE_FENCE_BYTES,
+ DMA_BIDIRECTIONAL);
+ fence->dma_addr = 0;
+ }
+}
+
+static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
+{
+ int mr_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_MW_BIND;
+ struct bnxt_re_fence_data *fence = &pd->fence;
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct device *dev = &rdev->en_dev->pdev->dev;
+ struct bnxt_re_mr *mr = NULL;
+ dma_addr_t dma_addr = 0;
+ struct ib_mw *mw;
+ u64 pbl_tbl;
+ int rc;
+
+ dma_addr = dma_map_single(dev, fence->va, BNXT_RE_FENCE_BYTES,
+ DMA_BIDIRECTIONAL);
+ rc = dma_mapping_error(dev, dma_addr);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to dma-map fence-MR-mem\n");
+ rc = -EIO;
+ fence->dma_addr = 0;
+ goto fail;
+ }
+ fence->dma_addr = dma_addr;
+
+ /* Allocate a MR */
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ fence->mr = mr;
+ mr->rdev = rdev;
+ mr->qplib_mr.pd = &pd->qplib_pd;
+ mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+ mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags);
+ rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to alloc fence-HW-MR\n");
+ goto fail;
+ }
+
+ /* Register MR */
+ mr->ib_mr.lkey = mr->qplib_mr.lkey;
+ mr->qplib_mr.va = (u64)(unsigned long)fence->va;
+ mr->qplib_mr.total_size = BNXT_RE_FENCE_BYTES;
+ pbl_tbl = dma_addr;
+ rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl_tbl,
+ BNXT_RE_FENCE_PBL_SIZE, false);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to register fence-MR\n");
+ goto fail;
+ }
+ mr->ib_mr.rkey = mr->qplib_mr.rkey;
+
+ /* Create a fence MW only for kernel consumers */
+ mw = bnxt_re_alloc_mw(&pd->ib_pd, IB_MW_TYPE_1, NULL);
+ if (IS_ERR(mw)) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to create fence-MW for PD: %p\n", pd);
+ rc = PTR_ERR(mw);
+ goto fail;
+ }
+ fence->mw = mw;
+
+ bnxt_re_create_fence_wqe(pd);
+ return 0;
+
+fail:
+ bnxt_re_destroy_fence_mr(pd);
+ return rc;
+}
+
+/* Protection Domains */
+int bnxt_re_dealloc_pd(struct ib_pd *ib_pd)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ int rc;
+
+ bnxt_re_destroy_fence_mr(pd);
+
+ if (pd->qplib_pd.id) {
+ rc = bnxt_qplib_dealloc_pd(&rdev->qplib_res,
+ &rdev->qplib_res.pd_tbl,
+ &pd->qplib_pd);
+ if (rc)
+ dev_err(rdev_to_dev(rdev), "Failed to deallocate HW PD");
+ }
+
+ kfree(pd);
+ return 0;
+}
+
+struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *ucontext,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct bnxt_re_ucontext *ucntx = container_of(ucontext,
+ struct bnxt_re_ucontext,
+ ib_uctx);
+ struct bnxt_re_pd *pd;
+ int rc;
+
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+ if (!pd)
+ return ERR_PTR(-ENOMEM);
+
+ pd->rdev = rdev;
+ if (bnxt_qplib_alloc_pd(&rdev->qplib_res.pd_tbl, &pd->qplib_pd)) {
+ dev_err(rdev_to_dev(rdev), "Failed to allocate HW PD");
+ rc = -ENOMEM;
+ goto fail;
+ }
+
+ if (udata) {
+ struct bnxt_re_pd_resp resp;
+
+ if (!ucntx->dpi.dbr) {
+ /* Allocate the DPI here in alloc_pd so that ibv_devinfo and
+ * similar applications do not fail when DPIs are depleted.
+ */
+ if (bnxt_qplib_alloc_dpi(&rdev->qplib_res.dpi_tbl,
+ &ucntx->dpi, ucntx)) {
+ rc = -ENOMEM;
+ goto dbfail;
+ }
+ }
+
+ resp.pdid = pd->qplib_pd.id;
+ /* Still allow mapping this DBR to the new user PD. */
+ resp.dpi = ucntx->dpi.dpi;
+ resp.dbr = (u64)ucntx->dpi.umdbr;
+
+ rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to copy user response\n");
+ goto dbfail;
+ }
+ }
+
+ if (!udata)
+ if (bnxt_re_create_fence_mr(pd))
+ dev_warn(rdev_to_dev(rdev),
+ "Failed to create Fence-MR\n");
+ return &pd->ib_pd;
+dbfail:
+ (void)bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
+ &pd->qplib_pd);
+fail:
+ kfree(pd);
+ return ERR_PTR(rc);
+}
+
+/* Address Handles */
+int bnxt_re_destroy_ah(struct ib_ah *ib_ah)
+{
+ struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah);
+ struct bnxt_re_dev *rdev = ah->rdev;
+ int rc;
+
+ rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to destroy HW AH");
+ return rc;
+ }
+ kfree(ah);
+ return 0;
+}
+
+struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
+ struct rdma_ah_attr *ah_attr,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct bnxt_re_ah *ah;
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
+ int rc;
+ u16 vlan_tag;
+ u8 nw_type;
+
+ struct ib_gid_attr sgid_attr;
+
+ if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) {
+ dev_err(rdev_to_dev(rdev), "Failed to alloc AH: GRH not set");
+ return ERR_PTR(-EINVAL);
+ }
+ ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
+ if (!ah)
+ return ERR_PTR(-ENOMEM);
+
+ ah->rdev = rdev;
+ ah->qplib_ah.pd = &pd->qplib_pd;
+
+ /* Supply the configuration for the HW */
+ memcpy(ah->qplib_ah.dgid.data, grh->dgid.raw,
+ sizeof(union ib_gid));
+ /*
+ * If RoCE V2 is enabled, the stack will have two entries for
+ * each GID entry. Avoid this duplicate entry in HW by dividing
+ * the GID index by 2 for RoCE V2.
+ */
+ ah->qplib_ah.sgid_index = grh->sgid_index / 2;
+ ah->qplib_ah.host_sgid_index = grh->sgid_index;
+ ah->qplib_ah.traffic_class = grh->traffic_class;
+ ah->qplib_ah.flow_label = grh->flow_label;
+ ah->qplib_ah.hop_limit = grh->hop_limit;
+ ah->qplib_ah.sl = rdma_ah_get_sl(ah_attr);
+ if (ib_pd->uobject &&
+ !rdma_is_multicast_addr((struct in6_addr *)
+ grh->dgid.raw) &&
+ !rdma_link_local_addr((struct in6_addr *)
+ grh->dgid.raw)) {
+ union ib_gid sgid;
+
+ rc = ib_get_cached_gid(&rdev->ibdev, 1,
+ grh->sgid_index, &sgid,
+ &sgid_attr);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to query gid at index %d",
+ grh->sgid_index);
+ goto fail;
+ }
+ if (sgid_attr.ndev) {
+ if (is_vlan_dev(sgid_attr.ndev))
+ vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev);
+ dev_put(sgid_attr.ndev);
+ }
+ /* Get network header type for this GID */
+ nw_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
+ switch (nw_type) {
+ case RDMA_NETWORK_IPV4:
+ ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V2IPV4;
+ break;
+ case RDMA_NETWORK_IPV6:
+ ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V2IPV6;
+ break;
+ default:
+ ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V1;
+ break;
+ }
+ rc = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid,
+ ah_attr->roce.dmac, &vlan_tag,
+ &sgid_attr.ndev->ifindex,
+ NULL);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to get dmac\n");
+ goto fail;
+ }
+ }
+
+ memcpy(ah->qplib_ah.dmac, ah_attr->roce.dmac, ETH_ALEN);
+ rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to allocate HW AH");
+ goto fail;
+ }
+
+ /* Write AVID to shared page. */
+ if (ib_pd->uobject) {
+ struct ib_ucontext *ib_uctx = ib_pd->uobject->context;
+ struct bnxt_re_ucontext *uctx;
+ unsigned long flag;
+ u32 *wrptr;
+
+ uctx = container_of(ib_uctx, struct bnxt_re_ucontext, ib_uctx);
+ spin_lock_irqsave(&uctx->sh_lock, flag);
+ wrptr = (u32 *)(uctx->shpg + BNXT_RE_AVID_OFFT);
+ *wrptr = ah->qplib_ah.id;
+ wmb(); /* make sure cache is updated. */
+ spin_unlock_irqrestore(&uctx->sh_lock, flag);
+ }
+
+ return &ah->ib_ah;
+
+fail:
+ kfree(ah);
+ return ERR_PTR(rc);
+}
+
+int bnxt_re_modify_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr)
+{
+ return 0;
+}
+
+int bnxt_re_query_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr)
+{
+ struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah);
+
+ ah_attr->type = ib_ah->type;
+ rdma_ah_set_sl(ah_attr, ah->qplib_ah.sl);
+ memcpy(ah_attr->roce.dmac, ah->qplib_ah.dmac, ETH_ALEN);
+ rdma_ah_set_grh(ah_attr, NULL, 0,
+ ah->qplib_ah.host_sgid_index,
+ 0, ah->qplib_ah.traffic_class);
+ rdma_ah_set_dgid_raw(ah_attr, ah->qplib_ah.dgid.data);
+ rdma_ah_set_port_num(ah_attr, 1);
+ rdma_ah_set_static_rate(ah_attr, 0);
+ return 0;
+}
+
+/* Queue Pairs */
+int bnxt_re_destroy_qp(struct ib_qp *ib_qp)
+{
+ struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+ struct bnxt_re_dev *rdev = qp->rdev;
+ int rc;
+
+ rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to destroy HW QP");
+ return rc;
+ }
+ if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp) {
+ rc = bnxt_qplib_destroy_ah(&rdev->qplib_res,
+ &rdev->sqp_ah->qplib_ah);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to destroy HW AH for shadow QP");
+ return rc;
+ }
+
+ rc = bnxt_qplib_destroy_qp(&rdev->qplib_res,
+ &rdev->qp1_sqp->qplib_qp);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to destroy Shadow QP");
+ return rc;
+ }
+ mutex_lock(&rdev->qp_lock);
+ list_del(&rdev->qp1_sqp->list);
+ atomic_dec(&rdev->qp_count);
+ mutex_unlock(&rdev->qp_lock);
+
+ kfree(rdev->sqp_ah);
+ kfree(rdev->qp1_sqp);
+ }
+
+ if (!IS_ERR_OR_NULL(qp->rumem))
+ ib_umem_release(qp->rumem);
+ if (!IS_ERR_OR_NULL(qp->sumem))
+ ib_umem_release(qp->sumem);
+
+ mutex_lock(&rdev->qp_lock);
+ list_del(&qp->list);
+ atomic_dec(&rdev->qp_count);
+ mutex_unlock(&rdev->qp_lock);
+ kfree(qp);
+ return 0;
+}
+
+static u8 __from_ib_qp_type(enum ib_qp_type type)
+{
+ switch (type) {
+ case IB_QPT_GSI:
+ return CMDQ_CREATE_QP1_TYPE_GSI;
+ case IB_QPT_RC:
+ return CMDQ_CREATE_QP_TYPE_RC;
+ case IB_QPT_UD:
+ return CMDQ_CREATE_QP_TYPE_UD;
+ default:
+ return IB_QPT_MAX;
+ }
+}
+
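+/*
+ * Pin and map the user-space SQ/RQ rings described by the ABI request
+ * (struct bnxt_re_qp_req) with ib_umem_get() and hand the resulting
+ * scatterlists to qplib.  For RC QPs the PSN search area that follows
+ * the SQ is mapped as part of the same region.
+ */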
+static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
+ struct bnxt_re_qp *qp, struct ib_udata *udata)
+{
+ struct bnxt_re_qp_req ureq;
+ struct bnxt_qplib_qp *qplib_qp = &qp->qplib_qp;
+ struct ib_umem *umem;
+ int bytes = 0;
+ struct ib_ucontext *context = pd->ib_pd.uobject->context;
+ struct bnxt_re_ucontext *cntx = container_of(context,
+ struct bnxt_re_ucontext,
+ ib_uctx);
+ if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
+ return -EFAULT;
+
+ bytes = (qplib_qp->sq.max_wqe * BNXT_QPLIB_MAX_SQE_ENTRY_SIZE);
+ /* Consider mapping PSN search memory only for RC QPs. */
+ if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC)
+ bytes += (qplib_qp->sq.max_wqe * sizeof(struct sq_psn_search));
+ bytes = PAGE_ALIGN(bytes);
+ umem = ib_umem_get(context, ureq.qpsva, bytes,
+ IB_ACCESS_LOCAL_WRITE, 1);
+ if (IS_ERR(umem))
+ return PTR_ERR(umem);
+
+ qp->sumem = umem;
+ qplib_qp->sq.sglist = umem->sg_head.sgl;
+ qplib_qp->sq.nmap = umem->nmap;
+ qplib_qp->qp_handle = ureq.qp_handle;
+
+ if (!qp->qplib_qp.srq) {
+ bytes = (qplib_qp->rq.max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
+ bytes = PAGE_ALIGN(bytes);
+ umem = ib_umem_get(context, ureq.qprva, bytes,
+ IB_ACCESS_LOCAL_WRITE, 1);
+ if (IS_ERR(umem))
+ goto rqfail;
+ qp->rumem = umem;
+ qplib_qp->rq.sglist = umem->sg_head.sgl;
+ qplib_qp->rq.nmap = umem->nmap;
+ }
+
+ qplib_qp->dpi = &cntx->dpi;
+ return 0;
+rqfail:
+ ib_umem_release(qp->sumem);
+ qp->sumem = NULL;
+ qplib_qp->sq.sglist = NULL;
+ qplib_qp->sq.nmap = 0;
+
+ return PTR_ERR(umem);
+}
+
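+/*
+ * Build the AH used by the internal shadow QP: a self-addressed AH
+ * whose DGID is the local SGID at index 0 and whose DMAC is the
+ * netdev's own MAC, so traffic sent on the shadow QP is addressed to
+ * the local port itself.
+ */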
+static struct bnxt_re_ah *bnxt_re_create_shadow_qp_ah
+ (struct bnxt_re_pd *pd,
+ struct bnxt_qplib_res *qp1_res,
+ struct bnxt_qplib_qp *qp1_qp)
+{
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct bnxt_re_ah *ah;
+ union ib_gid sgid;
+ int rc;
+
+ ah = kzalloc(sizeof(*ah), GFP_KERNEL);
+ if (!ah)
+ return NULL;
+
+ ah->rdev = rdev;
+ ah->qplib_ah.pd = &pd->qplib_pd;
+
+ rc = bnxt_re_query_gid(&rdev->ibdev, 1, 0, &sgid);
+ if (rc)
+ goto fail;
+
+ /* Supply the dgid with the same data as the sgid */
+ memcpy(ah->qplib_ah.dgid.data, &sgid.raw,
+ sizeof(union ib_gid));
+ ah->qplib_ah.sgid_index = 0;
+
+ ah->qplib_ah.traffic_class = 0;
+ ah->qplib_ah.flow_label = 0;
+ ah->qplib_ah.hop_limit = 1;
+ ah->qplib_ah.sl = 0;
+ /* Have DMAC same as SMAC */
+ ether_addr_copy(ah->qplib_ah.dmac, rdev->netdev->dev_addr);
+
+ rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to allocate HW AH for Shadow QP");
+ goto fail;
+ }
+
+ return ah;
+
+fail:
+ kfree(ah);
+ return NULL;
+}
+
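+/*
+ * Create the shadow QP used internally alongside QP1 (GSI): a UD QP
+ * that shares QP1's CQs and mirrors its queue depths.  Work relayed
+ * through it is tracked in rdev->sqp_tbl (see
+ * bnxt_re_build_qp1_shadow_qp_recv()).
+ */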
+static struct bnxt_re_qp *bnxt_re_create_shadow_qp
+ (struct bnxt_re_pd *pd,
+ struct bnxt_qplib_res *qp1_res,
+ struct bnxt_qplib_qp *qp1_qp)
+{
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct bnxt_re_qp *qp;
+ int rc;
+
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ if (!qp)
+ return NULL;
+
+ qp->rdev = rdev;
+
+ /* Initialize the shadow QP structure from the QP1 values */
+ ether_addr_copy(qp->qplib_qp.smac, rdev->netdev->dev_addr);
+
+ qp->qplib_qp.pd = &pd->qplib_pd;
+ qp->qplib_qp.qp_handle = (u64)(unsigned long)(&qp->qplib_qp);
+ qp->qplib_qp.type = IB_QPT_UD;
+
+ qp->qplib_qp.max_inline_data = 0;
+ qp->qplib_qp.sig_type = true;
+
+ /* Shadow QP SQ depth should be same as QP1 RQ depth */
+ qp->qplib_qp.sq.max_wqe = qp1_qp->rq.max_wqe;
+ qp->qplib_qp.sq.max_sge = 2;
+ /* Q full delta can be 1 since it is internal QP */
+ qp->qplib_qp.sq.q_full_delta = 1;
+
+ qp->qplib_qp.scq = qp1_qp->scq;
+ qp->qplib_qp.rcq = qp1_qp->rcq;
+
+ qp->qplib_qp.rq.max_wqe = qp1_qp->rq.max_wqe;
+ qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge;
+ /* Q full delta can be 1 since it is internal QP */
+ qp->qplib_qp.rq.q_full_delta = 1;
+
+ qp->qplib_qp.mtu = qp1_qp->mtu;
+
+ qp->qplib_qp.sq_hdr_buf_size = 0;
+ qp->qplib_qp.rq_hdr_buf_size = BNXT_QPLIB_MAX_GRH_HDR_SIZE_IPV6;
+ qp->qplib_qp.dpi = &rdev->dpi_privileged;
+
+ rc = bnxt_qplib_create_qp(qp1_res, &qp->qplib_qp);
+ if (rc)
+ goto fail;
+
+ rdev->sqp_id = qp->qplib_qp.id;
+
+ spin_lock_init(&qp->sq_lock);
+ INIT_LIST_HEAD(&qp->list);
+ mutex_lock(&rdev->qp_lock);
+ list_add_tail(&qp->list, &rdev->qp_list);
+ atomic_inc(&rdev->qp_count);
+ mutex_unlock(&rdev->qp_lock);
+ return qp;
+fail:
+ kfree(qp);
+ return NULL;
+}
+
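+/*
+ * QP creation: GSI (QP1) QPs take a special path that also creates the
+ * shadow UD QP and its AH; all other QP types reserve
+ * BNXT_QPLIB_RESERVED_QP_WRS extra SQ slots plus one slot for the
+ * phantom WQE (see the q_full_delta handling below).
+ */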
+struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
+ struct ib_qp_init_attr *qp_init_attr,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+ struct bnxt_re_qp *qp;
+ struct bnxt_re_cq *cq;
+ int rc, entries;
+
+ if ((qp_init_attr->cap.max_send_wr > dev_attr->max_qp_wqes) ||
+ (qp_init_attr->cap.max_recv_wr > dev_attr->max_qp_wqes) ||
+ (qp_init_attr->cap.max_send_sge > dev_attr->max_qp_sges) ||
+ (qp_init_attr->cap.max_recv_sge > dev_attr->max_qp_sges) ||
+ (qp_init_attr->cap.max_inline_data > dev_attr->max_inline_data))
+ return ERR_PTR(-EINVAL);
+
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ if (!qp)
+ return ERR_PTR(-ENOMEM);
+
+ qp->rdev = rdev;
+ ether_addr_copy(qp->qplib_qp.smac, rdev->netdev->dev_addr);
+ qp->qplib_qp.pd = &pd->qplib_pd;
+ qp->qplib_qp.qp_handle = (u64)(unsigned long)(&qp->qplib_qp);
+ qp->qplib_qp.type = __from_ib_qp_type(qp_init_attr->qp_type);
+ if (qp->qplib_qp.type == IB_QPT_MAX) {
+ dev_err(rdev_to_dev(rdev), "QP type 0x%x not supported",
+ qp->qplib_qp.type);
+ rc = -EINVAL;
+ goto fail;
+ }
+ qp->qplib_qp.max_inline_data = qp_init_attr->cap.max_inline_data;
+ qp->qplib_qp.sig_type = ((qp_init_attr->sq_sig_type ==
+ IB_SIGNAL_ALL_WR) ? true : false);
+
+ qp->qplib_qp.sq.max_sge = qp_init_attr->cap.max_send_sge;
+ if (qp->qplib_qp.sq.max_sge > dev_attr->max_qp_sges)
+ qp->qplib_qp.sq.max_sge = dev_attr->max_qp_sges;
+
+ if (qp_init_attr->send_cq) {
+ cq = container_of(qp_init_attr->send_cq, struct bnxt_re_cq,
+ ib_cq);
+ if (!cq) {
+ dev_err(rdev_to_dev(rdev), "Send CQ not found");
+ rc = -EINVAL;
+ goto fail;
+ }
+ qp->qplib_qp.scq = &cq->qplib_cq;
+ }
+
+ if (qp_init_attr->recv_cq) {
+ cq = container_of(qp_init_attr->recv_cq, struct bnxt_re_cq,
+ ib_cq);
+ if (!cq) {
+ dev_err(rdev_to_dev(rdev), "Receive CQ not found");
+ rc = -EINVAL;
+ goto fail;
+ }
+ qp->qplib_qp.rcq = &cq->qplib_cq;
+ }
+
+ if (qp_init_attr->srq) {
+ dev_err(rdev_to_dev(rdev), "SRQ not supported");
+ rc = -ENOTSUPP;
+ goto fail;
+ } else {
+ /* Allocate 1 more than what's provided so posting max doesn't
+ * mean empty
+ */
+ entries = roundup_pow_of_two(qp_init_attr->cap.max_recv_wr + 1);
+ qp->qplib_qp.rq.max_wqe = min_t(u32, entries,
+ dev_attr->max_qp_wqes + 1);
+
+ qp->qplib_qp.rq.q_full_delta = qp->qplib_qp.rq.max_wqe -
+ qp_init_attr->cap.max_recv_wr;
+
+ qp->qplib_qp.rq.max_sge = qp_init_attr->cap.max_recv_sge;
+ if (qp->qplib_qp.rq.max_sge > dev_attr->max_qp_sges)
+ qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
+ }
+
+ qp->qplib_qp.mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu));
+
+ if (qp_init_attr->qp_type == IB_QPT_GSI) {
+ /* Allocate 1 more than what's provided */
+ entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr + 1);
+ qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
+ dev_attr->max_qp_wqes + 1);
+ qp->qplib_qp.sq.q_full_delta = qp->qplib_qp.sq.max_wqe -
+ qp_init_attr->cap.max_send_wr;
+ qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
+ if (qp->qplib_qp.rq.max_sge > dev_attr->max_qp_sges)
+ qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
+ qp->qplib_qp.sq.max_sge++;
+ if (qp->qplib_qp.sq.max_sge > dev_attr->max_qp_sges)
+ qp->qplib_qp.sq.max_sge = dev_attr->max_qp_sges;
+
+ qp->qplib_qp.rq_hdr_buf_size =
+ BNXT_QPLIB_MAX_QP1_RQ_HDR_SIZE_V2;
+
+ qp->qplib_qp.sq_hdr_buf_size =
+ BNXT_QPLIB_MAX_QP1_SQ_HDR_SIZE_V2;
+ qp->qplib_qp.dpi = &rdev->dpi_privileged;
+ rc = bnxt_qplib_create_qp1(&rdev->qplib_res, &qp->qplib_qp);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to create HW QP1");
+ goto fail;
+ }
+ /* Create a shadow QP to handle the QP1 traffic */
+ rdev->qp1_sqp = bnxt_re_create_shadow_qp(pd, &rdev->qplib_res,
+ &qp->qplib_qp);
+ if (!rdev->qp1_sqp) {
+ rc = -EINVAL;
+ dev_err(rdev_to_dev(rdev),
+ "Failed to create Shadow QP for QP1");
+ goto qp_destroy;
+ }
+ rdev->sqp_ah = bnxt_re_create_shadow_qp_ah(pd, &rdev->qplib_res,
+ &qp->qplib_qp);
+ if (!rdev->sqp_ah) {
+ bnxt_qplib_destroy_qp(&rdev->qplib_res,
+ &rdev->qp1_sqp->qplib_qp);
+ rc = -EINVAL;
+ dev_err(rdev_to_dev(rdev),
+ "Failed to create AH entry for ShadowQP");
+ goto qp_destroy;
+ }
+
+ } else {
+ /* Allocate 128 + 1 more than what's provided */
+ entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr +
+ BNXT_QPLIB_RESERVED_QP_WRS + 1);
+ qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
+ dev_attr->max_qp_wqes +
+ BNXT_QPLIB_RESERVED_QP_WRS + 1);
+ qp->qplib_qp.sq.q_full_delta = BNXT_QPLIB_RESERVED_QP_WRS + 1;
+
+ /*
+ * Reserve one slot for the phantom WQE. The application can
+ * post one extra entry in this case, but allowing it avoids an
+ * unexpected queue-full condition.
+ */
+
+ qp->qplib_qp.sq.q_full_delta -= 1;
+
+ qp->qplib_qp.max_rd_atomic = dev_attr->max_qp_rd_atom;
+ qp->qplib_qp.max_dest_rd_atomic = dev_attr->max_qp_init_rd_atom;
+ if (udata) {
+ rc = bnxt_re_init_user_qp(rdev, pd, qp, udata);
+ if (rc)
+ goto fail;
+ } else {
+ qp->qplib_qp.dpi = &rdev->dpi_privileged;
+ }
+
+ rc = bnxt_qplib_create_qp(&rdev->qplib_res, &qp->qplib_qp);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to create HW QP");
+ goto fail;
+ }
+ }
+
+ qp->ib_qp.qp_num = qp->qplib_qp.id;
+ spin_lock_init(&qp->sq_lock);
+ spin_lock_init(&qp->rq_lock);
+
+ if (udata) {
+ struct bnxt_re_qp_resp resp;
+
+ resp.qpid = qp->ib_qp.qp_num;
+ resp.rsvd = 0;
+ rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to copy QP udata");
+ goto qp_destroy;
+ }
+ }
+ INIT_LIST_HEAD(&qp->list);
+ mutex_lock(&rdev->qp_lock);
+ list_add_tail(&qp->list, &rdev->qp_list);
+ atomic_inc(&rdev->qp_count);
+ mutex_unlock(&rdev->qp_lock);
+
+ return &qp->ib_qp;
+qp_destroy:
+ bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp);
+fail:
+ kfree(qp);
+ return ERR_PTR(rc);
+}
+
+static u8 __from_ib_qp_state(enum ib_qp_state state)
+{
+ switch (state) {
+ case IB_QPS_RESET:
+ return CMDQ_MODIFY_QP_NEW_STATE_RESET;
+ case IB_QPS_INIT:
+ return CMDQ_MODIFY_QP_NEW_STATE_INIT;
+ case IB_QPS_RTR:
+ return CMDQ_MODIFY_QP_NEW_STATE_RTR;
+ case IB_QPS_RTS:
+ return CMDQ_MODIFY_QP_NEW_STATE_RTS;
+ case IB_QPS_SQD:
+ return CMDQ_MODIFY_QP_NEW_STATE_SQD;
+ case IB_QPS_SQE:
+ return CMDQ_MODIFY_QP_NEW_STATE_SQE;
+ case IB_QPS_ERR:
+ default:
+ return CMDQ_MODIFY_QP_NEW_STATE_ERR;
+ }
+}
+
+static enum ib_qp_state __to_ib_qp_state(u8 state)
+{
+ switch (state) {
+ case CMDQ_MODIFY_QP_NEW_STATE_RESET:
+ return IB_QPS_RESET;
+ case CMDQ_MODIFY_QP_NEW_STATE_INIT:
+ return IB_QPS_INIT;
+ case CMDQ_MODIFY_QP_NEW_STATE_RTR:
+ return IB_QPS_RTR;
+ case CMDQ_MODIFY_QP_NEW_STATE_RTS:
+ return IB_QPS_RTS;
+ case CMDQ_MODIFY_QP_NEW_STATE_SQD:
+ return IB_QPS_SQD;
+ case CMDQ_MODIFY_QP_NEW_STATE_SQE:
+ return IB_QPS_SQE;
+ case CMDQ_MODIFY_QP_NEW_STATE_ERR:
+ default:
+ return IB_QPS_ERR;
+ }
+}
+
+static u32 __from_ib_mtu(enum ib_mtu mtu)
+{
+ switch (mtu) {
+ case IB_MTU_256:
+ return CMDQ_MODIFY_QP_PATH_MTU_MTU_256;
+ case IB_MTU_512:
+ return CMDQ_MODIFY_QP_PATH_MTU_MTU_512;
+ case IB_MTU_1024:
+ return CMDQ_MODIFY_QP_PATH_MTU_MTU_1024;
+ case IB_MTU_2048:
+ return CMDQ_MODIFY_QP_PATH_MTU_MTU_2048;
+ case IB_MTU_4096:
+ return CMDQ_MODIFY_QP_PATH_MTU_MTU_4096;
+ default:
+ return CMDQ_MODIFY_QP_PATH_MTU_MTU_2048;
+ }
+}
+
+static enum ib_mtu __to_ib_mtu(u32 mtu)
+{
+ switch (mtu & CREQ_QUERY_QP_RESP_SB_PATH_MTU_MASK) {
+ case CMDQ_MODIFY_QP_PATH_MTU_MTU_256:
+ return IB_MTU_256;
+ case CMDQ_MODIFY_QP_PATH_MTU_MTU_512:
+ return IB_MTU_512;
+ case CMDQ_MODIFY_QP_PATH_MTU_MTU_1024:
+ return IB_MTU_1024;
+ case CMDQ_MODIFY_QP_PATH_MTU_MTU_2048:
+ return IB_MTU_2048;
+ case CMDQ_MODIFY_QP_PATH_MTU_MTU_4096:
+ return IB_MTU_4096;
+ default:
+ return IB_MTU_2048;
+ }
+}
+
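+/*
+ * Keep the shadow QP in step with QP1: whenever QP1's state, pkey
+ * index or SQ PSN is modified, propagate the equivalent change to the
+ * shadow QP (which uses its own fixed QKEY).
+ */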
+static int bnxt_re_modify_shadow_qp(struct bnxt_re_dev *rdev,
+ struct bnxt_re_qp *qp1_qp,
+ int qp_attr_mask)
+{
+ struct bnxt_re_qp *qp = rdev->qp1_sqp;
+ int rc = 0;
+
+ if (qp_attr_mask & IB_QP_STATE) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_STATE;
+ qp->qplib_qp.state = qp1_qp->qplib_qp.state;
+ }
+ if (qp_attr_mask & IB_QP_PKEY_INDEX) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY;
+ qp->qplib_qp.pkey_index = qp1_qp->qplib_qp.pkey_index;
+ }
+
+ if (qp_attr_mask & IB_QP_QKEY) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_QKEY;
+ /* Use a fixed, driver-chosen QKEY for the shadow QP */
+ qp->qplib_qp.qkey = 0x81818181;
+ }
+ if (qp_attr_mask & IB_QP_SQ_PSN) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_SQ_PSN;
+ qp->qplib_qp.sq.psn = qp1_qp->qplib_qp.sq.psn;
+ }
+
+ rc = bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp);
+ if (rc)
+ dev_err(rdev_to_dev(rdev),
+ "Failed to modify Shadow QP for QP1");
+ return rc;
+}
+
+int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_udata *udata)
+{
+ struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+ struct bnxt_re_dev *rdev = qp->rdev;
+ struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+ enum ib_qp_state curr_qp_state, new_qp_state;
+ int rc, entries;
+ int status;
+ union ib_gid sgid;
+ struct ib_gid_attr sgid_attr;
+ u8 nw_type;
+
+ qp->qplib_qp.modify_flags = 0;
+ if (qp_attr_mask & IB_QP_STATE) {
+ curr_qp_state = __to_ib_qp_state(qp->qplib_qp.cur_qp_state);
+ new_qp_state = qp_attr->qp_state;
+ if (!ib_modify_qp_is_ok(curr_qp_state, new_qp_state,
+ ib_qp->qp_type, qp_attr_mask,
+ IB_LINK_LAYER_ETHERNET)) {
+ dev_err(rdev_to_dev(rdev),
+ "Invalid attribute mask: %#x specified ",
+ qp_attr_mask);
+ dev_err(rdev_to_dev(rdev),
+ "for qpn: %#x type: %#x",
+ ib_qp->qp_num, ib_qp->qp_type);
+ dev_err(rdev_to_dev(rdev),
+ "curr_qp_state=0x%x, new_qp_state=0x%x\n",
+ curr_qp_state, new_qp_state);
+ return -EINVAL;
+ }
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_STATE;
+ qp->qplib_qp.state = __from_ib_qp_state(qp_attr->qp_state);
+ }
+ if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_EN_SQD_ASYNC_NOTIFY;
+ qp->qplib_qp.en_sqd_async_notify = true;
+ }
+ if (qp_attr_mask & IB_QP_ACCESS_FLAGS) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS;
+ qp->qplib_qp.access =
+ __from_ib_access_flags(qp_attr->qp_access_flags);
+ /* LOCAL_WRITE access must be set to allow RC receive */
+ qp->qplib_qp.access |= BNXT_QPLIB_ACCESS_LOCAL_WRITE;
+ }
+ if (qp_attr_mask & IB_QP_PKEY_INDEX) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY;
+ qp->qplib_qp.pkey_index = qp_attr->pkey_index;
+ }
+ if (qp_attr_mask & IB_QP_QKEY) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_QKEY;
+ qp->qplib_qp.qkey = qp_attr->qkey;
+ }
+ if (qp_attr_mask & IB_QP_AV) {
+ const struct ib_global_route *grh =
+ rdma_ah_read_grh(&qp_attr->ah_attr);
+
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_DGID |
+ CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL |
+ CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX |
+ CMDQ_MODIFY_QP_MODIFY_MASK_HOP_LIMIT |
+ CMDQ_MODIFY_QP_MODIFY_MASK_TRAFFIC_CLASS |
+ CMDQ_MODIFY_QP_MODIFY_MASK_DEST_MAC |
+ CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID;
+ memcpy(qp->qplib_qp.ah.dgid.data, grh->dgid.raw,
+ sizeof(qp->qplib_qp.ah.dgid.data));
+ qp->qplib_qp.ah.flow_label = grh->flow_label;
+ /* If RoCE V2 is enabled, the stack will have two entries for
+ * each GID entry. Avoid this duplicate entry in HW by dividing
+ * the GID index by 2 for RoCE V2.
+ */
+ qp->qplib_qp.ah.sgid_index = grh->sgid_index / 2;
+ qp->qplib_qp.ah.host_sgid_index = grh->sgid_index;
+ qp->qplib_qp.ah.hop_limit = grh->hop_limit;
+ qp->qplib_qp.ah.traffic_class = grh->traffic_class;
+ qp->qplib_qp.ah.sl = rdma_ah_get_sl(&qp_attr->ah_attr);
+ ether_addr_copy(qp->qplib_qp.ah.dmac,
+ qp_attr->ah_attr.roce.dmac);
+
+ status = ib_get_cached_gid(&rdev->ibdev, 1,
+ grh->sgid_index,
+ &sgid, &sgid_attr);
+ if (!status && sgid_attr.ndev) {
+ memcpy(qp->qplib_qp.smac, sgid_attr.ndev->dev_addr,
+ ETH_ALEN);
+ dev_put(sgid_attr.ndev);
+ nw_type = ib_gid_to_network_type(sgid_attr.gid_type,
+ &sgid);
+ switch (nw_type) {
+ case RDMA_NETWORK_IPV4:
+ qp->qplib_qp.nw_type =
+ CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4;
+ break;
+ case RDMA_NETWORK_IPV6:
+ qp->qplib_qp.nw_type =
+ CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6;
+ break;
+ default:
+ qp->qplib_qp.nw_type =
+ CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1;
+ break;
+ }
+ }
+ }
+
+ if (qp_attr_mask & IB_QP_PATH_MTU) {
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU;
+ qp->qplib_qp.path_mtu = __from_ib_mtu(qp_attr->path_mtu);
+ } else if (qp_attr->qp_state == IB_QPS_RTR) {
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU;
+ qp->qplib_qp.path_mtu =
+ __from_ib_mtu(iboe_get_mtu(rdev->netdev->mtu));
+ }
+
+ if (qp_attr_mask & IB_QP_TIMEOUT) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_TIMEOUT;
+ qp->qplib_qp.timeout = qp_attr->timeout;
+ }
+ if (qp_attr_mask & IB_QP_RETRY_CNT) {
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_RETRY_CNT;
+ qp->qplib_qp.retry_cnt = qp_attr->retry_cnt;
+ }
+ if (qp_attr_mask & IB_QP_RNR_RETRY) {
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_RNR_RETRY;
+ qp->qplib_qp.rnr_retry = qp_attr->rnr_retry;
+ }
+ if (qp_attr_mask & IB_QP_MIN_RNR_TIMER) {
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_MIN_RNR_TIMER;
+ qp->qplib_qp.min_rnr_timer = qp_attr->min_rnr_timer;
+ }
+ if (qp_attr_mask & IB_QP_RQ_PSN) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_RQ_PSN;
+ qp->qplib_qp.rq.psn = qp_attr->rq_psn;
+ }
+ if (qp_attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_MAX_RD_ATOMIC;
+ /* Cap the max_rd_atomic to device max */
+ qp->qplib_qp.max_rd_atomic = min_t(u32, qp_attr->max_rd_atomic,
+ dev_attr->max_qp_rd_atom);
+ }
+ if (qp_attr_mask & IB_QP_SQ_PSN) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_SQ_PSN;
+ qp->qplib_qp.sq.psn = qp_attr->sq_psn;
+ }
+ if (qp_attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
+ if (qp_attr->max_dest_rd_atomic >
+ dev_attr->max_qp_init_rd_atom) {
+ dev_err(rdev_to_dev(rdev),
+ "max_dest_rd_atomic requested%d is > dev_max%d",
+ qp_attr->max_dest_rd_atomic,
+ dev_attr->max_qp_init_rd_atom);
+ return -EINVAL;
+ }
+
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_MAX_DEST_RD_ATOMIC;
+ qp->qplib_qp.max_dest_rd_atomic = qp_attr->max_dest_rd_atomic;
+ }
+ if (qp_attr_mask & IB_QP_CAP) {
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_SQ_SIZE |
+ CMDQ_MODIFY_QP_MODIFY_MASK_RQ_SIZE |
+ CMDQ_MODIFY_QP_MODIFY_MASK_SQ_SGE |
+ CMDQ_MODIFY_QP_MODIFY_MASK_RQ_SGE |
+ CMDQ_MODIFY_QP_MODIFY_MASK_MAX_INLINE_DATA;
+ if ((qp_attr->cap.max_send_wr >= dev_attr->max_qp_wqes) ||
+ (qp_attr->cap.max_recv_wr >= dev_attr->max_qp_wqes) ||
+ (qp_attr->cap.max_send_sge >= dev_attr->max_qp_sges) ||
+ (qp_attr->cap.max_recv_sge >= dev_attr->max_qp_sges) ||
+ (qp_attr->cap.max_inline_data >=
+ dev_attr->max_inline_data)) {
+ dev_err(rdev_to_dev(rdev),
+ "Create QP failed - max exceeded");
+ return -EINVAL;
+ }
+ entries = roundup_pow_of_two(qp_attr->cap.max_send_wr);
+ qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
+ dev_attr->max_qp_wqes + 1);
+ qp->qplib_qp.sq.q_full_delta = qp->qplib_qp.sq.max_wqe -
+ qp_attr->cap.max_send_wr;
+ /*
+ * Reserve one slot for the phantom WQE. Some applications can
+ * post one extra entry in this case; allowing it avoids an
+ * unexpected queue-full condition.
+ */
+ qp->qplib_qp.sq.q_full_delta -= 1;
+ qp->qplib_qp.sq.max_sge = qp_attr->cap.max_send_sge;
+ if (qp->qplib_qp.rq.max_wqe) {
+ entries = roundup_pow_of_two(qp_attr->cap.max_recv_wr);
+ qp->qplib_qp.rq.max_wqe =
+ min_t(u32, entries, dev_attr->max_qp_wqes + 1);
+ qp->qplib_qp.rq.q_full_delta = qp->qplib_qp.rq.max_wqe -
+ qp_attr->cap.max_recv_wr;
+ qp->qplib_qp.rq.max_sge = qp_attr->cap.max_recv_sge;
+ } else {
+ /* SRQ was used prior, just ignore the RQ caps */
+ }
+ }
+ if (qp_attr_mask & IB_QP_DEST_QPN) {
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_DEST_QP_ID;
+ qp->qplib_qp.dest_qpn = qp_attr->dest_qp_num;
+ }
+ rc = bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to modify HW QP");
+ return rc;
+ }
+ if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp)
+ rc = bnxt_re_modify_shadow_qp(rdev, qp, qp_attr_mask);
+ return rc;
+}
+
+int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+ struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+ struct bnxt_re_dev *rdev = qp->rdev;
+ struct bnxt_qplib_qp qplib_qp;
+ int rc;
+
+ memset(&qplib_qp, 0, sizeof(struct bnxt_qplib_qp));
+ qplib_qp.id = qp->qplib_qp.id;
+ qplib_qp.ah.host_sgid_index = qp->qplib_qp.ah.host_sgid_index;
+
+ rc = bnxt_qplib_query_qp(&rdev->qplib_res, &qplib_qp);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to query HW QP");
+ return rc;
+ }
+ qp_attr->qp_state = __to_ib_qp_state(qplib_qp.state);
+ qp_attr->en_sqd_async_notify = qplib_qp.en_sqd_async_notify ? 1 : 0;
+ qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp.access);
+ qp_attr->pkey_index = qplib_qp.pkey_index;
+ qp_attr->qkey = qplib_qp.qkey;
+ qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
+ rdma_ah_set_grh(&qp_attr->ah_attr, NULL, qplib_qp.ah.flow_label,
+ qplib_qp.ah.host_sgid_index,
+ qplib_qp.ah.hop_limit,
+ qplib_qp.ah.traffic_class);
+ rdma_ah_set_dgid_raw(&qp_attr->ah_attr, qplib_qp.ah.dgid.data);
+ rdma_ah_set_sl(&qp_attr->ah_attr, qplib_qp.ah.sl);
+ ether_addr_copy(qp_attr->ah_attr.roce.dmac, qplib_qp.ah.dmac);
+ qp_attr->path_mtu = __to_ib_mtu(qplib_qp.path_mtu);
+ qp_attr->timeout = qplib_qp.timeout;
+ qp_attr->retry_cnt = qplib_qp.retry_cnt;
+ qp_attr->rnr_retry = qplib_qp.rnr_retry;
+ qp_attr->min_rnr_timer = qplib_qp.min_rnr_timer;
+ qp_attr->rq_psn = qplib_qp.rq.psn;
+ qp_attr->max_rd_atomic = qplib_qp.max_rd_atomic;
+ qp_attr->sq_psn = qplib_qp.sq.psn;
+ qp_attr->max_dest_rd_atomic = qplib_qp.max_dest_rd_atomic;
+ qp_init_attr->sq_sig_type = qplib_qp.sig_type ? IB_SIGNAL_ALL_WR :
+ IB_SIGNAL_REQ_WR;
+ qp_attr->dest_qp_num = qplib_qp.dest_qpn;
+
+ qp_attr->cap.max_send_wr = qp->qplib_qp.sq.max_wqe;
+ qp_attr->cap.max_send_sge = qp->qplib_qp.sq.max_sge;
+ qp_attr->cap.max_recv_wr = qp->qplib_qp.rq.max_wqe;
+ qp_attr->cap.max_recv_sge = qp->qplib_qp.rq.max_sge;
+ qp_attr->cap.max_inline_data = qp->qplib_qp.max_inline_data;
+ qp_init_attr->cap = qp_attr->cap;
+
+ return 0;
+}
+
+/* Routine for sending QP1 packets for RoCE V1 and V2
+ */
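+/*
+ * The packet header (Ethernet / optional VLAN / IPv4+UDP, IPv6+UDP or
+ * GRH / BTH / DETH) is assembled with the ib_ud_header_* helpers,
+ * packed into the QP1 SQ header buffer and prepended as sg_list[0];
+ * the caller's SGEs are shifted up by one to make room for it.
+ */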
+static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
+ struct ib_send_wr *wr,
+ struct bnxt_qplib_swqe *wqe,
+ int payload_size)
+{
+ struct ib_device *ibdev = &qp->rdev->ibdev;
+ struct bnxt_re_ah *ah = container_of(ud_wr(wr)->ah, struct bnxt_re_ah,
+ ib_ah);
+ struct bnxt_qplib_ah *qplib_ah = &ah->qplib_ah;
+ struct bnxt_qplib_sge sge;
+ union ib_gid sgid;
+ u8 nw_type;
+ u16 ether_type;
+ struct ib_gid_attr sgid_attr;
+ union ib_gid dgid;
+ bool is_eth = false;
+ bool is_vlan = false;
+ bool is_grh = false;
+ bool is_udp = false;
+ u8 ip_version = 0;
+ u16 vlan_id = 0xFFFF;
+ void *buf;
+ int i, rc = 0, size;
+
+ memset(&qp->qp1_hdr, 0, sizeof(qp->qp1_hdr));
+
+ rc = ib_get_cached_gid(ibdev, 1,
+ qplib_ah->host_sgid_index, &sgid,
+ &sgid_attr);
+ if (rc) {
+ dev_err(rdev_to_dev(qp->rdev),
+ "Failed to query gid at index %d",
+ qplib_ah->host_sgid_index);
+ return rc;
+ }
+ if (sgid_attr.ndev) {
+ if (is_vlan_dev(sgid_attr.ndev))
+ vlan_id = vlan_dev_vlan_id(sgid_attr.ndev);
+ dev_put(sgid_attr.ndev);
+ }
+ /* Get network header type for this GID */
+ nw_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
+ switch (nw_type) {
+ case RDMA_NETWORK_IPV4:
+ nw_type = BNXT_RE_ROCEV2_IPV4_PACKET;
+ break;
+ case RDMA_NETWORK_IPV6:
+ nw_type = BNXT_RE_ROCEV2_IPV6_PACKET;
+ break;
+ default:
+ nw_type = BNXT_RE_ROCE_V1_PACKET;
+ break;
+ }
+ memcpy(&dgid.raw, &qplib_ah->dgid, 16);
+ is_udp = sgid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
+ if (is_udp) {
+ if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) {
+ ip_version = 4;
+ ether_type = ETH_P_IP;
+ } else {
+ ip_version = 6;
+ ether_type = ETH_P_IPV6;
+ }
+ is_grh = false;
+ } else {
+ ether_type = ETH_P_IBOE;
+ is_grh = true;
+ }
+
+ is_eth = true;
+ is_vlan = (vlan_id && (vlan_id < 0x1000)) ? true : false;
+
+ ib_ud_header_init(payload_size, !is_eth, is_eth, is_vlan, is_grh,
+ ip_version, is_udp, 0, &qp->qp1_hdr);
+
+ /* ETH */
+ ether_addr_copy(qp->qp1_hdr.eth.dmac_h, ah->qplib_ah.dmac);
+ ether_addr_copy(qp->qp1_hdr.eth.smac_h, qp->qplib_qp.smac);
+
+ /* For vlan, check the sgid for vlan existence */
+
+ if (!is_vlan) {
+ qp->qp1_hdr.eth.type = cpu_to_be16(ether_type);
+ } else {
+ qp->qp1_hdr.vlan.type = cpu_to_be16(ether_type);
+ qp->qp1_hdr.vlan.tag = cpu_to_be16(vlan_id);
+ }
+
+ if (is_grh || (ip_version == 6)) {
+ memcpy(qp->qp1_hdr.grh.source_gid.raw, sgid.raw, sizeof(sgid));
+ memcpy(qp->qp1_hdr.grh.destination_gid.raw, qplib_ah->dgid.data,
+ sizeof(sgid));
+ qp->qp1_hdr.grh.hop_limit = qplib_ah->hop_limit;
+ }
+
+ if (ip_version == 4) {
+ qp->qp1_hdr.ip4.tos = 0;
+ qp->qp1_hdr.ip4.id = 0;
+ qp->qp1_hdr.ip4.frag_off = htons(IP_DF);
+ qp->qp1_hdr.ip4.ttl = qplib_ah->hop_limit;
+
+ memcpy(&qp->qp1_hdr.ip4.saddr, sgid.raw + 12, 4);
+ memcpy(&qp->qp1_hdr.ip4.daddr, qplib_ah->dgid.data + 12, 4);
+ qp->qp1_hdr.ip4.check = ib_ud_ip4_csum(&qp->qp1_hdr);
+ }
+
+ if (is_udp) {
+ qp->qp1_hdr.udp.dport = htons(ROCE_V2_UDP_DPORT);
+ qp->qp1_hdr.udp.sport = htons(0x8CD1);
+ qp->qp1_hdr.udp.csum = 0;
+ }
+
+ /* BTH */
+ if (wr->opcode == IB_WR_SEND_WITH_IMM) {
+ qp->qp1_hdr.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
+ qp->qp1_hdr.immediate_present = 1;
+ } else {
+ qp->qp1_hdr.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
+ }
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ qp->qp1_hdr.bth.solicited_event = 1;
+ /* pad_count */
+ qp->qp1_hdr.bth.pad_count = (4 - payload_size) & 3;
+
+ /* P_key for QP1 is for all members */
+ qp->qp1_hdr.bth.pkey = cpu_to_be16(0xFFFF);
+ qp->qp1_hdr.bth.destination_qpn = IB_QP1;
+ qp->qp1_hdr.bth.ack_req = 0;
+ qp->send_psn++;
+ qp->send_psn &= BTH_PSN_MASK;
+ qp->qp1_hdr.bth.psn = cpu_to_be32(qp->send_psn);
+ /* DETH */
+ /* Use the privileged Q_Key for QP1 */
+ qp->qp1_hdr.deth.qkey = cpu_to_be32(IB_QP1_QKEY);
+ qp->qp1_hdr.deth.source_qpn = IB_QP1;
+
+ /* Pack the QP1 to the transmit buffer */
+ buf = bnxt_qplib_get_qp1_sq_buf(&qp->qplib_qp, &sge);
+ if (buf) {
+ size = ib_ud_header_pack(&qp->qp1_hdr, buf);
+ for (i = wqe->num_sge; i; i--) {
+ wqe->sg_list[i].addr = wqe->sg_list[i - 1].addr;
+ wqe->sg_list[i].lkey = wqe->sg_list[i - 1].lkey;
+ wqe->sg_list[i].size = wqe->sg_list[i - 1].size;
+ }
+
+ /*
+ * Max Header buf size for IPV6 RoCE V2 is 86,
+ * which is same as the QP1 SQ header buffer.
+ * Header buf size for IPV4 RoCE V2 can be 66.
+ * ETH(14) + VLAN(4)+ IP(20) + UDP (8) + BTH(20).
+ * Subtract 20 bytes from QP1 SQ header buf size
+ */
+ if (is_udp && ip_version == 4)
+ sge.size -= 20;
+ /*
+ * Max Header buf size for RoCE V1 is 78.
+ * ETH(14) + VLAN(4) + GRH(40) + BTH(20).
+ * Subtract 8 bytes from QP1 SQ header buf size
+ */
+ if (!is_udp)
+ sge.size -= 8;
+
+ /* Subtract 4 bytes for non vlan packets */
+ if (!is_vlan)
+ sge.size -= 4;
+
+ wqe->sg_list[0].addr = sge.addr;
+ wqe->sg_list[0].lkey = sge.lkey;
+ wqe->sg_list[0].size = sge.size;
+ wqe->num_sge++;
+
+ } else {
+ dev_err(rdev_to_dev(qp->rdev), "QP1 buffer is empty!");
+ rc = -ENOMEM;
+ }
+ return rc;
+}
+
+/* The MAD layer only provides a recv SGE the size of
+ * ib_grh + MAD datagram: no Ethernet headers, Ethertype, BTH, DETH,
+ * nor RoCE iCRC. The Cu+ solution must provide a buffer for the entire
+ * receive packet (334 bytes) with no VLAN and then copy the GRH
+ * and the MAD datagram out to the provided SGE.
+ */
+static int bnxt_re_build_qp1_shadow_qp_recv(struct bnxt_re_qp *qp,
+ struct ib_recv_wr *wr,
+ struct bnxt_qplib_swqe *wqe,
+ int payload_size)
+{
+ struct bnxt_qplib_sge ref, sge;
+ u32 rq_prod_index;
+ struct bnxt_re_sqp_entries *sqp_entry;
+
+ rq_prod_index = bnxt_qplib_get_rq_prod_index(&qp->qplib_qp);
+
+ if (!bnxt_qplib_get_qp1_rq_buf(&qp->qplib_qp, &sge))
+ return -ENOMEM;
+
+ /* Create 1 SGE to receive the entire
+ * ethernet packet
+ */
+ /* Save the reference from ULP */
+ ref.addr = wqe->sg_list[0].addr;
+ ref.lkey = wqe->sg_list[0].lkey;
+ ref.size = wqe->sg_list[0].size;
+
+ sqp_entry = &qp->rdev->sqp_tbl[rq_prod_index];
+
+ /* SGE 1 */
+ wqe->sg_list[0].addr = sge.addr;
+ wqe->sg_list[0].lkey = sge.lkey;
+ wqe->sg_list[0].size = BNXT_QPLIB_MAX_QP1_RQ_HDR_SIZE_V2;
+ sge.size -= wqe->sg_list[0].size;
+
+ sqp_entry->sge.addr = ref.addr;
+ sqp_entry->sge.lkey = ref.lkey;
+ sqp_entry->sge.size = ref.size;
+ /* Store the wrid for reporting completion */
+ sqp_entry->wrid = wqe->wr_id;
+ /* change the wqe->wrid to table index */
+ wqe->wr_id = rq_prod_index;
+ return 0;
+}
+
+static int is_ud_qp(struct bnxt_re_qp *qp)
+{
+ return qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_UD;
+}
+
+static int bnxt_re_build_send_wqe(struct bnxt_re_qp *qp,
+ struct ib_send_wr *wr,
+ struct bnxt_qplib_swqe *wqe)
+{
+ struct bnxt_re_ah *ah = NULL;
+
+ if (is_ud_qp(qp)) {
+ ah = container_of(ud_wr(wr)->ah, struct bnxt_re_ah, ib_ah);
+ wqe->send.q_key = ud_wr(wr)->remote_qkey;
+ wqe->send.dst_qp = ud_wr(wr)->remote_qpn;
+ wqe->send.avid = ah->qplib_ah.id;
+ }
+ switch (wr->opcode) {
+ case IB_WR_SEND:
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_SEND;
+ break;
+ case IB_WR_SEND_WITH_IMM:
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM;
+ wqe->send.imm_data = wr->ex.imm_data;
+ break;
+ case IB_WR_SEND_WITH_INV:
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV;
+ wqe->send.inv_key = wr->ex.invalidate_rkey;
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (wr->send_flags & IB_SEND_SIGNALED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+ if (wr->send_flags & IB_SEND_FENCE)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT;
+ if (wr->send_flags & IB_SEND_INLINE)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_INLINE;
+
+ return 0;
+}
+
+static int bnxt_re_build_rdma_wqe(struct ib_send_wr *wr,
+ struct bnxt_qplib_swqe *wqe)
+{
+ switch (wr->opcode) {
+ case IB_WR_RDMA_WRITE:
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE;
+ break;
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM;
+ wqe->rdma.imm_data = wr->ex.imm_data;
+ break;
+ case IB_WR_RDMA_READ:
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_RDMA_READ;
+ wqe->rdma.inv_key = wr->ex.invalidate_rkey;
+ break;
+ default:
+ return -EINVAL;
+ }
+ wqe->rdma.remote_va = rdma_wr(wr)->remote_addr;
+ wqe->rdma.r_key = rdma_wr(wr)->rkey;
+ if (wr->send_flags & IB_SEND_SIGNALED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+ if (wr->send_flags & IB_SEND_FENCE)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT;
+ if (wr->send_flags & IB_SEND_INLINE)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_INLINE;
+
+ return 0;
+}
+
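+/* Translate an atomic compare-and-swap or fetch-and-add work request into
+ * a qplib SWQE using the remote VA, rkey and operands from atomic_wr().
+ */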
+static int bnxt_re_build_atomic_wqe(struct ib_send_wr *wr,
+ struct bnxt_qplib_swqe *wqe)
+{
+ switch (wr->opcode) {
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP;
+ wqe->atomic.swap_data = atomic_wr(wr)->swap;
+ break;
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD;
+ wqe->atomic.cmp_data = atomic_wr(wr)->compare_add;
+ break;
+ default:
+ return -EINVAL;
+ }
+ wqe->atomic.remote_va = atomic_wr(wr)->remote_addr;
+ wqe->atomic.r_key = atomic_wr(wr)->rkey;
+ if (wr->send_flags & IB_SEND_SIGNALED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+ if (wr->send_flags & IB_SEND_FENCE)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT;
+ return 0;
+}
+
+static int bnxt_re_build_inv_wqe(struct ib_send_wr *wr,
+ struct bnxt_qplib_swqe *wqe)
+{
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_LOCAL_INV;
+ wqe->local_inv.inv_l_key = wr->ex.invalidate_rkey;
+
+ if (wr->send_flags & IB_SEND_SIGNALED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+ if (wr->send_flags & IB_SEND_FENCE)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT;
+
+ return 0;
+}
+
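+/* Build a fast-register MR SWQE from an ib_reg_wr: point the WQE at the
+ * HW page-buffer list (PBL), carry over the page list collected by
+ * bnxt_re_map_mr_sg(), and translate the IB access flags and page size.
+ */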
+static int bnxt_re_build_reg_wqe(struct ib_reg_wr *wr,
+ struct bnxt_qplib_swqe *wqe)
+{
+ struct bnxt_re_mr *mr = container_of(wr->mr, struct bnxt_re_mr, ib_mr);
+ struct bnxt_qplib_frpl *qplib_frpl = &mr->qplib_frpl;
+ int access = wr->access;
+
+ wqe->frmr.pbl_ptr = (__le64 *)qplib_frpl->hwq.pbl_ptr[0];
+ wqe->frmr.pbl_dma_ptr = qplib_frpl->hwq.pbl_dma_ptr[0];
+ wqe->frmr.page_list = mr->pages;
+ wqe->frmr.page_list_len = mr->npages;
+ wqe->frmr.levels = qplib_frpl->hwq.level + 1;
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_REG_MR;
+
+ if (wr->wr.send_flags & IB_SEND_FENCE)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+ if (wr->wr.send_flags & IB_SEND_SIGNALED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+
+ if (access & IB_ACCESS_LOCAL_WRITE)
+ wqe->frmr.access_cntl |= SQ_FR_PMR_ACCESS_CNTL_LOCAL_WRITE;
+ if (access & IB_ACCESS_REMOTE_READ)
+ wqe->frmr.access_cntl |= SQ_FR_PMR_ACCESS_CNTL_REMOTE_READ;
+ if (access & IB_ACCESS_REMOTE_WRITE)
+ wqe->frmr.access_cntl |= SQ_FR_PMR_ACCESS_CNTL_REMOTE_WRITE;
+ if (access & IB_ACCESS_REMOTE_ATOMIC)
+ wqe->frmr.access_cntl |= SQ_FR_PMR_ACCESS_CNTL_REMOTE_ATOMIC;
+ if (access & IB_ACCESS_MW_BIND)
+ wqe->frmr.access_cntl |= SQ_FR_PMR_ACCESS_CNTL_WINDOW_BIND;
+
+ wqe->frmr.l_key = wr->key;
+ wqe->frmr.length = wr->mr->length;
+ wqe->frmr.pbl_pg_sz_log = (wr->mr->page_size >> PAGE_SHIFT_4K) - 1;
+ wqe->frmr.va = wr->mr->iova;
+ return 0;
+}
+
+static int bnxt_re_copy_inline_data(struct bnxt_re_dev *rdev,
+ struct ib_send_wr *wr,
+ struct bnxt_qplib_swqe *wqe)
+{
+ /* Copy the inline data to the data field */
+ u8 *in_data;
+ u32 i, sge_len;
+ void *sge_addr;
+
+ in_data = wqe->inline_data;
+ for (i = 0; i < wr->num_sge; i++) {
+ sge_addr = (void *)(unsigned long)
+ wr->sg_list[i].addr;
+ sge_len = wr->sg_list[i].length;
+
+ if ((sge_len + wqe->inline_len) >
+ BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) {
+ dev_err(rdev_to_dev(rdev),
+ "Inline data size requested > supported value");
+ return -EINVAL;
+ }
+
+		memcpy(in_data, sge_addr, sge_len);
+		in_data += sge_len;
+		wqe->inline_len += sge_len;
+ }
+ return wqe->inline_len;
+}
+
+static int bnxt_re_copy_wr_payload(struct bnxt_re_dev *rdev,
+ struct ib_send_wr *wr,
+ struct bnxt_qplib_swqe *wqe)
+{
+ int payload_sz = 0;
+
+ if (wr->send_flags & IB_SEND_INLINE)
+ payload_sz = bnxt_re_copy_inline_data(rdev, wr, wqe);
+ else
+ payload_sz = bnxt_re_build_sgl(wr->sg_list, wqe->sg_list,
+ wqe->num_sge);
+
+ return payload_sz;
+}
+
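+/* Workaround for a HW stall on UD/GSI/raw-ethertype QPs: once
+ * BNXT_RE_UD_QP_HW_STALL WQEs have been posted, re-issue a modify_qp to
+ * RTS and reset the posted-WQE counter.
+ */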
+static void bnxt_ud_qp_hw_stall_workaround(struct bnxt_re_qp *qp)
+{
+ if ((qp->ib_qp.qp_type == IB_QPT_UD ||
+ qp->ib_qp.qp_type == IB_QPT_GSI ||
+ qp->ib_qp.qp_type == IB_QPT_RAW_ETHERTYPE) &&
+ qp->qplib_qp.wqe_cnt == BNXT_RE_UD_QP_HW_STALL) {
+ int qp_attr_mask;
+ struct ib_qp_attr qp_attr;
+
+ qp_attr_mask = IB_QP_STATE;
+ qp_attr.qp_state = IB_QPS_RTS;
+ bnxt_re_modify_qp(&qp->ib_qp, &qp_attr, qp_attr_mask, NULL);
+ qp->qplib_qp.wqe_cnt = 0;
+ }
+}
+
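+/* Post send WRs on the shadow (GSI) QP used for QP1 traffic. Only plain
+ * SENDs are issued here; the SQ doorbell is rung once after the whole WR
+ * chain has been queued.
+ */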
+static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev,
+ struct bnxt_re_qp *qp,
+ struct ib_send_wr *wr)
+{
+ struct bnxt_qplib_swqe wqe;
+ int rc = 0, payload_sz = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->sq_lock, flags);
+ memset(&wqe, 0, sizeof(wqe));
+ while (wr) {
+ /* House keeping */
+ memset(&wqe, 0, sizeof(wqe));
+
+ /* Common */
+ wqe.num_sge = wr->num_sge;
+ if (wr->num_sge > qp->qplib_qp.sq.max_sge) {
+ dev_err(rdev_to_dev(rdev),
+ "Limit exceeded for Send SGEs");
+ rc = -EINVAL;
+ goto bad;
+ }
+
+ payload_sz = bnxt_re_copy_wr_payload(qp->rdev, wr, &wqe);
+ if (payload_sz < 0) {
+ rc = -EINVAL;
+ goto bad;
+ }
+ wqe.wr_id = wr->wr_id;
+
+ wqe.type = BNXT_QPLIB_SWQE_TYPE_SEND;
+
+ rc = bnxt_re_build_send_wqe(qp, wr, &wqe);
+ if (!rc)
+ rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe);
+bad:
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Post send failed opcode = %#x rc = %d",
+ wr->opcode, rc);
+ break;
+ }
+ wr = wr->next;
+ }
+ bnxt_qplib_post_send_db(&qp->qplib_qp);
+ bnxt_ud_qp_hw_stall_workaround(qp);
+ spin_unlock_irqrestore(&qp->sq_lock, flags);
+ return rc;
+}
+
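+/* ib_post_send verb: walk the WR chain under the SQ lock, build a qplib
+ * SWQE per opcode (QP1 SENDs get the raw-QP1 header treatment and the RoCE
+ * CRC flag), ring the SQ doorbell once at the end and apply the UD stall
+ * workaround.
+ */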
+int bnxt_re_post_send(struct ib_qp *ib_qp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+ struct bnxt_qplib_swqe wqe;
+ int rc = 0, payload_sz = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->sq_lock, flags);
+ while (wr) {
+ /* House keeping */
+ memset(&wqe, 0, sizeof(wqe));
+
+ /* Common */
+ wqe.num_sge = wr->num_sge;
+ if (wr->num_sge > qp->qplib_qp.sq.max_sge) {
+ dev_err(rdev_to_dev(qp->rdev),
+ "Limit exceeded for Send SGEs");
+ rc = -EINVAL;
+ goto bad;
+ }
+
+ payload_sz = bnxt_re_copy_wr_payload(qp->rdev, wr, &wqe);
+ if (payload_sz < 0) {
+ rc = -EINVAL;
+ goto bad;
+ }
+ wqe.wr_id = wr->wr_id;
+
+ switch (wr->opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ if (ib_qp->qp_type == IB_QPT_GSI) {
+ rc = bnxt_re_build_qp1_send_v2(qp, wr, &wqe,
+ payload_sz);
+ if (rc)
+ goto bad;
+ wqe.rawqp1.lflags |=
+ SQ_SEND_RAWETH_QP1_LFLAGS_ROCE_CRC;
+ }
+ switch (wr->send_flags) {
+ case IB_SEND_IP_CSUM:
+ wqe.rawqp1.lflags |=
+ SQ_SEND_RAWETH_QP1_LFLAGS_IP_CHKSUM;
+ break;
+ default:
+ break;
+ }
+ /* Fall thru to build the wqe */
+ case IB_WR_SEND_WITH_INV:
+ rc = bnxt_re_build_send_wqe(qp, wr, &wqe);
+ break;
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ case IB_WR_RDMA_READ:
+ rc = bnxt_re_build_rdma_wqe(wr, &wqe);
+ break;
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ rc = bnxt_re_build_atomic_wqe(wr, &wqe);
+ break;
+ case IB_WR_RDMA_READ_WITH_INV:
+ dev_err(rdev_to_dev(qp->rdev),
+ "RDMA Read with Invalidate is not supported");
+ rc = -EINVAL;
+ goto bad;
+ case IB_WR_LOCAL_INV:
+ rc = bnxt_re_build_inv_wqe(wr, &wqe);
+ break;
+ case IB_WR_REG_MR:
+ rc = bnxt_re_build_reg_wqe(reg_wr(wr), &wqe);
+ break;
+ default:
+ /* Unsupported WRs */
+ dev_err(rdev_to_dev(qp->rdev),
+ "WR (%#x) is not supported", wr->opcode);
+ rc = -EINVAL;
+ goto bad;
+ }
+ if (!rc)
+ rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe);
+bad:
+ if (rc) {
+ dev_err(rdev_to_dev(qp->rdev),
+ "post_send failed op:%#x qps = %#x rc = %d\n",
+ wr->opcode, qp->qplib_qp.state, rc);
+ *bad_wr = wr;
+ break;
+ }
+ wr = wr->next;
+ }
+ bnxt_qplib_post_send_db(&qp->qplib_qp);
+ bnxt_ud_qp_hw_stall_workaround(qp);
+ spin_unlock_irqrestore(&qp->sq_lock, flags);
+
+ return rc;
+}
+
+static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev,
+ struct bnxt_re_qp *qp,
+ struct ib_recv_wr *wr)
+{
+ struct bnxt_qplib_swqe wqe;
+ int rc = 0, payload_sz = 0;
+
+ memset(&wqe, 0, sizeof(wqe));
+ while (wr) {
+ /* House keeping */
+ memset(&wqe, 0, sizeof(wqe));
+
+ /* Common */
+ wqe.num_sge = wr->num_sge;
+ if (wr->num_sge > qp->qplib_qp.rq.max_sge) {
+ dev_err(rdev_to_dev(rdev),
+ "Limit exceeded for Receive SGEs");
+ rc = -EINVAL;
+ break;
+ }
+ payload_sz = bnxt_re_build_sgl(wr->sg_list, wqe.sg_list,
+ wr->num_sge);
+ wqe.wr_id = wr->wr_id;
+ wqe.type = BNXT_QPLIB_SWQE_TYPE_RECV;
+
+ rc = bnxt_qplib_post_recv(&qp->qplib_qp, &wqe);
+ if (rc)
+ break;
+
+ wr = wr->next;
+ }
+ if (!rc)
+ bnxt_qplib_post_recv_db(&qp->qplib_qp);
+ return rc;
+}
+
+int bnxt_re_post_recv(struct ib_qp *ib_qp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+ struct bnxt_qplib_swqe wqe;
+ int rc = 0, payload_sz = 0;
+ unsigned long flags;
+ u32 count = 0;
+
+ spin_lock_irqsave(&qp->rq_lock, flags);
+ while (wr) {
+ /* House keeping */
+ memset(&wqe, 0, sizeof(wqe));
+
+ /* Common */
+ wqe.num_sge = wr->num_sge;
+ if (wr->num_sge > qp->qplib_qp.rq.max_sge) {
+ dev_err(rdev_to_dev(qp->rdev),
+ "Limit exceeded for Receive SGEs");
+ rc = -EINVAL;
+ *bad_wr = wr;
+ break;
+ }
+
+ payload_sz = bnxt_re_build_sgl(wr->sg_list, wqe.sg_list,
+ wr->num_sge);
+ wqe.wr_id = wr->wr_id;
+ wqe.type = BNXT_QPLIB_SWQE_TYPE_RECV;
+
+ if (ib_qp->qp_type == IB_QPT_GSI)
+ rc = bnxt_re_build_qp1_shadow_qp_recv(qp, wr, &wqe,
+ payload_sz);
+ if (!rc)
+ rc = bnxt_qplib_post_recv(&qp->qplib_qp, &wqe);
+ if (rc) {
+ *bad_wr = wr;
+ break;
+ }
+
+		/* Ring the DB if the number of RQEs posted reaches the threshold */
+ if (++count >= BNXT_RE_RQ_WQE_THRESHOLD) {
+ bnxt_qplib_post_recv_db(&qp->qplib_qp);
+ count = 0;
+ }
+
+ wr = wr->next;
+ }
+
+ if (count)
+ bnxt_qplib_post_recv_db(&qp->qplib_qp);
+
+ spin_unlock_irqrestore(&qp->rq_lock, flags);
+
+ return rc;
+}
+
+/* Completion Queues */
+int bnxt_re_destroy_cq(struct ib_cq *ib_cq)
+{
+ struct bnxt_re_cq *cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
+ struct bnxt_re_dev *rdev = cq->rdev;
+ int rc;
+
+ rc = bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to destroy HW CQ");
+ return rc;
+ }
+ if (!IS_ERR_OR_NULL(cq->umem))
+ ib_umem_release(cq->umem);
+
+	kfree(cq->cql);
+	kfree(cq);
+ atomic_dec(&rdev->cq_count);
+ rdev->nq.budget--;
+ return 0;
+}
+
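+/* Create a CQ: the requested depth is validated against the device limits
+ * and rounded up to a power of two. User CQs map the ring supplied through
+ * udata via ib_umem_get(); kernel CQs allocate a CQE scratch list (cql)
+ * that bnxt_re_poll_cq() drains into.
+ */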
+struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
+ const struct ib_cq_init_attr *attr,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+ struct bnxt_re_cq *cq = NULL;
+ int rc, entries;
+ int cqe = attr->cqe;
+
+ /* Validate CQ fields */
+ if (cqe < 1 || cqe > dev_attr->max_cq_wqes) {
+		dev_err(rdev_to_dev(rdev), "Failed to create CQ - max exceeded");
+ return ERR_PTR(-EINVAL);
+ }
+ cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+ if (!cq)
+ return ERR_PTR(-ENOMEM);
+
+ cq->rdev = rdev;
+ cq->qplib_cq.cq_handle = (u64)(unsigned long)(&cq->qplib_cq);
+
+ entries = roundup_pow_of_two(cqe + 1);
+ if (entries > dev_attr->max_cq_wqes + 1)
+ entries = dev_attr->max_cq_wqes + 1;
+
+ if (context) {
+ struct bnxt_re_cq_req req;
+ struct bnxt_re_ucontext *uctx = container_of
+ (context,
+ struct bnxt_re_ucontext,
+ ib_uctx);
+ if (ib_copy_from_udata(&req, udata, sizeof(req))) {
+ rc = -EFAULT;
+ goto fail;
+ }
+
+ cq->umem = ib_umem_get(context, req.cq_va,
+ entries * sizeof(struct cq_base),
+ IB_ACCESS_LOCAL_WRITE, 1);
+ if (IS_ERR(cq->umem)) {
+ rc = PTR_ERR(cq->umem);
+ goto fail;
+ }
+ cq->qplib_cq.sghead = cq->umem->sg_head.sgl;
+ cq->qplib_cq.nmap = cq->umem->nmap;
+ cq->qplib_cq.dpi = &uctx->dpi;
+ } else {
+ cq->max_cql = min_t(u32, entries, MAX_CQL_PER_POLL);
+ cq->cql = kcalloc(cq->max_cql, sizeof(struct bnxt_qplib_cqe),
+ GFP_KERNEL);
+ if (!cq->cql) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+
+ cq->qplib_cq.dpi = &rdev->dpi_privileged;
+ cq->qplib_cq.sghead = NULL;
+ cq->qplib_cq.nmap = 0;
+ }
+ cq->qplib_cq.max_wqe = entries;
+ cq->qplib_cq.cnq_hw_ring_id = rdev->nq.ring_id;
+
+ rc = bnxt_qplib_create_cq(&rdev->qplib_res, &cq->qplib_cq);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to create HW CQ");
+ goto fail;
+ }
+
+ cq->ib_cq.cqe = entries;
+ cq->cq_period = cq->qplib_cq.period;
+ rdev->nq.budget++;
+
+ atomic_inc(&rdev->cq_count);
+
+ if (context) {
+ struct bnxt_re_cq_resp resp;
+
+ resp.cqid = cq->qplib_cq.id;
+ resp.tail = cq->qplib_cq.hwq.cons;
+ resp.phase = cq->qplib_cq.period;
+ resp.rsvd = 0;
+ rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to copy CQ udata");
+ bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq);
+ goto c2fail;
+ }
+ }
+
+ return &cq->ib_cq;
+
+c2fail:
+ if (context)
+ ib_umem_release(cq->umem);
+fail:
+ kfree(cq->cql);
+ kfree(cq);
+ return ERR_PTR(rc);
+}
+
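+/* The next three helpers translate HW CQE status codes (requester,
+ * raw QP1 responder and RC responder) into IB work completion statuses.
+ */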
+static u8 __req_to_ib_wc_status(u8 qstatus)
+{
+ switch (qstatus) {
+ case CQ_REQ_STATUS_OK:
+ return IB_WC_SUCCESS;
+ case CQ_REQ_STATUS_BAD_RESPONSE_ERR:
+ return IB_WC_BAD_RESP_ERR;
+ case CQ_REQ_STATUS_LOCAL_LENGTH_ERR:
+ return IB_WC_LOC_LEN_ERR;
+ case CQ_REQ_STATUS_LOCAL_QP_OPERATION_ERR:
+ return IB_WC_LOC_QP_OP_ERR;
+ case CQ_REQ_STATUS_LOCAL_PROTECTION_ERR:
+ return IB_WC_LOC_PROT_ERR;
+ case CQ_REQ_STATUS_MEMORY_MGT_OPERATION_ERR:
+ return IB_WC_GENERAL_ERR;
+ case CQ_REQ_STATUS_REMOTE_INVALID_REQUEST_ERR:
+ return IB_WC_REM_INV_REQ_ERR;
+ case CQ_REQ_STATUS_REMOTE_ACCESS_ERR:
+ return IB_WC_REM_ACCESS_ERR;
+ case CQ_REQ_STATUS_REMOTE_OPERATION_ERR:
+ return IB_WC_REM_OP_ERR;
+ case CQ_REQ_STATUS_RNR_NAK_RETRY_CNT_ERR:
+ return IB_WC_RNR_RETRY_EXC_ERR;
+ case CQ_REQ_STATUS_TRANSPORT_RETRY_CNT_ERR:
+ return IB_WC_RETRY_EXC_ERR;
+ case CQ_REQ_STATUS_WORK_REQUEST_FLUSHED_ERR:
+ return IB_WC_WR_FLUSH_ERR;
+ default:
+ return IB_WC_GENERAL_ERR;
+ }
+}
+
+static u8 __rawqp1_to_ib_wc_status(u8 qstatus)
+{
+ switch (qstatus) {
+ case CQ_RES_RAWETH_QP1_STATUS_OK:
+ return IB_WC_SUCCESS;
+ case CQ_RES_RAWETH_QP1_STATUS_LOCAL_ACCESS_ERROR:
+ return IB_WC_LOC_ACCESS_ERR;
+ case CQ_RES_RAWETH_QP1_STATUS_HW_LOCAL_LENGTH_ERR:
+ return IB_WC_LOC_LEN_ERR;
+ case CQ_RES_RAWETH_QP1_STATUS_LOCAL_PROTECTION_ERR:
+ return IB_WC_LOC_PROT_ERR;
+ case CQ_RES_RAWETH_QP1_STATUS_LOCAL_QP_OPERATION_ERR:
+ return IB_WC_LOC_QP_OP_ERR;
+ case CQ_RES_RAWETH_QP1_STATUS_MEMORY_MGT_OPERATION_ERR:
+ return IB_WC_GENERAL_ERR;
+ case CQ_RES_RAWETH_QP1_STATUS_WORK_REQUEST_FLUSHED_ERR:
+ return IB_WC_WR_FLUSH_ERR;
+ case CQ_RES_RAWETH_QP1_STATUS_HW_FLUSH_ERR:
+ return IB_WC_WR_FLUSH_ERR;
+ default:
+ return IB_WC_GENERAL_ERR;
+ }
+}
+
+static u8 __rc_to_ib_wc_status(u8 qstatus)
+{
+ switch (qstatus) {
+ case CQ_RES_RC_STATUS_OK:
+ return IB_WC_SUCCESS;
+ case CQ_RES_RC_STATUS_LOCAL_ACCESS_ERROR:
+ return IB_WC_LOC_ACCESS_ERR;
+ case CQ_RES_RC_STATUS_LOCAL_LENGTH_ERR:
+ return IB_WC_LOC_LEN_ERR;
+ case CQ_RES_RC_STATUS_LOCAL_PROTECTION_ERR:
+ return IB_WC_LOC_PROT_ERR;
+ case CQ_RES_RC_STATUS_LOCAL_QP_OPERATION_ERR:
+ return IB_WC_LOC_QP_OP_ERR;
+ case CQ_RES_RC_STATUS_MEMORY_MGT_OPERATION_ERR:
+ return IB_WC_GENERAL_ERR;
+ case CQ_RES_RC_STATUS_REMOTE_INVALID_REQUEST_ERR:
+ return IB_WC_REM_INV_REQ_ERR;
+ case CQ_RES_RC_STATUS_WORK_REQUEST_FLUSHED_ERR:
+ return IB_WC_WR_FLUSH_ERR;
+ case CQ_RES_RC_STATUS_HW_FLUSH_ERR:
+ return IB_WC_WR_FLUSH_ERR;
+ default:
+ return IB_WC_GENERAL_ERR;
+ }
+}
+
+static void bnxt_re_process_req_wc(struct ib_wc *wc, struct bnxt_qplib_cqe *cqe)
+{
+ switch (cqe->type) {
+ case BNXT_QPLIB_SWQE_TYPE_SEND:
+ wc->opcode = IB_WC_SEND;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM:
+ wc->opcode = IB_WC_SEND;
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV:
+ wc->opcode = IB_WC_SEND;
+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE:
+ wc->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM:
+ wc->opcode = IB_WC_RDMA_WRITE;
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_RDMA_READ:
+ wc->opcode = IB_WC_RDMA_READ;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP:
+ wc->opcode = IB_WC_COMP_SWAP;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD:
+ wc->opcode = IB_WC_FETCH_ADD;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_LOCAL_INV:
+ wc->opcode = IB_WC_LOCAL_INV;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_REG_MR:
+ wc->opcode = IB_WC_REG_MR;
+ break;
+ default:
+ wc->opcode = IB_WC_SEND;
+ break;
+ }
+
+ wc->status = __req_to_ib_wc_status(cqe->status);
+}
+
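+/* Classify a raw QP1 completion from the raweth flags: returns RoCE v1,
+ * RoCE v2/IPv4 or RoCE v2/IPv6, or -1 if the itype bits do not indicate a
+ * RoCE packet.
+ */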
+static int bnxt_re_check_packet_type(u16 raweth_qp1_flags,
+ u16 raweth_qp1_flags2)
+{
+ bool is_udp = false, is_ipv6 = false, is_ipv4 = false;
+
+ /* raweth_qp1_flags Bit 9-6 indicates itype */
+ if ((raweth_qp1_flags & CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_ROCE)
+ != CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_ROCE)
+ return -1;
+
+ if (raweth_qp1_flags2 &
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_IP_CS_CALC &&
+ raweth_qp1_flags2 &
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_L4_CS_CALC) {
+ is_udp = true;
+		/* raweth_qp1_flags2 Bit 8 indicates ip_type. 0 - v4, 1 - v6 */
+		if (raweth_qp1_flags2 &
+		    CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_IP_TYPE)
+			is_ipv6 = true;
+		else
+			is_ipv4 = true;
+		return is_ipv6 ? BNXT_RE_ROCEV2_IPV6_PACKET :
+				 BNXT_RE_ROCEV2_IPV4_PACKET;
+ } else {
+ return BNXT_RE_ROCE_V1_PACKET;
+ }
+}
+
+static int bnxt_re_to_ib_nw_type(int nw_type)
+{
+ u8 nw_hdr_type = 0xFF;
+
+ switch (nw_type) {
+ case BNXT_RE_ROCE_V1_PACKET:
+ nw_hdr_type = RDMA_NETWORK_ROCE_V1;
+ break;
+ case BNXT_RE_ROCEV2_IPV4_PACKET:
+ nw_hdr_type = RDMA_NETWORK_IPV4;
+ break;
+ case BNXT_RE_ROCEV2_IPV6_PACKET:
+ nw_hdr_type = RDMA_NETWORK_IPV6;
+ break;
+ }
+ return nw_hdr_type;
+}
+
+static bool bnxt_re_is_loopback_packet(struct bnxt_re_dev *rdev,
+ void *rq_hdr_buf)
+{
+ u8 *tmp_buf = NULL;
+ struct ethhdr *eth_hdr;
+ u16 eth_type;
+ bool rc = false;
+
+ tmp_buf = (u8 *)rq_hdr_buf;
+	/*
+	 * If the destination MAC is not the same as the interface MAC,
+	 * this could be a loopback or a multicast address; check whether
+	 * it is a loopback packet.
+	 */
+ if (!ether_addr_equal(tmp_buf, rdev->netdev->dev_addr)) {
+ tmp_buf += 4;
+ /* Check the ether type */
+ eth_hdr = (struct ethhdr *)tmp_buf;
+ eth_type = ntohs(eth_hdr->h_proto);
+ switch (eth_type) {
+ case ETH_P_IBOE:
+ rc = true;
+ break;
+ case ETH_P_IP:
+ case ETH_P_IPV6: {
+ u32 len;
+ struct udphdr *udp_hdr;
+
+ len = (eth_type == ETH_P_IP ? sizeof(struct iphdr) :
+ sizeof(struct ipv6hdr));
+ tmp_buf += sizeof(struct ethhdr) + len;
+ udp_hdr = (struct udphdr *)tmp_buf;
+ if (ntohs(udp_hdr->dest) ==
+ ROCE_V2_UDP_DPORT)
+ rc = true;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ return rc;
+}
+
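+/* A QP1 packet received on the raw ethernet QP is relayed to the shadow
+ * GSI QP: the original CQE is stashed in the sqp_tbl entry, receive
+ * buffers are posted on the shadow QP, and the GRH + MAD payload (minus
+ * the L2/UDP headers) is sent to it so that the MAD layer sees a normal
+ * UD completion.
+ */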
+static int bnxt_re_process_raw_qp_pkt_rx(struct bnxt_re_qp *qp1_qp,
+ struct bnxt_qplib_cqe *cqe)
+{
+ struct bnxt_re_dev *rdev = qp1_qp->rdev;
+ struct bnxt_re_sqp_entries *sqp_entry = NULL;
+ struct bnxt_re_qp *qp = rdev->qp1_sqp;
+ struct ib_send_wr *swr;
+ struct ib_ud_wr udwr;
+ struct ib_recv_wr rwr;
+ int pkt_type = 0;
+ u32 tbl_idx;
+ void *rq_hdr_buf;
+ dma_addr_t rq_hdr_buf_map;
+ dma_addr_t shrq_hdr_buf_map;
+ u32 offset = 0;
+ u32 skip_bytes = 0;
+ struct ib_sge s_sge[2];
+ struct ib_sge r_sge[2];
+ int rc;
+
+ memset(&udwr, 0, sizeof(udwr));
+ memset(&rwr, 0, sizeof(rwr));
+ memset(&s_sge, 0, sizeof(s_sge));
+ memset(&r_sge, 0, sizeof(r_sge));
+
+ swr = &udwr.wr;
+ tbl_idx = cqe->wr_id;
+
+ rq_hdr_buf = qp1_qp->qplib_qp.rq_hdr_buf +
+ (tbl_idx * qp1_qp->qplib_qp.rq_hdr_buf_size);
+ rq_hdr_buf_map = bnxt_qplib_get_qp_buf_from_index(&qp1_qp->qplib_qp,
+ tbl_idx);
+
+ /* Shadow QP header buffer */
+ shrq_hdr_buf_map = bnxt_qplib_get_qp_buf_from_index(&qp->qplib_qp,
+ tbl_idx);
+ sqp_entry = &rdev->sqp_tbl[tbl_idx];
+
+ /* Store this cqe */
+ memcpy(&sqp_entry->cqe, cqe, sizeof(struct bnxt_qplib_cqe));
+ sqp_entry->qp1_qp = qp1_qp;
+
+ /* Find packet type from the cqe */
+
+ pkt_type = bnxt_re_check_packet_type(cqe->raweth_qp1_flags,
+ cqe->raweth_qp1_flags2);
+ if (pkt_type < 0) {
+ dev_err(rdev_to_dev(rdev), "Invalid packet\n");
+ return -EINVAL;
+ }
+
+ /* Adjust the offset for the user buffer and post in the rq */
+
+ if (pkt_type == BNXT_RE_ROCEV2_IPV4_PACKET)
+ offset = 20;
+
+ /*
+ * QP1 loopback packet has 4 bytes of internal header before
+ * ether header. Skip these four bytes.
+ */
+ if (bnxt_re_is_loopback_packet(rdev, rq_hdr_buf))
+ skip_bytes = 4;
+
+	/* First send SGE. Skip the ether header */
+ s_sge[0].addr = rq_hdr_buf_map + BNXT_QPLIB_MAX_QP1_RQ_ETH_HDR_SIZE
+ + skip_bytes;
+ s_sge[0].lkey = 0xFFFFFFFF;
+ s_sge[0].length = offset ? BNXT_QPLIB_MAX_GRH_HDR_SIZE_IPV4 :
+ BNXT_QPLIB_MAX_GRH_HDR_SIZE_IPV6;
+
+ /* Second Send SGE */
+ s_sge[1].addr = s_sge[0].addr + s_sge[0].length +
+ BNXT_QPLIB_MAX_QP1_RQ_BDETH_HDR_SIZE;
+ if (pkt_type != BNXT_RE_ROCE_V1_PACKET)
+ s_sge[1].addr += 8;
+ s_sge[1].lkey = 0xFFFFFFFF;
+ s_sge[1].length = 256;
+
+ /* First recv SGE */
+
+ r_sge[0].addr = shrq_hdr_buf_map;
+ r_sge[0].lkey = 0xFFFFFFFF;
+ r_sge[0].length = 40;
+
+ r_sge[1].addr = sqp_entry->sge.addr + offset;
+ r_sge[1].lkey = sqp_entry->sge.lkey;
+ r_sge[1].length = BNXT_QPLIB_MAX_GRH_HDR_SIZE_IPV6 + 256 - offset;
+
+ /* Create receive work request */
+ rwr.num_sge = 2;
+ rwr.sg_list = r_sge;
+ rwr.wr_id = tbl_idx;
+ rwr.next = NULL;
+
+ rc = bnxt_re_post_recv_shadow_qp(rdev, qp, &rwr);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to post Rx buffers to shadow QP");
+ return -ENOMEM;
+ }
+
+ swr->num_sge = 2;
+ swr->sg_list = s_sge;
+ swr->wr_id = tbl_idx;
+ swr->opcode = IB_WR_SEND;
+ swr->next = NULL;
+
+ udwr.ah = &rdev->sqp_ah->ib_ah;
+ udwr.remote_qpn = rdev->qp1_sqp->qplib_qp.id;
+ udwr.remote_qkey = rdev->qp1_sqp->qplib_qp.qkey;
+
+ /* post data received in the send queue */
+ rc = bnxt_re_post_send_shadow_qp(rdev, qp, swr);
+
+ return 0;
+}
+
+static void bnxt_re_process_res_rawqp1_wc(struct ib_wc *wc,
+ struct bnxt_qplib_cqe *cqe)
+{
+ wc->opcode = IB_WC_RECV;
+ wc->status = __rawqp1_to_ib_wc_status(cqe->status);
+ wc->wc_flags |= IB_WC_GRH;
+}
+
+static void bnxt_re_process_res_rc_wc(struct ib_wc *wc,
+ struct bnxt_qplib_cqe *cqe)
+{
+ wc->opcode = IB_WC_RECV;
+ wc->status = __rc_to_ib_wc_status(cqe->status);
+
+ if (cqe->flags & CQ_RES_RC_FLAGS_IMM)
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ if (cqe->flags & CQ_RES_RC_FLAGS_INV)
+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+ if ((cqe->flags & (CQ_RES_RC_FLAGS_RDMA | CQ_RES_RC_FLAGS_IMM)) ==
+ (CQ_RES_RC_FLAGS_RDMA | CQ_RES_RC_FLAGS_IMM))
+ wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+}
+
+static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *qp,
+ struct ib_wc *wc,
+ struct bnxt_qplib_cqe *cqe)
+{
+ u32 tbl_idx;
+ struct bnxt_re_dev *rdev = qp->rdev;
+ struct bnxt_re_qp *qp1_qp = NULL;
+ struct bnxt_qplib_cqe *orig_cqe = NULL;
+ struct bnxt_re_sqp_entries *sqp_entry = NULL;
+ int nw_type;
+
+ tbl_idx = cqe->wr_id;
+
+ sqp_entry = &rdev->sqp_tbl[tbl_idx];
+ qp1_qp = sqp_entry->qp1_qp;
+ orig_cqe = &sqp_entry->cqe;
+
+ wc->wr_id = sqp_entry->wrid;
+ wc->byte_len = orig_cqe->length;
+ wc->qp = &qp1_qp->ib_qp;
+
+ wc->ex.imm_data = orig_cqe->immdata;
+ wc->src_qp = orig_cqe->src_qp;
+ memcpy(wc->smac, orig_cqe->smac, ETH_ALEN);
+ wc->port_num = 1;
+ wc->vendor_err = orig_cqe->status;
+
+ wc->opcode = IB_WC_RECV;
+ wc->status = __rawqp1_to_ib_wc_status(orig_cqe->status);
+ wc->wc_flags |= IB_WC_GRH;
+
+ nw_type = bnxt_re_check_packet_type(orig_cqe->raweth_qp1_flags,
+ orig_cqe->raweth_qp1_flags2);
+ if (nw_type >= 0) {
+ wc->network_hdr_type = bnxt_re_to_ib_nw_type(nw_type);
+ wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
+ }
+}
+
+static void bnxt_re_process_res_ud_wc(struct ib_wc *wc,
+ struct bnxt_qplib_cqe *cqe)
+{
+ wc->opcode = IB_WC_RECV;
+ wc->status = __rc_to_ib_wc_status(cqe->status);
+
+ if (cqe->flags & CQ_RES_RC_FLAGS_IMM)
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ if (cqe->flags & CQ_RES_RC_FLAGS_INV)
+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+ if ((cqe->flags & (CQ_RES_RC_FLAGS_RDMA | CQ_RES_RC_FLAGS_IMM)) ==
+ (CQ_RES_RC_FLAGS_RDMA | CQ_RES_RC_FLAGS_IMM))
+ wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+}
+
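+/* Post the fence/bind memory-window WQE (the "phantom" WQE) under the SQ
+ * lock and account for it in phantom_wqe_cnt.
+ */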
+static int send_phantom_wqe(struct bnxt_re_qp *qp)
+{
+ struct bnxt_qplib_qp *lib_qp = &qp->qplib_qp;
+ unsigned long flags;
+ int rc = 0;
+
+ spin_lock_irqsave(&qp->sq_lock, flags);
+
+ rc = bnxt_re_bind_fence_mw(lib_qp);
+ if (!rc) {
+ lib_qp->sq.phantom_wqe_cnt++;
+ dev_dbg(&lib_qp->sq.hwq.pdev->dev,
+ "qp %#x sq->prod %#x sw_prod %#x phantom_wqe_cnt %d\n",
+ lib_qp->id, lib_qp->sq.hwq.prod,
+ HWQ_CMP(lib_qp->sq.hwq.prod, &lib_qp->sq.hwq),
+ lib_qp->sq.phantom_wqe_cnt);
+ }
+
+ spin_unlock_irqrestore(&qp->sq_lock, flags);
+ return rc;
+}
+
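+/* ib_poll_cq verb: harvest up to min(num_entries, max_cql) CQEs into the
+ * cql scratch array and transcribe them into ib_wc entries. A pending
+ * phantom WQE is posted first when the qplib layer requests it.
+ * Completions belonging to the shadow GSI QP are either consumed silently
+ * (requester side) or reported using the originally stored QP1 completion
+ * context.
+ */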
+int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
+{
+ struct bnxt_re_cq *cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
+ struct bnxt_re_qp *qp;
+ struct bnxt_qplib_cqe *cqe;
+ int i, ncqe, budget;
+ struct bnxt_qplib_q *sq;
+ struct bnxt_qplib_qp *lib_qp;
+ u32 tbl_idx;
+ struct bnxt_re_sqp_entries *sqp_entry = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ budget = min_t(u32, num_entries, cq->max_cql);
+ num_entries = budget;
+ if (!cq->cql) {
+ dev_err(rdev_to_dev(cq->rdev), "POLL CQ : no CQL to use");
+ goto exit;
+ }
+ cqe = &cq->cql[0];
+ while (budget) {
+ lib_qp = NULL;
+ ncqe = bnxt_qplib_poll_cq(&cq->qplib_cq, cqe, budget, &lib_qp);
+ if (lib_qp) {
+ sq = &lib_qp->sq;
+ if (sq->send_phantom) {
+ qp = container_of(lib_qp,
+ struct bnxt_re_qp, qplib_qp);
+ if (send_phantom_wqe(qp) == -ENOMEM)
+ dev_err(rdev_to_dev(cq->rdev),
+ "Phantom failed! Scheduled to send again\n");
+ else
+ sq->send_phantom = false;
+ }
+ }
+
+ if (!ncqe)
+ break;
+
+ for (i = 0; i < ncqe; i++, cqe++) {
+ /* Transcribe each qplib_wqe back to ib_wc */
+ memset(wc, 0, sizeof(*wc));
+
+ wc->wr_id = cqe->wr_id;
+ wc->byte_len = cqe->length;
+ qp = container_of
+ ((struct bnxt_qplib_qp *)
+ (unsigned long)(cqe->qp_handle),
+ struct bnxt_re_qp, qplib_qp);
+ if (!qp) {
+ dev_err(rdev_to_dev(cq->rdev),
+ "POLL CQ : bad QP handle");
+ continue;
+ }
+ wc->qp = &qp->ib_qp;
+ wc->ex.imm_data = cqe->immdata;
+ wc->src_qp = cqe->src_qp;
+ memcpy(wc->smac, cqe->smac, ETH_ALEN);
+ wc->port_num = 1;
+ wc->vendor_err = cqe->status;
+
+ switch (cqe->opcode) {
+ case CQ_BASE_CQE_TYPE_REQ:
+ if (qp->qplib_qp.id ==
+ qp->rdev->qp1_sqp->qplib_qp.id) {
+ /* Handle this completion with
+ * the stored completion
+ */
+ memset(wc, 0, sizeof(*wc));
+ continue;
+ }
+ bnxt_re_process_req_wc(wc, cqe);
+ break;
+ case CQ_BASE_CQE_TYPE_RES_RAWETH_QP1:
+ if (!cqe->status) {
+ int rc = 0;
+
+ rc = bnxt_re_process_raw_qp_pkt_rx
+ (qp, cqe);
+ if (!rc) {
+ memset(wc, 0, sizeof(*wc));
+ continue;
+ }
+ cqe->status = -1;
+ }
+ /* Errors need not be looped back.
+ * But change the wr_id to the one
+ * stored in the table
+ */
+ tbl_idx = cqe->wr_id;
+ sqp_entry = &cq->rdev->sqp_tbl[tbl_idx];
+ wc->wr_id = sqp_entry->wrid;
+ bnxt_re_process_res_rawqp1_wc(wc, cqe);
+ break;
+ case CQ_BASE_CQE_TYPE_RES_RC:
+ bnxt_re_process_res_rc_wc(wc, cqe);
+ break;
+ case CQ_BASE_CQE_TYPE_RES_UD:
+ if (qp->qplib_qp.id ==
+ qp->rdev->qp1_sqp->qplib_qp.id) {
+ /* Handle this completion with
+ * the stored completion
+ */
+ if (cqe->status) {
+ continue;
+ } else {
+ bnxt_re_process_res_shadow_qp_wc
+ (qp, wc, cqe);
+ break;
+ }
+ }
+ bnxt_re_process_res_ud_wc(wc, cqe);
+ break;
+ default:
+ dev_err(rdev_to_dev(cq->rdev),
+ "POLL CQ : type 0x%x not handled",
+ cqe->opcode);
+ continue;
+ }
+ wc++;
+ budget--;
+ }
+ }
+exit:
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+ return num_entries - budget;
+}
+
+int bnxt_re_req_notify_cq(struct ib_cq *ib_cq,
+ enum ib_cq_notify_flags ib_cqn_flags)
+{
+ struct bnxt_re_cq *cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
+ int type = 0;
+
+ /* Trigger on the very next completion */
+ if (ib_cqn_flags & IB_CQ_NEXT_COMP)
+ type = DBR_DBR_TYPE_CQ_ARMALL;
+ /* Trigger on the next solicited completion */
+ else if (ib_cqn_flags & IB_CQ_SOLICITED)
+ type = DBR_DBR_TYPE_CQ_ARMSE;
+
+ /* Poll to see if there are missed events */
+ if ((ib_cqn_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
+ !(bnxt_qplib_is_cq_empty(&cq->qplib_cq)))
+ return 1;
+
+ bnxt_qplib_req_notify_cq(&cq->qplib_cq, type);
+
+ return 0;
+}
+
+/* Memory Regions */
+struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct bnxt_re_mr *mr;
+ u64 pbl = 0;
+ int rc;
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->rdev = rdev;
+ mr->qplib_mr.pd = &pd->qplib_pd;
+ mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags);
+ mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+
+ /* Allocate and register 0 as the address */
+ rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+ if (rc)
+ goto fail;
+
+ mr->qplib_mr.hwq.level = PBL_LVL_MAX;
+	mr->qplib_mr.total_size = -1; /* Infinite length */
+ rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl, 0, false);
+ if (rc)
+ goto fail_mr;
+
+ mr->ib_mr.lkey = mr->qplib_mr.lkey;
+ if (mr_access_flags & (IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_ATOMIC))
+ mr->ib_mr.rkey = mr->ib_mr.lkey;
+ atomic_inc(&rdev->mr_count);
+
+ return &mr->ib_mr;
+
+fail_mr:
+ bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+fail:
+ kfree(mr);
+ return ERR_PTR(rc);
+}
+
+int bnxt_re_dereg_mr(struct ib_mr *ib_mr)
+{
+ struct bnxt_re_mr *mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
+ struct bnxt_re_dev *rdev = mr->rdev;
+ int rc;
+
+ rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Dereg MR failed: %#x\n", rc);
+ return rc;
+ }
+
+ if (mr->npages && mr->pages) {
+ rc = bnxt_qplib_free_fast_reg_page_list(&rdev->qplib_res,
+ &mr->qplib_frpl);
+ kfree(mr->pages);
+ mr->npages = 0;
+ mr->pages = NULL;
+ }
+ if (!IS_ERR_OR_NULL(mr->ib_umem))
+ ib_umem_release(mr->ib_umem);
+
+ kfree(mr);
+ atomic_dec(&rdev->mr_count);
+ return rc;
+}
+
+static int bnxt_re_set_page(struct ib_mr *ib_mr, u64 addr)
+{
+ struct bnxt_re_mr *mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
+
+ if (unlikely(mr->npages == mr->qplib_frpl.max_pg_ptrs))
+ return -ENOMEM;
+
+ mr->pages[mr->npages++] = addr;
+ return 0;
+}
+
+int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset)
+{
+ struct bnxt_re_mr *mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
+
+ mr->npages = 0;
+ return ib_sg_to_pages(ib_mr, sg, sg_nents, sg_offset, bnxt_re_set_page);
+}
+
+struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type,
+ u32 max_num_sg)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct bnxt_re_mr *mr = NULL;
+ int rc;
+
+ if (type != IB_MR_TYPE_MEM_REG) {
+ dev_dbg(rdev_to_dev(rdev), "MR type 0x%x not supported", type);
+ return ERR_PTR(-EINVAL);
+ }
+ if (max_num_sg > MAX_PBL_LVL_1_PGS)
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->rdev = rdev;
+ mr->qplib_mr.pd = &pd->qplib_pd;
+ mr->qplib_mr.flags = BNXT_QPLIB_FR_PMR;
+ mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+
+ rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+ if (rc)
+ goto fail;
+
+ mr->ib_mr.lkey = mr->qplib_mr.lkey;
+ mr->ib_mr.rkey = mr->ib_mr.lkey;
+
+ mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
+ if (!mr->pages) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ rc = bnxt_qplib_alloc_fast_reg_page_list(&rdev->qplib_res,
+ &mr->qplib_frpl, max_num_sg);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to allocate HW FR page list");
+ goto fail_mr;
+ }
+
+ atomic_inc(&rdev->mr_count);
+ return &mr->ib_mr;
+
+fail_mr:
+ bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+fail:
+ kfree(mr->pages);
+ kfree(mr);
+ return ERR_PTR(rc);
+}
+
+struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct bnxt_re_mw *mw;
+ int rc;
+
+ mw = kzalloc(sizeof(*mw), GFP_KERNEL);
+ if (!mw)
+ return ERR_PTR(-ENOMEM);
+ mw->rdev = rdev;
+ mw->qplib_mw.pd = &pd->qplib_pd;
+
+ mw->qplib_mw.type = (type == IB_MW_TYPE_1 ?
+ CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1 :
+ CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B);
+ rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mw->qplib_mw);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Allocate MW failed!");
+ goto fail;
+ }
+ mw->ib_mw.rkey = mw->qplib_mw.rkey;
+
+ atomic_inc(&rdev->mw_count);
+ return &mw->ib_mw;
+
+fail:
+ kfree(mw);
+ return ERR_PTR(rc);
+}
+
+int bnxt_re_dealloc_mw(struct ib_mw *ib_mw)
+{
+ struct bnxt_re_mw *mw = container_of(ib_mw, struct bnxt_re_mw, ib_mw);
+ struct bnxt_re_dev *rdev = mw->rdev;
+ int rc;
+
+ rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mw->qplib_mw);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Free MW failed: %#x\n", rc);
+ return rc;
+ }
+
+ kfree(mw);
+ atomic_dec(&rdev->mw_count);
+ return rc;
+}
+
+/* uverbs */
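+/* Register a user MR: pin the user pages with ib_umem_get(), build a flat
+ * page-buffer list (PBL) of DMA addresses from the umem scatterlist and
+ * hand it to the qplib layer. Huge pages and non-PAGE_SIZE umem page
+ * shifts are rejected.
+ */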
+struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct bnxt_re_mr *mr;
+ struct ib_umem *umem;
+ u64 *pbl_tbl, *pbl_tbl_orig;
+ int i, umem_pgs, pages, rc;
+ struct scatterlist *sg;
+ int entry;
+
+ if (length > BNXT_RE_MAX_MR_SIZE) {
+		dev_err(rdev_to_dev(rdev), "MR Size: %llu > Max supported: %ld\n",
+			length, BNXT_RE_MAX_MR_SIZE);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->rdev = rdev;
+ mr->qplib_mr.pd = &pd->qplib_pd;
+ mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags);
+ mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_MR;
+
+ umem = ib_umem_get(ib_pd->uobject->context, start, length,
+ mr_access_flags, 0);
+ if (IS_ERR(umem)) {
+ dev_err(rdev_to_dev(rdev), "Failed to get umem");
+ rc = -EFAULT;
+ goto free_mr;
+ }
+ mr->ib_umem = umem;
+
+ rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to allocate MR");
+ goto release_umem;
+ }
+ /* The fixed portion of the rkey is the same as the lkey */
+ mr->ib_mr.rkey = mr->qplib_mr.rkey;
+
+ mr->qplib_mr.va = virt_addr;
+ umem_pgs = ib_umem_page_count(umem);
+ if (!umem_pgs) {
+ dev_err(rdev_to_dev(rdev), "umem is invalid!");
+ rc = -EINVAL;
+ goto free_mrw;
+ }
+ mr->qplib_mr.total_size = length;
+
+	pbl_tbl = kcalloc(umem_pgs, sizeof(u64), GFP_KERNEL);
+	if (!pbl_tbl) {
+		rc = -ENOMEM;
+ goto free_mrw;
+ }
+ pbl_tbl_orig = pbl_tbl;
+
+ if (umem->hugetlb) {
+ dev_err(rdev_to_dev(rdev), "umem hugetlb not supported!");
+ rc = -EFAULT;
+ goto fail;
+ }
+
+ if (umem->page_shift != PAGE_SHIFT) {
+ dev_err(rdev_to_dev(rdev), "umem page shift unsupported!");
+ rc = -EFAULT;
+ goto fail;
+ }
+ /* Map umem buf ptrs to the PBL */
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ pages = sg_dma_len(sg) >> umem->page_shift;
+ for (i = 0; i < pages; i++, pbl_tbl++)
+ *pbl_tbl = sg_dma_address(sg) + (i << umem->page_shift);
+ }
+ rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl_orig,
+ umem_pgs, false);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to register user MR");
+ goto fail;
+ }
+
+ kfree(pbl_tbl_orig);
+
+ mr->ib_mr.lkey = mr->qplib_mr.lkey;
+ mr->ib_mr.rkey = mr->qplib_mr.lkey;
+ atomic_inc(&rdev->mr_count);
+
+ return &mr->ib_mr;
+fail:
+ kfree(pbl_tbl_orig);
+free_mrw:
+ bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+release_umem:
+ ib_umem_release(umem);
+free_mr:
+ kfree(mr);
+ return ERR_PTR(rc);
+}
+
+struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct bnxt_re_uctx_resp resp;
+ struct bnxt_re_ucontext *uctx;
+ struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+ int rc;
+
+ dev_dbg(rdev_to_dev(rdev), "ABI version requested %d",
+ ibdev->uverbs_abi_ver);
+
+ if (ibdev->uverbs_abi_ver != BNXT_RE_ABI_VERSION) {
+		dev_dbg(rdev_to_dev(rdev),
+			"requested ABI version differs from the supported version %d",
+			BNXT_RE_ABI_VERSION);
+ return ERR_PTR(-EPERM);
+ }
+
+ uctx = kzalloc(sizeof(*uctx), GFP_KERNEL);
+ if (!uctx)
+ return ERR_PTR(-ENOMEM);
+
+ uctx->rdev = rdev;
+
+ uctx->shpg = (void *)__get_free_page(GFP_KERNEL);
+ if (!uctx->shpg) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ spin_lock_init(&uctx->sh_lock);
+
+	resp.dev_id = rdev->en_dev->pdev->devfn; /* Temp, use idr_alloc instead */
+ resp.max_qp = rdev->qplib_ctx.qpc_count;
+ resp.pg_size = PAGE_SIZE;
+ resp.cqe_sz = sizeof(struct cq_base);
+ resp.max_cqd = dev_attr->max_cq_wqes;
+ resp.rsvd = 0;
+
+ rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to copy user context");
+ rc = -EFAULT;
+ goto cfail;
+ }
+
+ return &uctx->ib_uctx;
+cfail:
+ free_page((unsigned long)uctx->shpg);
+ uctx->shpg = NULL;
+fail:
+ kfree(uctx);
+ return ERR_PTR(rc);
+}
+
+int bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx)
+{
+ struct bnxt_re_ucontext *uctx = container_of(ib_uctx,
+ struct bnxt_re_ucontext,
+ ib_uctx);
+
+ struct bnxt_re_dev *rdev = uctx->rdev;
+ int rc = 0;
+
+ if (uctx->shpg)
+ free_page((unsigned long)uctx->shpg);
+
+ if (uctx->dpi.dbr) {
+		/* Free the DPI (allocated along with the application's first
+		 * PD) and mark the context dpi as NULL.
+		 */
+ rc = bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
+ &rdev->qplib_res.dpi_tbl,
+ &uctx->dpi);
+ if (rc)
+			dev_err(rdev_to_dev(rdev), "Deallocate HW DPI failed!");
+		/* Don't fail, continue */
+ uctx->dpi.dbr = NULL;
+ }
+
+ kfree(uctx);
+ return 0;
+}
+
+/* Helper function to mmap the virtual memory from user app */
+int bnxt_re_mmap(struct ib_ucontext *ib_uctx, struct vm_area_struct *vma)
+{
+ struct bnxt_re_ucontext *uctx = container_of(ib_uctx,
+ struct bnxt_re_ucontext,
+ ib_uctx);
+ struct bnxt_re_dev *rdev = uctx->rdev;
+ u64 pfn;
+
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ if (vma->vm_pgoff) {
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+ PAGE_SIZE, vma->vm_page_prot)) {
+ dev_err(rdev_to_dev(rdev), "Failed to map DPI");
+ return -EAGAIN;
+ }
+ } else {
+ pfn = virt_to_phys(uctx->shpg) >> PAGE_SHIFT;
+ if (remap_pfn_range(vma, vma->vm_start,
+ pfn, PAGE_SIZE, vma->vm_page_prot)) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to map shared page");
+ return -EAGAIN;
+ }
+ }
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
new file mode 100644
index 000000000000..a0bb7e33d7ca
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -0,0 +1,206 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: IB Verbs interpreter (header)
+ */
+
+#ifndef __BNXT_RE_IB_VERBS_H__
+#define __BNXT_RE_IB_VERBS_H__
+
+struct bnxt_re_gid_ctx {
+ u32 idx;
+ u32 refcnt;
+};
+
+#define BNXT_RE_FENCE_BYTES 64
+struct bnxt_re_fence_data {
+ u32 size;
+ u8 va[BNXT_RE_FENCE_BYTES];
+ dma_addr_t dma_addr;
+ struct bnxt_re_mr *mr;
+ struct ib_mw *mw;
+ struct bnxt_qplib_swqe bind_wqe;
+ u32 bind_rkey;
+};
+
+struct bnxt_re_pd {
+ struct bnxt_re_dev *rdev;
+ struct ib_pd ib_pd;
+ struct bnxt_qplib_pd qplib_pd;
+ struct bnxt_re_fence_data fence;
+};
+
+struct bnxt_re_ah {
+ struct bnxt_re_dev *rdev;
+ struct ib_ah ib_ah;
+ struct bnxt_qplib_ah qplib_ah;
+};
+
+struct bnxt_re_qp {
+ struct list_head list;
+ struct bnxt_re_dev *rdev;
+ struct ib_qp ib_qp;
+ spinlock_t sq_lock; /* protect sq */
+ spinlock_t rq_lock; /* protect rq */
+ struct bnxt_qplib_qp qplib_qp;
+ struct ib_umem *sumem;
+ struct ib_umem *rumem;
+ /* QP1 */
+ u32 send_psn;
+ struct ib_ud_header qp1_hdr;
+};
+
+struct bnxt_re_cq {
+ struct bnxt_re_dev *rdev;
+ spinlock_t cq_lock; /* protect cq */
+ u16 cq_count;
+ u16 cq_period;
+ struct ib_cq ib_cq;
+ struct bnxt_qplib_cq qplib_cq;
+ struct bnxt_qplib_cqe *cql;
+#define MAX_CQL_PER_POLL 1024
+ u32 max_cql;
+ struct ib_umem *umem;
+};
+
+struct bnxt_re_mr {
+ struct bnxt_re_dev *rdev;
+ struct ib_mr ib_mr;
+ struct ib_umem *ib_umem;
+ struct bnxt_qplib_mrw qplib_mr;
+ u32 npages;
+ u64 *pages;
+ struct bnxt_qplib_frpl qplib_frpl;
+};
+
+struct bnxt_re_frpl {
+ struct bnxt_re_dev *rdev;
+ struct bnxt_qplib_frpl qplib_frpl;
+ u64 *page_list;
+};
+
+struct bnxt_re_fmr {
+ struct bnxt_re_dev *rdev;
+ struct ib_fmr ib_fmr;
+ struct bnxt_qplib_mrw qplib_fmr;
+};
+
+struct bnxt_re_mw {
+ struct bnxt_re_dev *rdev;
+ struct ib_mw ib_mw;
+ struct bnxt_qplib_mrw qplib_mw;
+};
+
+struct bnxt_re_ucontext {
+ struct bnxt_re_dev *rdev;
+ struct ib_ucontext ib_uctx;
+ struct bnxt_qplib_dpi dpi;
+ void *shpg;
+ spinlock_t sh_lock; /* protect shpg */
+};
+
+struct net_device *bnxt_re_get_netdev(struct ib_device *ibdev, u8 port_num);
+
+int bnxt_re_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *ib_attr,
+ struct ib_udata *udata);
+int bnxt_re_modify_device(struct ib_device *ibdev,
+ int device_modify_mask,
+ struct ib_device_modify *device_modify);
+int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_attr *port_attr);
+int bnxt_re_modify_port(struct ib_device *ibdev, u8 port_num,
+ int port_modify_mask,
+ struct ib_port_modify *port_modify);
+int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_immutable *immutable);
+int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num,
+ u16 index, u16 *pkey);
+int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num,
+ unsigned int index, void **context);
+int bnxt_re_add_gid(struct ib_device *ibdev, u8 port_num,
+ unsigned int index, const union ib_gid *gid,
+ const struct ib_gid_attr *attr, void **context);
+int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num,
+ int index, union ib_gid *gid);
+enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
+ u8 port_num);
+struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+int bnxt_re_dealloc_pd(struct ib_pd *pd);
+struct ib_ah *bnxt_re_create_ah(struct ib_pd *pd,
+ struct rdma_ah_attr *ah_attr,
+ struct ib_udata *udata);
+int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
+int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
+int bnxt_re_destroy_ah(struct ib_ah *ah);
+struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *qp_init_attr,
+ struct ib_udata *udata);
+int bnxt_re_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_udata *udata);
+int bnxt_re_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
+int bnxt_re_destroy_qp(struct ib_qp *qp);
+int bnxt_re_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr,
+ struct ib_send_wr **bad_send_wr);
+int bnxt_re_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
+ struct ib_recv_wr **bad_recv_wr);
+struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
+ const struct ib_cq_init_attr *attr,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+int bnxt_re_destroy_cq(struct ib_cq *cq);
+int bnxt_re_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
+int bnxt_re_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
+struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
+
+int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset);
+struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type mr_type,
+ u32 max_num_sg);
+int bnxt_re_dereg_mr(struct ib_mr *mr);
+struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
+ struct ib_udata *udata);
+int bnxt_re_dealloc_mw(struct ib_mw *mw);
+struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct ib_udata *udata);
+struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata);
+int bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
+int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+#endif /* __BNXT_RE_IB_VERBS_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
new file mode 100644
index 000000000000..ceae2d92fb08
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -0,0 +1,1312 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Main component of the bnxt_re driver
+ */
+
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <net/dcbnl.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#include <linux/if_ether.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_addr.h>
+
+#include "bnxt_ulp.h"
+#include "roce_hsi.h"
+#include "qplib_res.h"
+#include "qplib_sp.h"
+#include "qplib_fp.h"
+#include "qplib_rcfw.h"
+#include "bnxt_re.h"
+#include "ib_verbs.h"
+#include <rdma/bnxt_re-abi.h>
+#include "bnxt.h"
+static char version[] =
+ BNXT_RE_DESC " v" ROCE_DRV_MODULE_VERSION "\n";
+
+MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>");
+MODULE_DESCRIPTION(BNXT_RE_DESC " Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(ROCE_DRV_MODULE_VERSION);
+
+/* globals */
+static struct list_head bnxt_re_dev_list = LIST_HEAD_INIT(bnxt_re_dev_list);
+/* Mutex to protect the list of bnxt_re devices added */
+static DEFINE_MUTEX(bnxt_re_dev_lock);
+static struct workqueue_struct *bnxt_re_wq;
+
+/* for handling bnxt_en callbacks later */
+static void bnxt_re_stop(void *p)
+{
+}
+
+static void bnxt_re_start(void *p)
+{
+}
+
+static void bnxt_re_sriov_config(void *p, int num_vfs)
+{
+}
+
+static struct bnxt_ulp_ops bnxt_re_ulp_ops = {
+ .ulp_async_notifier = NULL,
+ .ulp_stop = bnxt_re_stop,
+ .ulp_start = bnxt_re_start,
+ .ulp_sriov_config = bnxt_re_sriov_config
+};
+
+/* RoCE -> Net driver */
+
+/* Driver registration routines used to let the networking driver (bnxt_en)
+ * to know that the RoCE driver is now installed
+ */
+static int bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev, bool lock_wait)
+{
+ struct bnxt_en_dev *en_dev;
+ int rc;
+
+ if (!rdev)
+ return -EINVAL;
+
+ en_dev = rdev->en_dev;
+	/* Acquire the rtnl lock if not invoked from a netdev event */
+ if (lock_wait)
+ rtnl_lock();
+
+ rc = en_dev->en_ops->bnxt_unregister_device(rdev->en_dev,
+ BNXT_ROCE_ULP);
+ if (lock_wait)
+ rtnl_unlock();
+ return rc;
+}
+
+static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_en_dev *en_dev;
+ int rc = 0;
+
+ if (!rdev)
+ return -EINVAL;
+
+ en_dev = rdev->en_dev;
+
+ rtnl_lock();
+ rc = en_dev->en_ops->bnxt_register_device(en_dev, BNXT_ROCE_ULP,
+ &bnxt_re_ulp_ops, rdev);
+ rtnl_unlock();
+ return rc;
+}
+
+static int bnxt_re_free_msix(struct bnxt_re_dev *rdev, bool lock_wait)
+{
+ struct bnxt_en_dev *en_dev;
+ int rc;
+
+ if (!rdev)
+ return -EINVAL;
+
+ en_dev = rdev->en_dev;
+
+ if (lock_wait)
+ rtnl_lock();
+
+ rc = en_dev->en_ops->bnxt_free_msix(rdev->en_dev, BNXT_ROCE_ULP);
+
+ if (lock_wait)
+ rtnl_unlock();
+ return rc;
+}
+
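+/* Request BNXT_RE_MIN_MSIX MSI-X vectors from bnxt_en under rtnl; getting
+ * fewer than the minimum is treated as a failure, and any mismatch between
+ * requested and granted vectors is logged.
+ */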
+static int bnxt_re_request_msix(struct bnxt_re_dev *rdev)
+{
+ int rc = 0, num_msix_want = BNXT_RE_MIN_MSIX, num_msix_got;
+ struct bnxt_en_dev *en_dev;
+
+ if (!rdev)
+ return -EINVAL;
+
+ en_dev = rdev->en_dev;
+
+ rtnl_lock();
+ num_msix_got = en_dev->en_ops->bnxt_request_msix(en_dev, BNXT_ROCE_ULP,
+ rdev->msix_entries,
+ num_msix_want);
+ if (num_msix_got < BNXT_RE_MIN_MSIX) {
+ rc = -EINVAL;
+ goto done;
+ }
+ if (num_msix_got != num_msix_want) {
+ dev_warn(rdev_to_dev(rdev),
+ "Requested %d MSI-X vectors, got %d\n",
+ num_msix_want, num_msix_got);
+ }
+ rdev->num_msix = num_msix_got;
+done:
+ rtnl_unlock();
+ return rc;
+}
+
+static void bnxt_re_init_hwrm_hdr(struct bnxt_re_dev *rdev, struct input *hdr,
+ u16 opcd, u16 crid, u16 trid)
+{
+ hdr->req_type = cpu_to_le16(opcd);
+ hdr->cmpl_ring = cpu_to_le16(crid);
+ hdr->target_id = cpu_to_le16(trid);
+}
+
+static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg,
+ int msg_len, void *resp, int resp_max_len,
+ int timeout)
+{
+ fw_msg->msg = msg;
+ fw_msg->msg_len = msg_len;
+ fw_msg->resp = resp;
+ fw_msg->resp_max_len = resp_max_len;
+ fw_msg->timeout = timeout;
+}
+
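+/* The HWRM ring and stats-context helpers below build firmware requests
+ * and send them to the bnxt_en driver through the ULP bnxt_send_fw_msg()
+ * hook, serialized under the rtnl lock.
+ */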
+static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id,
+ bool lock_wait)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_ring_free_input req = {0};
+ struct hwrm_ring_free_output resp;
+ struct bnxt_fw_msg fw_msg;
+ bool do_unlock = false;
+ int rc = -EINVAL;
+
+ if (!en_dev)
+ return rc;
+
+ memset(&fw_msg, 0, sizeof(fw_msg));
+ if (lock_wait) {
+ rtnl_lock();
+ do_unlock = true;
+ }
+
+ bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_FREE, -1, -1);
+ req.ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL;
+ req.ring_id = cpu_to_le16(fw_ring_id);
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+ rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+ if (rc)
+ dev_err(rdev_to_dev(rdev),
+ "Failed to free HW ring:%d :%#x", req.ring_id, rc);
+ if (do_unlock)
+ rtnl_unlock();
+ return rc;
+}
+
+static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, dma_addr_t *dma_arr,
+ int pages, int type, u32 ring_mask,
+ u32 map_index, u16 *fw_ring_id)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_ring_alloc_input req = {0};
+ struct hwrm_ring_alloc_output resp;
+ struct bnxt_fw_msg fw_msg;
+ int rc = -EINVAL;
+
+ if (!en_dev)
+ return rc;
+
+ memset(&fw_msg, 0, sizeof(fw_msg));
+ rtnl_lock();
+ bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_ALLOC, -1, -1);
+ req.enables = 0;
+ req.page_tbl_addr = cpu_to_le64(dma_arr[0]);
+ if (pages > 1) {
+ /* Page size is in log2 units */
+ req.page_size = BNXT_PAGE_SHIFT;
+ req.page_tbl_depth = 1;
+ }
+ req.fbo = 0;
+ /* Association of ring index with doorbell index and MSIX number */
+ req.logical_id = cpu_to_le16(map_index);
+ req.length = cpu_to_le32(ring_mask + 1);
+ req.ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL;
+ req.int_mode = RING_ALLOC_REQ_INT_MODE_MSIX;
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+ rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+ if (!rc)
+ *fw_ring_id = le16_to_cpu(resp.ring_id);
+
+ rtnl_unlock();
+ return rc;
+}
+
+static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev,
+ u32 fw_stats_ctx_id, bool lock_wait)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_stat_ctx_free_input req = {0};
+ struct bnxt_fw_msg fw_msg;
+ bool do_unlock = false;
+ int rc = -EINVAL;
+
+ if (!en_dev)
+ return rc;
+
+ memset(&fw_msg, 0, sizeof(fw_msg));
+ if (lock_wait) {
+ rtnl_lock();
+ do_unlock = true;
+ }
+
+ bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_FREE, -1, -1);
+ req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id);
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&req,
+ sizeof(req), DFLT_HWRM_CMD_TIMEOUT);
+ rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+ if (rc)
+ dev_err(rdev_to_dev(rdev),
+ "Failed to free HW stats context %#x", rc);
+
+ if (do_unlock)
+ rtnl_unlock();
+ return rc;
+}
+
+static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
+ dma_addr_t dma_map,
+ u32 *fw_stats_ctx_id)
+{
+ struct hwrm_stat_ctx_alloc_output resp = {0};
+ struct hwrm_stat_ctx_alloc_input req = {0};
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct bnxt_fw_msg fw_msg;
+ int rc = -EINVAL;
+
+ *fw_stats_ctx_id = INVALID_STATS_CTX_ID;
+
+ if (!en_dev)
+ return rc;
+
+ memset(&fw_msg, 0, sizeof(fw_msg));
+ rtnl_lock();
+
+ bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1);
+ req.update_period_ms = cpu_to_le32(1000);
+ req.stats_dma_addr = cpu_to_le64(dma_map);
+ req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE;
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+ rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+ if (!rc)
+ *fw_stats_ctx_id = le32_to_cpu(resp.stat_ctx_id);
+
+ rtnl_unlock();
+ return rc;
+}
+
+/* Device */
+
+static bool is_bnxt_re_dev(struct net_device *netdev)
+{
+ struct ethtool_drvinfo drvinfo;
+
+ if (netdev->ethtool_ops && netdev->ethtool_ops->get_drvinfo) {
+ memset(&drvinfo, 0, sizeof(drvinfo));
+ netdev->ethtool_ops->get_drvinfo(netdev, &drvinfo);
+
+ if (strcmp(drvinfo.driver, "bnxt_en"))
+ return false;
+ return true;
+ }
+ return false;
+}
+
+static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev)
+{
+ struct bnxt_re_dev *rdev;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(rdev, &bnxt_re_dev_list, list) {
+ if (rdev->netdev == netdev) {
+ rcu_read_unlock();
+ return rdev;
+ }
+ }
+ rcu_read_unlock();
+ return NULL;
+}
+
+static void bnxt_re_dev_unprobe(struct net_device *netdev,
+ struct bnxt_en_dev *en_dev)
+{
+ dev_put(netdev);
+ module_put(en_dev->pdev->driver->driver.owner);
+}
+
+static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
+{
+ struct bnxt *bp = netdev_priv(netdev);
+ struct bnxt_en_dev *en_dev;
+ struct pci_dev *pdev;
+
+ /* Call bnxt_en's RoCE probe via indirect API */
+ if (!bp->ulp_probe)
+ return ERR_PTR(-EINVAL);
+
+ en_dev = bp->ulp_probe(netdev);
+ if (IS_ERR(en_dev))
+ return en_dev;
+
+ pdev = en_dev->pdev;
+ if (!pdev)
+ return ERR_PTR(-EINVAL);
+
+ if (!(en_dev->flags & BNXT_EN_FLAG_ROCE_CAP)) {
+ dev_dbg(&pdev->dev,
+ "%s: probe error: RoCE is not supported on this device",
+ ROCE_DRV_MODULE_NAME);
+ return ERR_PTR(-ENODEV);
+ }
+
+ /* Hold the bnxt_en module and the net device while RoCE is bound to them */
+ if (!try_module_get(pdev->driver->driver.owner))
+ return ERR_PTR(-ENODEV);
+
+ dev_hold(netdev);
+
+ return en_dev;
+}
+
+static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev)
+{
+ ib_unregister_device(&rdev->ibdev);
+}
+
+static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
+{
+ struct ib_device *ibdev = &rdev->ibdev;
+
+ /* ib device init */
+ ibdev->owner = THIS_MODULE;
+ ibdev->node_type = RDMA_NODE_IB_CA;
+ strlcpy(ibdev->name, "bnxt_re%d", IB_DEVICE_NAME_MAX);
+ strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA",
+ strlen(BNXT_RE_DESC) + 5);
+ ibdev->phys_port_cnt = 1;
+
+ bnxt_qplib_get_guid(rdev->netdev->dev_addr, (u8 *)&ibdev->node_guid);
+
+ ibdev->num_comp_vectors = 1;
+ ibdev->dev.parent = &rdev->en_dev->pdev->dev;
+ ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY;
+
+ /* User space */
+ ibdev->uverbs_abi_ver = BNXT_RE_ABI_VERSION;
+ ibdev->uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_REREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_AH) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_AH) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_AH);
+ /* POLL_CQ and REQ_NOTIFY_CQ are handled directly in libbnxt_re */
+
+ /* Kernel verbs */
+ ibdev->query_device = bnxt_re_query_device;
+ ibdev->modify_device = bnxt_re_modify_device;
+
+ ibdev->query_port = bnxt_re_query_port;
+ ibdev->modify_port = bnxt_re_modify_port;
+ ibdev->get_port_immutable = bnxt_re_get_port_immutable;
+ ibdev->query_pkey = bnxt_re_query_pkey;
+ ibdev->query_gid = bnxt_re_query_gid;
+ ibdev->get_netdev = bnxt_re_get_netdev;
+ ibdev->add_gid = bnxt_re_add_gid;
+ ibdev->del_gid = bnxt_re_del_gid;
+ ibdev->get_link_layer = bnxt_re_get_link_layer;
+
+ ibdev->alloc_pd = bnxt_re_alloc_pd;
+ ibdev->dealloc_pd = bnxt_re_dealloc_pd;
+
+ ibdev->create_ah = bnxt_re_create_ah;
+ ibdev->modify_ah = bnxt_re_modify_ah;
+ ibdev->query_ah = bnxt_re_query_ah;
+ ibdev->destroy_ah = bnxt_re_destroy_ah;
+
+ ibdev->create_qp = bnxt_re_create_qp;
+ ibdev->modify_qp = bnxt_re_modify_qp;
+ ibdev->query_qp = bnxt_re_query_qp;
+ ibdev->destroy_qp = bnxt_re_destroy_qp;
+
+ ibdev->post_send = bnxt_re_post_send;
+ ibdev->post_recv = bnxt_re_post_recv;
+
+ ibdev->create_cq = bnxt_re_create_cq;
+ ibdev->destroy_cq = bnxt_re_destroy_cq;
+ ibdev->poll_cq = bnxt_re_poll_cq;
+ ibdev->req_notify_cq = bnxt_re_req_notify_cq;
+
+ ibdev->get_dma_mr = bnxt_re_get_dma_mr;
+ ibdev->dereg_mr = bnxt_re_dereg_mr;
+ ibdev->alloc_mr = bnxt_re_alloc_mr;
+ ibdev->map_mr_sg = bnxt_re_map_mr_sg;
+
+ ibdev->reg_user_mr = bnxt_re_reg_user_mr;
+ ibdev->alloc_ucontext = bnxt_re_alloc_ucontext;
+ ibdev->dealloc_ucontext = bnxt_re_dealloc_ucontext;
+ ibdev->mmap = bnxt_re_mmap;
+
+ return ib_register_device(ibdev, NULL);
+}
+
+static ssize_t show_rev(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
+}
+
+static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->dev_attr.fw_ver);
+}
+
+static ssize_t show_hca(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
+}
+
+static DEVICE_ATTR(hw_rev, 0444, show_rev, NULL);
+static DEVICE_ATTR(fw_rev, 0444, show_fw_ver, NULL);
+static DEVICE_ATTR(hca_type, 0444, show_hca, NULL);
+
+static struct device_attribute *bnxt_re_attributes[] = {
+ &dev_attr_hw_rev,
+ &dev_attr_fw_rev,
+ &dev_attr_hca_type
+};
+
+static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev)
+{
+ dev_put(rdev->netdev);
+ rdev->netdev = NULL;
+
+ mutex_lock(&bnxt_re_dev_lock);
+ list_del_rcu(&rdev->list);
+ mutex_unlock(&bnxt_re_dev_lock);
+
+ synchronize_rcu();
+ flush_workqueue(bnxt_re_wq);
+
+ ib_dealloc_device(&rdev->ibdev);
+ /* rdev is gone */
+}
+
+static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev,
+ struct bnxt_en_dev *en_dev)
+{
+ struct bnxt_re_dev *rdev;
+
+ /* Allocate bnxt_re_dev instance here */
+ rdev = (struct bnxt_re_dev *)ib_alloc_device(sizeof(*rdev));
+ if (!rdev) {
+ dev_err(NULL, "%s: bnxt_re_dev allocation failure!",
+ ROCE_DRV_MODULE_NAME);
+ return NULL;
+ }
+ /* Default values */
+ rdev->netdev = netdev;
+ dev_hold(rdev->netdev);
+ rdev->en_dev = en_dev;
+ rdev->id = rdev->en_dev->pdev->devfn;
+ INIT_LIST_HEAD(&rdev->qp_list);
+ mutex_init(&rdev->qp_lock);
+ atomic_set(&rdev->qp_count, 0);
+ atomic_set(&rdev->cq_count, 0);
+ atomic_set(&rdev->srq_count, 0);
+ atomic_set(&rdev->mr_count, 0);
+ atomic_set(&rdev->mw_count, 0);
+ rdev->cosq[0] = 0xFFFF;
+ rdev->cosq[1] = 0xFFFF;
+
+ mutex_lock(&bnxt_re_dev_lock);
+ list_add_tail_rcu(&rdev->list, &bnxt_re_dev_list);
+ mutex_unlock(&bnxt_re_dev_lock);
+ return rdev;
+}
+
+static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
+ struct creq_func_event *aeqe)
+{
+ switch (aeqe->event) {
+ case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CQ_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_TQM_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCS_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCC_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCM_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_TIM_ERROR:
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq,
+ struct bnxt_qplib_cq *handle)
+{
+ struct bnxt_re_cq *cq = container_of(handle, struct bnxt_re_cq,
+ qplib_cq);
+
+ if (!cq) {
+ dev_err(NULL, "%s: CQ is NULL, CQN not handled",
+ ROCE_DRV_MODULE_NAME);
+ return -EINVAL;
+ }
+ if (cq->ib_cq.comp_handler) {
+ /* Lock comp_handler? */
+ (*cq->ib_cq.comp_handler)(&cq->ib_cq, cq->ib_cq.cq_context);
+ }
+
+ return 0;
+}
+
+static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
+{
+ if (rdev->nq.hwq.max_elements)
+ bnxt_qplib_disable_nq(&rdev->nq);
+
+ if (rdev->qplib_res.rcfw)
+ bnxt_qplib_cleanup_res(&rdev->qplib_res);
+}
+
+static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
+{
+ int rc = 0;
+
+ bnxt_qplib_init_res(&rdev->qplib_res);
+
+ if (rdev->msix_entries[BNXT_RE_NQ_IDX].vector <= 0)
+ return -EINVAL;
+
+ rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq,
+ rdev->msix_entries[BNXT_RE_NQ_IDX].vector,
+ rdev->msix_entries[BNXT_RE_NQ_IDX].db_offset,
+ &bnxt_re_cqn_handler,
+ NULL);
+
+ if (rc)
+ dev_err(rdev_to_dev(rdev), "Failed to enable NQ: %#x", rc);
+
+ return rc;
+}
+
+static void bnxt_re_free_res(struct bnxt_re_dev *rdev, bool lock_wait)
+{
+ if (rdev->nq.hwq.max_elements) {
+ bnxt_re_net_ring_free(rdev, rdev->nq.ring_id, lock_wait);
+ bnxt_qplib_free_nq(&rdev->nq);
+ }
+ if (rdev->qplib_res.dpi_tbl.max) {
+ bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
+ &rdev->qplib_res.dpi_tbl,
+ &rdev->dpi_privileged);
+ }
+ if (rdev->qplib_res.rcfw) {
+ bnxt_qplib_free_res(&rdev->qplib_res);
+ rdev->qplib_res.rcfw = NULL;
+ }
+}
+
+static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
+{
+ int rc = 0;
+
+ /* Configure and allocate resources for qplib */
+ rdev->qplib_res.rcfw = &rdev->rcfw;
+ rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr);
+ if (rc)
+ goto fail;
+
+ rc = bnxt_qplib_alloc_res(&rdev->qplib_res, rdev->en_dev->pdev,
+ rdev->netdev, &rdev->dev_attr);
+ if (rc)
+ goto fail;
+
+ rc = bnxt_qplib_alloc_dpi(&rdev->qplib_res.dpi_tbl,
+ &rdev->dpi_privileged,
+ rdev);
+ if (rc)
+ goto fail;
+
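+ /* Size the NQ to hold a notification for every CQ and SRQ, plus two
+ * spare entries.
+ */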
+ rdev->nq.hwq.max_elements = BNXT_RE_MAX_CQ_COUNT +
+ BNXT_RE_MAX_SRQC_COUNT + 2;
+ rc = bnxt_qplib_alloc_nq(rdev->en_dev->pdev, &rdev->nq);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to allocate NQ memory: %#x", rc);
+ goto fail;
+ }
+ rc = bnxt_re_net_ring_alloc
+ (rdev, rdev->nq.hwq.pbl[PBL_LVL_0].pg_map_arr,
+ rdev->nq.hwq.pbl[rdev->nq.hwq.level].pg_count,
+ HWRM_RING_ALLOC_CMPL, BNXT_QPLIB_NQE_MAX_CNT - 1,
+ rdev->msix_entries[BNXT_RE_NQ_IDX].ring_idx,
+ &rdev->nq.ring_id);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to allocate NQ ring: %#x", rc);
+ goto free_nq;
+ }
+ return 0;
+free_nq:
+ bnxt_qplib_free_nq(&rdev->nq);
+fail:
+ rdev->qplib_res.rcfw = NULL;
+ return rc;
+}
+
+static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp,
+ u8 port_num, enum ib_event_type event)
+{
+ struct ib_event ib_event;
+
+ ib_event.device = ibdev;
+ if (qp)
+ ib_event.element.qp = qp;
+ else
+ ib_event.element.port_num = port_num;
+ ib_event.event = event;
+ ib_dispatch_event(&ib_event);
+}
+
+#define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN 0x02
+static int bnxt_re_query_hwrm_pri2cos(struct bnxt_re_dev *rdev, u8 dir,
+ u64 *cid_map)
+{
+ struct hwrm_queue_pri2cos_qcfg_input req = {0};
+ struct bnxt *bp = netdev_priv(rdev->netdev);
+ struct hwrm_queue_pri2cos_qcfg_output resp;
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct bnxt_fw_msg fw_msg;
+ u32 flags = 0;
+ u8 *qcfgmap, *tmp_map;
+ int rc = 0, i;
+
+ if (!cid_map)
+ return -EINVAL;
+
+ memset(&fw_msg, 0, sizeof(fw_msg));
+ bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
+ HWRM_QUEUE_PRI2COS_QCFG, -1, -1);
+ flags |= (dir & 0x01);
+ flags |= HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN;
+ req.flags = cpu_to_le32(flags);
+ req.port_id = bp->pf.port_id;
+
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+ rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+ if (rc)
+ return rc;
+
+ if (resp.queue_cfg_info) {
+ dev_warn(rdev_to_dev(rdev),
+ "Asymmetric cos queue configuration detected");
+ dev_warn(rdev_to_dev(rdev),
+ " on device, QoS may not be fully functional\n");
+ }
+ qcfgmap = &resp.pri0_cos_queue_id;
+ tmp_map = (u8 *)cid_map;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ tmp_map[i] = qcfgmap[i];
+
+ return rc;
+}
+
+static bool bnxt_re_is_qp1_or_shadow_qp(struct bnxt_re_dev *rdev,
+ struct bnxt_re_qp *qp)
+{
+ return (qp->ib_qp.qp_type == IB_QPT_GSI) || (qp == rdev->qp1_sqp);
+}
+
+static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev)
+{
+ int mask = IB_QP_STATE;
+ struct ib_qp_attr qp_attr;
+ struct bnxt_re_qp *qp;
+
+ qp_attr.qp_state = IB_QPS_ERR;
+ mutex_lock(&rdev->qp_lock);
+ list_for_each_entry(qp, &rdev->qp_list, list) {
+ /* Modify the state of all QPs except QP1/Shadow QP */
+ if (!bnxt_re_is_qp1_or_shadow_qp(rdev, qp)) {
+ if (qp->qplib_qp.state !=
+ CMDQ_MODIFY_QP_NEW_STATE_RESET &&
+ qp->qplib_qp.state !=
+ CMDQ_MODIFY_QP_NEW_STATE_ERR) {
+ bnxt_re_dispatch_event(&rdev->ibdev, &qp->ib_qp,
+ 1, IB_EVENT_QP_FATAL);
+ bnxt_re_modify_qp(&qp->ib_qp, &qp_attr, mask,
+ NULL);
+ }
+ }
+ }
+ mutex_unlock(&rdev->qp_lock);
+}
+
+static u32 bnxt_re_get_priority_mask(struct bnxt_re_dev *rdev)
+{
+ u32 prio_map = 0, tmp_map = 0;
+ struct net_device *netdev;
+ struct dcb_app app;
+
+ netdev = rdev->netdev;
+
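+ /* Merge the DCB app-table priorities configured for RoCE v1 (ethertype)
+ * and RoCE v2 (UDP destination port).
+ */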
+ memset(&app, 0, sizeof(app));
+ app.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE;
+ app.protocol = ETH_P_IBOE;
+ tmp_map = dcb_ieee_getapp_mask(netdev, &app);
+ prio_map = tmp_map;
+
+ app.selector = IEEE_8021QAZ_APP_SEL_DGRAM;
+ app.protocol = ROCE_V2_UDP_DPORT;
+ tmp_map = dcb_ieee_getapp_mask(netdev, &app);
+ prio_map |= tmp_map;
+
+ if (!prio_map)
+ prio_map = -EFAULT;
+ return prio_map;
+}
+
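+/* Pick the CoS queue IDs for the enabled priorities from the firmware
+ * priority-to-CoS map; at most two traffic classes are supported.
+ */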
+static void bnxt_re_parse_cid_map(u8 prio_map, u8 *cid_map, u16 *cosq)
+{
+ u16 prio;
+ u8 id;
+
+ for (prio = 0, id = 0; prio < 8; prio++) {
+ if (prio_map & (1 << prio)) {
+ cosq[id] = cid_map[prio];
+ id++;
+ if (id == 2) /* Max 2 tcs supported */
+ break;
+ }
+ }
+}
+
+static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
+{
+ u8 prio_map = 0;
+ u64 cid_map;
+ int rc;
+
+ /* Get the priority mask for RoCE */
+ rc = bnxt_re_get_priority_mask(rdev);
+ if (rc < 0)
+ return rc;
+ prio_map = (u8)rc;
+
+ if (prio_map == rdev->cur_prio_map)
+ return 0;
+ rdev->cur_prio_map = prio_map;
+ /* Get cosq id for this priority */
+ rc = bnxt_re_query_hwrm_pri2cos(rdev, 0, &cid_map);
+ if (rc) {
+ dev_warn(rdev_to_dev(rdev), "no cos for p_mask %x\n", prio_map);
+ return rc;
+ }
+ /* Parse CoS IDs for app priority */
+ bnxt_re_parse_cid_map(prio_map, (u8 *)&cid_map, rdev->cosq);
+
+ /* Config BONO. */
+ rc = bnxt_qplib_map_tc2cos(&rdev->qplib_res, rdev->cosq);
+ if (rc) {
+ dev_warn(rdev_to_dev(rdev), "no tc for cos{%x, %x}\n",
+ rdev->cosq[0], rdev->cosq[1]);
+ return rc;
+ }
+
+ return 0;
+}
+
+static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait)
+{
+ int i, rc;
+
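+ /* Tear down in the reverse order of bnxt_re_ib_reg(); each stage is
+ * guarded by its flag bit so a partially initialized device unwinds
+ * cleanly.
+ */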
+ if (test_and_clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) {
+ for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++)
+ device_remove_file(&rdev->ibdev.dev,
+ bnxt_re_attributes[i]);
+ /* Cleanup ib dev */
+ bnxt_re_unregister_ib(rdev);
+ }
+ if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
+ cancel_delayed_work(&rdev->worker);
+
+ bnxt_re_cleanup_res(rdev);
+ bnxt_re_free_res(rdev, lock_wait);
+
+ if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) {
+ rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw);
+ if (rc)
+ dev_warn(rdev_to_dev(rdev),
+ "Failed to deinitialize RCFW: %#x", rc);
+ bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id,
+ lock_wait);
+ bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx);
+ bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
+ bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, lock_wait);
+ bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
+ }
+ if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags)) {
+ rc = bnxt_re_free_msix(rdev, lock_wait);
+ if (rc)
+ dev_warn(rdev_to_dev(rdev),
+ "Failed to free MSI-X vectors: %#x", rc);
+ }
+ if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) {
+ rc = bnxt_re_unregister_netdev(rdev, lock_wait);
+ if (rc)
+ dev_warn(rdev_to_dev(rdev),
+ "Failed to unregister with netdev: %#x", rc);
+ }
+}
+
+static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
+{
+ u32 i;
+
+ rdev->qplib_ctx.qpc_count = BNXT_RE_MAX_QPC_COUNT;
+ rdev->qplib_ctx.mrw_count = BNXT_RE_MAX_MRW_COUNT;
+ rdev->qplib_ctx.srqc_count = BNXT_RE_MAX_SRQC_COUNT;
+ rdev->qplib_ctx.cq_count = BNXT_RE_MAX_CQ_COUNT;
+ for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
+ rdev->qplib_ctx.tqm_count[i] =
+ rdev->dev_attr.tqm_alloc_reqs[i];
+}
+
+/* Worker for polling periodic events. Currently used for QoS programming */
+static void bnxt_re_worker(struct work_struct *work)
+{
+ struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
+ worker.work);
+
+ bnxt_re_setup_qos(rdev);
+ schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
+}
+
+static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
+{
+ int i, j, rc;
+
+ /* Register the new RoCE device instance with the netdev */
+ rc = bnxt_re_register_netdev(rdev);
+ if (rc) {
+ pr_err("Failed to register with netedev: %#x\n", rc);
+ return -EINVAL;
+ }
+ set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
+
+ rc = bnxt_re_request_msix(rdev);
+ if (rc) {
+ pr_err("Failed to get MSI-X vectors: %#x\n", rc);
+ rc = -EINVAL;
+ goto fail;
+ }
+ set_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags);
+
+ /* Establish RCFW Communication Channel to initialize the context
+ * memory for the function and all child VFs
+ */
+ rc = bnxt_qplib_alloc_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw);
+ if (rc)
+ goto fail;
+
+ rc = bnxt_re_net_ring_alloc
+ (rdev, rdev->rcfw.creq.pbl[PBL_LVL_0].pg_map_arr,
+ rdev->rcfw.creq.pbl[rdev->rcfw.creq.level].pg_count,
+ HWRM_RING_ALLOC_CMPL, BNXT_QPLIB_CREQE_MAX_CNT - 1,
+ rdev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx,
+ &rdev->rcfw.creq_ring_id);
+ if (rc) {
+ pr_err("Failed to allocate CREQ: %#x\n", rc);
+ goto free_rcfw;
+ }
+ rc = bnxt_qplib_enable_rcfw_channel
+ (rdev->en_dev->pdev, &rdev->rcfw,
+ rdev->msix_entries[BNXT_RE_AEQ_IDX].vector,
+ rdev->msix_entries[BNXT_RE_AEQ_IDX].db_offset,
+ 0, &bnxt_re_aeq_handler);
+ if (rc) {
+ pr_err("Failed to enable RCFW channel: %#x\n", rc);
+ goto free_ring;
+ }
+
+ rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr);
+ if (rc)
+ goto disable_rcfw;
+ bnxt_re_set_resource_limits(rdev);
+
+ rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0);
+ if (rc) {
+ pr_err("Failed to allocate QPLIB context: %#x\n", rc);
+ goto disable_rcfw;
+ }
+ rc = bnxt_re_net_stats_ctx_alloc(rdev,
+ rdev->qplib_ctx.stats.dma_map,
+ &rdev->qplib_ctx.stats.fw_id);
+ if (rc) {
+ pr_err("Failed to allocate stats context: %#x\n", rc);
+ goto free_ctx;
+ }
+
+ rc = bnxt_qplib_init_rcfw(&rdev->rcfw, &rdev->qplib_ctx, 0);
+ if (rc) {
+ pr_err("Failed to initialize RCFW: %#x\n", rc);
+ goto free_sctx;
+ }
+ set_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags);
+
+ /* Resources based on the 'new' device caps */
+ rc = bnxt_re_alloc_res(rdev);
+ if (rc) {
+ pr_err("Failed to allocate resources: %#x\n", rc);
+ goto fail;
+ }
+ rc = bnxt_re_init_res(rdev);
+ if (rc) {
+ pr_err("Failed to initialize resources: %#x\n", rc);
+ goto fail;
+ }
+
+ rc = bnxt_re_setup_qos(rdev);
+ if (rc)
+ pr_info("RoCE priority not yet configured\n");
+
+ INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker);
+ set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags);
+ schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
+
+ /* Register ib dev */
+ rc = bnxt_re_register_ib(rdev);
+ if (rc) {
+ pr_err("Failed to register with IB: %#x\n", rc);
+ goto fail;
+ }
+ dev_info(rdev_to_dev(rdev), "Device registered successfully");
+ for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++) {
+ rc = device_create_file(&rdev->ibdev.dev,
+ bnxt_re_attributes[i]);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to create IB sysfs: %#x", rc);
+ /* Must clean up all created device files */
+ for (j = 0; j < i; j++)
+ device_remove_file(&rdev->ibdev.dev,
+ bnxt_re_attributes[j]);
+ bnxt_re_unregister_ib(rdev);
+ goto fail;
+ }
+ }
+ set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
+ bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE);
+ bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_GID_CHANGE);
+
+ return 0;
+free_sctx:
+ bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id, true);
+free_ctx:
+ bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx);
+disable_rcfw:
+ bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
+free_ring:
+ bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, true);
+free_rcfw:
+ bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
+fail:
+ bnxt_re_ib_unreg(rdev, true);
+ return rc;
+}
+
+static void bnxt_re_dev_unreg(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct net_device *netdev = rdev->netdev;
+
+ bnxt_re_dev_remove(rdev);
+
+ if (netdev)
+ bnxt_re_dev_unprobe(netdev, en_dev);
+}
+
+static int bnxt_re_dev_reg(struct bnxt_re_dev **rdev, struct net_device *netdev)
+{
+ struct bnxt_en_dev *en_dev;
+ int rc = 0;
+
+ if (!is_bnxt_re_dev(netdev))
+ return -ENODEV;
+
+ en_dev = bnxt_re_dev_probe(netdev);
+ if (IS_ERR(en_dev)) {
+ if (en_dev != ERR_PTR(-ENODEV))
+ pr_err("%s: Failed to probe\n", ROCE_DRV_MODULE_NAME);
+ rc = PTR_ERR(en_dev);
+ goto exit;
+ }
+ *rdev = bnxt_re_dev_add(netdev, en_dev);
+ if (!*rdev) {
+ rc = -ENOMEM;
+ bnxt_re_dev_unprobe(netdev, en_dev);
+ goto exit;
+ }
+exit:
+ return rc;
+}
+
+static void bnxt_re_remove_one(struct bnxt_re_dev *rdev)
+{
+ pci_dev_put(rdev->en_dev->pdev);
+}
+
+/* Handle all deferred netevent tasks */
+static void bnxt_re_task(struct work_struct *work)
+{
+ struct bnxt_re_work *re_work;
+ struct bnxt_re_dev *rdev;
+ int rc = 0;
+
+ re_work = container_of(work, struct bnxt_re_work, work);
+ rdev = re_work->rdev;
+
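+ /* Apart from the registration request itself, ignore events for
+ * devices that have not completed IB registration.
+ */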
+ if (re_work->event != NETDEV_REGISTER &&
+ !test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags))
+ return;
+
+ switch (re_work->event) {
+ case NETDEV_REGISTER:
+ rc = bnxt_re_ib_reg(rdev);
+ if (rc)
+ dev_err(rdev_to_dev(rdev),
+ "Failed to register with IB: %#x", rc);
+ break;
+ case NETDEV_UP:
+ bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
+ IB_EVENT_PORT_ACTIVE);
+ break;
+ case NETDEV_DOWN:
+ bnxt_re_dev_stop(rdev);
+ break;
+ case NETDEV_CHANGE:
+ if (!netif_carrier_ok(rdev->netdev))
+ bnxt_re_dev_stop(rdev);
+ else if (netif_carrier_ok(rdev->netdev))
+ bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
+ IB_EVENT_PORT_ACTIVE);
+ break;
+ default:
+ break;
+ }
+ kfree(re_work);
+}
+
+static void bnxt_re_init_one(struct bnxt_re_dev *rdev)
+{
+ pci_dev_get(rdev->en_dev->pdev);
+}
+
+/*
+ * "Notifier chain callback can be invoked for the same chain from
+ * different CPUs at the same time".
+ *
+ * For cases when the netdev is already present, our call to the
+ * register_netdevice_notifier() will actually get the rtnl_lock()
+ * before sending NETDEV_REGISTER and (if up) NETDEV_UP
+ * events.
+ *
+ * But for cases when the netdev is not already present, the notifier
+ * chain may be invoked from different CPUs simultaneously.
+ *
+ * This is protected by the netdev_mutex.
+ */
+static int bnxt_re_netdev_event(struct notifier_block *notifier,
+ unsigned long event, void *ptr)
+{
+ struct net_device *real_dev, *netdev = netdev_notifier_info_to_dev(ptr);
+ struct bnxt_re_work *re_work;
+ struct bnxt_re_dev *rdev;
+ int rc = 0;
+ bool sch_work = false;
+
+ real_dev = rdma_vlan_dev_real_dev(netdev);
+ if (!real_dev)
+ real_dev = netdev;
+
+ rdev = bnxt_re_from_netdev(real_dev);
+ if (!rdev && event != NETDEV_REGISTER)
+ goto exit;
+ if (real_dev != netdev)
+ goto exit;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ if (rdev)
+ break;
+ rc = bnxt_re_dev_reg(&rdev, real_dev);
+ if (rc == -ENODEV)
+ break;
+ if (rc) {
+ pr_err("Failed to register with the device %s: %#x\n",
+ real_dev->name, rc);
+ break;
+ }
+ bnxt_re_init_one(rdev);
+ sch_work = true;
+ break;
+
+ case NETDEV_UNREGISTER:
+ bnxt_re_ib_unreg(rdev, false);
+ bnxt_re_remove_one(rdev);
+ bnxt_re_dev_unreg(rdev);
+ break;
+
+ default:
+ sch_work = true;
+ break;
+ }
+ if (sch_work) {
+ /* Allocate for the deferred task */
+ re_work = kzalloc(sizeof(*re_work), GFP_ATOMIC);
+ if (re_work) {
+ re_work->rdev = rdev;
+ re_work->event = event;
+ re_work->vlan_dev = (real_dev == netdev ?
+ NULL : netdev);
+ INIT_WORK(&re_work->work, bnxt_re_task);
+ queue_work(bnxt_re_wq, &re_work->work);
+ }
+ }
+
+exit:
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block bnxt_re_netdev_notifier = {
+ .notifier_call = bnxt_re_netdev_event
+};
+
+static int __init bnxt_re_mod_init(void)
+{
+ int rc = 0;
+
+ pr_info("%s: %s", ROCE_DRV_MODULE_NAME, version);
+
+ bnxt_re_wq = create_singlethread_workqueue("bnxt_re");
+ if (!bnxt_re_wq)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&bnxt_re_dev_list);
+
+ rc = register_netdevice_notifier(&bnxt_re_netdev_notifier);
+ if (rc) {
+ pr_err("%s: Cannot register to netdevice_notifier",
+ ROCE_DRV_MODULE_NAME);
+ goto err_netdev;
+ }
+ return 0;
+
+err_netdev:
+ destroy_workqueue(bnxt_re_wq);
+
+ return rc;
+}
+
+static void __exit bnxt_re_mod_exit(void)
+{
+ unregister_netdevice_notifier(&bnxt_re_netdev_notifier);
+ if (bnxt_re_wq)
+ destroy_workqueue(bnxt_re_wq);
+}
+
+module_init(bnxt_re_mod_init);
+module_exit(bnxt_re_mod_exit);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
new file mode 100644
index 000000000000..9af1514e5944
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -0,0 +1,2210 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Fast Path Operators
+ */
+
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/prefetch.h>
+
+#include "roce_hsi.h"
+
+#include "qplib_res.h"
+#include "qplib_rcfw.h"
+#include "qplib_sp.h"
+#include "qplib_fp.h"
+
+static void bnxt_qplib_arm_cq_enable(struct bnxt_qplib_cq *cq);
+
+static void bnxt_qplib_free_qp_hdr_buf(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_q *rq = &qp->rq;
+ struct bnxt_qplib_q *sq = &qp->sq;
+
+ if (qp->rq_hdr_buf)
+ dma_free_coherent(&res->pdev->dev,
+ rq->hwq.max_elements * qp->rq_hdr_buf_size,
+ qp->rq_hdr_buf, qp->rq_hdr_buf_map);
+ if (qp->sq_hdr_buf)
+ dma_free_coherent(&res->pdev->dev,
+ sq->hwq.max_elements * qp->sq_hdr_buf_size,
+ qp->sq_hdr_buf, qp->sq_hdr_buf_map);
+ qp->rq_hdr_buf = NULL;
+ qp->sq_hdr_buf = NULL;
+ qp->rq_hdr_buf_map = 0;
+ qp->sq_hdr_buf_map = 0;
+ qp->sq_hdr_buf_size = 0;
+ qp->rq_hdr_buf_size = 0;
+}
+
+static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_q *rq = &qp->rq;
+ struct bnxt_qplib_q *sq = &qp->sq;
+ int rc = 0;
+
+ if (qp->sq_hdr_buf_size && sq->hwq.max_elements) {
+ qp->sq_hdr_buf = dma_alloc_coherent(&res->pdev->dev,
+ sq->hwq.max_elements *
+ qp->sq_hdr_buf_size,
+ &qp->sq_hdr_buf_map, GFP_KERNEL);
+ if (!qp->sq_hdr_buf) {
+ rc = -ENOMEM;
+ dev_err(&res->pdev->dev,
+ "QPLIB: Failed to create sq_hdr_buf");
+ goto fail;
+ }
+ }
+
+ if (qp->rq_hdr_buf_size && rq->hwq.max_elements) {
+ qp->rq_hdr_buf = dma_alloc_coherent(&res->pdev->dev,
+ rq->hwq.max_elements *
+ qp->rq_hdr_buf_size,
+ &qp->rq_hdr_buf_map,
+ GFP_KERNEL);
+ if (!qp->rq_hdr_buf) {
+ rc = -ENOMEM;
+ dev_err(&res->pdev->dev,
+ "QPLIB: Failed to create rq_hdr_buf");
+ goto fail;
+ }
+ }
+ return 0;
+
+fail:
+ bnxt_qplib_free_qp_hdr_buf(res, qp);
+ return rc;
+}
+
+static void bnxt_qplib_service_nq(unsigned long data)
+{
+ struct bnxt_qplib_nq *nq = (struct bnxt_qplib_nq *)data;
+ struct bnxt_qplib_hwq *hwq = &nq->hwq;
+ struct nq_base *nqe, **nq_ptr;
+ int num_cqne_processed = 0;
+ u32 sw_cons, raw_cons;
+ u16 type;
+ int budget = nq->budget;
+ u64 q_handle;
+
+ /* Service the NQ until empty */
+ raw_cons = hwq->cons;
+ while (budget--) {
+ sw_cons = HWQ_CMP(raw_cons, hwq);
+ nq_ptr = (struct nq_base **)hwq->pbl_ptr;
+ nqe = &nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)];
+ if (!NQE_CMP_VALID(nqe, raw_cons, hwq->max_elements))
+ break;
+
+ type = le16_to_cpu(nqe->info10_type) & NQ_BASE_TYPE_MASK;
+ switch (type) {
+ case NQ_BASE_TYPE_CQ_NOTIFICATION:
+ {
+ struct nq_cn *nqcne = (struct nq_cn *)nqe;
+
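+ /* Rebuild the 64-bit CQ handle from the two 32-bit halves in the
+ * notification entry.
+ */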
+ q_handle = le32_to_cpu(nqcne->cq_handle_low);
+ q_handle |= (u64)le32_to_cpu(nqcne->cq_handle_high)
+ << 32;
+ bnxt_qplib_arm_cq_enable((struct bnxt_qplib_cq *)
+ ((unsigned long)q_handle));
+ if (!nq->cqn_handler(nq, (struct bnxt_qplib_cq *)
+ ((unsigned long)q_handle)))
+ num_cqne_processed++;
+ else
+ dev_warn(&nq->pdev->dev,
+ "QPLIB: cqn - type 0x%x not handled",
+ type);
+ break;
+ }
+ case NQ_BASE_TYPE_DBQ_EVENT:
+ break;
+ default:
+ dev_warn(&nq->pdev->dev,
+ "QPLIB: nqe with type = 0x%x not handled",
+ type);
+ break;
+ }
+ raw_cons++;
+ }
+ if (hwq->cons != raw_cons) {
+ hwq->cons = raw_cons;
+ NQ_DB_REARM(nq->bar_reg_iomem, hwq->cons, hwq->max_elements);
+ }
+}
+
+static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance)
+{
+ struct bnxt_qplib_nq *nq = dev_instance;
+ struct bnxt_qplib_hwq *hwq = &nq->hwq;
+ struct nq_base **nq_ptr;
+ u32 sw_cons;
+
+ /* Prefetch the NQ element */
+ sw_cons = HWQ_CMP(hwq->cons, hwq);
+ nq_ptr = (struct nq_base **)nq->hwq.pbl_ptr;
+ prefetch(&nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)]);
+
+ /* Fan out to CPU affinitized kthreads? */
+ tasklet_schedule(&nq->worker);
+
+ return IRQ_HANDLED;
+}
+
+void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq)
+{
+ /* Make sure the HW is stopped! */
+ synchronize_irq(nq->vector);
+ tasklet_disable(&nq->worker);
+ tasklet_kill(&nq->worker);
+
+ if (nq->requested) {
+ free_irq(nq->vector, nq);
+ nq->requested = false;
+ }
+ if (nq->bar_reg_iomem)
+ iounmap(nq->bar_reg_iomem);
+ nq->bar_reg_iomem = NULL;
+
+ nq->cqn_handler = NULL;
+ nq->srqn_handler = NULL;
+ nq->vector = 0;
+}
+
+int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
+ int msix_vector, int bar_reg_offset,
+ int (*cqn_handler)(struct bnxt_qplib_nq *nq,
+ struct bnxt_qplib_cq *),
+ int (*srqn_handler)(struct bnxt_qplib_nq *nq,
+ void *, u8 event))
+{
+ resource_size_t nq_base;
+ int rc;
+
+ nq->pdev = pdev;
+ nq->vector = msix_vector;
+
+ nq->cqn_handler = cqn_handler;
+
+ nq->srqn_handler = srqn_handler;
+
+ tasklet_init(&nq->worker, bnxt_qplib_service_nq, (unsigned long)nq);
+
+ nq->requested = false;
+ rc = request_irq(nq->vector, bnxt_qplib_nq_irq, 0, "bnxt_qplib_nq", nq);
+ if (rc) {
+ dev_err(&nq->pdev->dev,
+ "Failed to request IRQ for NQ: %#x", rc);
+ bnxt_qplib_disable_nq(nq);
+ goto fail;
+ }
+ nq->requested = true;
+ nq->bar_reg = NQ_CONS_PCI_BAR_REGION;
+ nq->bar_reg_off = bar_reg_offset;
+ nq_base = pci_resource_start(pdev, nq->bar_reg);
+ if (!nq_base) {
+ rc = -ENOMEM;
+ goto fail;
+ }
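+ /* Map only the 4-byte NQ consumer doorbell register within the BAR */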
+ nq->bar_reg_iomem = ioremap_nocache(nq_base + nq->bar_reg_off, 4);
+ if (!nq->bar_reg_iomem) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ NQ_DB_REARM(nq->bar_reg_iomem, nq->hwq.cons, nq->hwq.max_elements);
+
+ return 0;
+fail:
+ bnxt_qplib_disable_nq(nq);
+ return rc;
+}
+
+void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq)
+{
+ if (nq->hwq.max_elements)
+ bnxt_qplib_free_hwq(nq->pdev, &nq->hwq);
+}
+
+int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq)
+{
+ nq->pdev = pdev;
+ if (!nq->hwq.max_elements ||
+ nq->hwq.max_elements > BNXT_QPLIB_NQE_MAX_CNT)
+ nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT;
+
+ if (bnxt_qplib_alloc_init_hwq(nq->pdev, &nq->hwq, NULL, 0,
+ &nq->hwq.max_elements,
+ BNXT_QPLIB_MAX_NQE_ENTRY_SIZE, 0,
+ PAGE_SIZE, HWQ_TYPE_L2_CMPL))
+ return -ENOMEM;
+
+ nq->budget = 8;
+ return 0;
+}
+
+/* QP */
+int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_create_qp1 req;
+ struct creq_create_qp1_resp resp;
+ struct bnxt_qplib_pbl *pbl;
+ struct bnxt_qplib_q *sq = &qp->sq;
+ struct bnxt_qplib_q *rq = &qp->rq;
+ int rc;
+ u16 cmd_flags = 0;
+ u32 qp_flags = 0;
+
+ RCFW_CMD_PREP(req, CREATE_QP1, cmd_flags);
+
+ /* General */
+ req.type = qp->type;
+ req.dpi = cpu_to_le32(qp->dpi->dpi);
+ req.qp_handle = cpu_to_le64(qp->qp_handle);
+
+ /* SQ */
+ sq->hwq.max_elements = sq->max_wqe;
+ rc = bnxt_qplib_alloc_init_hwq(res->pdev, &sq->hwq, NULL, 0,
+ &sq->hwq.max_elements,
+ BNXT_QPLIB_MAX_SQE_ENTRY_SIZE, 0,
+ PAGE_SIZE, HWQ_TYPE_QUEUE);
+ if (rc)
+ goto exit;
+
+ sq->swq = kcalloc(sq->hwq.max_elements, sizeof(*sq->swq), GFP_KERNEL);
+ if (!sq->swq) {
+ rc = -ENOMEM;
+ goto fail_sq;
+ }
+ pbl = &sq->hwq.pbl[PBL_LVL_0];
+ req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
+ req.sq_pg_size_sq_lvl =
+ ((sq->hwq.level & CMDQ_CREATE_QP1_SQ_LVL_MASK)
+ << CMDQ_CREATE_QP1_SQ_LVL_SFT) |
+ (pbl->pg_size == ROCE_PG_SIZE_4K ?
+ CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_4K :
+ pbl->pg_size == ROCE_PG_SIZE_8K ?
+ CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8K :
+ pbl->pg_size == ROCE_PG_SIZE_64K ?
+ CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_64K :
+ pbl->pg_size == ROCE_PG_SIZE_2M ?
+ CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_2M :
+ pbl->pg_size == ROCE_PG_SIZE_8M ?
+ CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8M :
+ pbl->pg_size == ROCE_PG_SIZE_1G ?
+ CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_1G :
+ CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_4K);
+
+ if (qp->scq)
+ req.scq_cid = cpu_to_le32(qp->scq->id);
+
+ qp_flags |= CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE;
+
+ /* RQ */
+ if (rq->max_wqe) {
+ rq->hwq.max_elements = qp->rq.max_wqe;
+ rc = bnxt_qplib_alloc_init_hwq(res->pdev, &rq->hwq, NULL, 0,
+ &rq->hwq.max_elements,
+ BNXT_QPLIB_MAX_RQE_ENTRY_SIZE, 0,
+ PAGE_SIZE, HWQ_TYPE_QUEUE);
+ if (rc)
+ goto fail_sq;
+
+ rq->swq = kcalloc(rq->hwq.max_elements, sizeof(*rq->swq),
+ GFP_KERNEL);
+ if (!rq->swq) {
+ rc = -ENOMEM;
+ goto fail_rq;
+ }
+ pbl = &rq->hwq.pbl[PBL_LVL_0];
+ req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
+ req.rq_pg_size_rq_lvl =
+ ((rq->hwq.level & CMDQ_CREATE_QP1_RQ_LVL_MASK) <<
+ CMDQ_CREATE_QP1_RQ_LVL_SFT) |
+ (pbl->pg_size == ROCE_PG_SIZE_4K ?
+ CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_4K :
+ pbl->pg_size == ROCE_PG_SIZE_8K ?
+ CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8K :
+ pbl->pg_size == ROCE_PG_SIZE_64K ?
+ CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_64K :
+ pbl->pg_size == ROCE_PG_SIZE_2M ?
+ CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_2M :
+ pbl->pg_size == ROCE_PG_SIZE_8M ?
+ CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8M :
+ pbl->pg_size == ROCE_PG_SIZE_1G ?
+ CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_1G :
+ CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_4K);
+ if (qp->rcq)
+ req.rcq_cid = cpu_to_le32(qp->rcq->id);
+ }
+
+ /* Header buffer - allow hdr_buf to be passed in */
+ rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp);
+ if (rc) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ req.qp_flags = cpu_to_le32(qp_flags);
+ req.sq_size = cpu_to_le32(sq->hwq.max_elements);
+ req.rq_size = cpu_to_le32(rq->hwq.max_elements);
+
+ req.sq_fwo_sq_sge =
+ cpu_to_le16((sq->max_sge & CMDQ_CREATE_QP1_SQ_SGE_MASK) <<
+ CMDQ_CREATE_QP1_SQ_SGE_SFT);
+ req.rq_fwo_rq_sge =
+ cpu_to_le16((rq->max_sge & CMDQ_CREATE_QP1_RQ_SGE_MASK) <<
+ CMDQ_CREATE_QP1_RQ_SGE_SFT);
+
+ req.pd_id = cpu_to_le32(qp->pd->id);
+
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, 0);
+ if (rc)
+ goto fail;
+
+ qp->id = le32_to_cpu(resp.xid);
+ qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
+ sq->flush_in_progress = false;
+ rq->flush_in_progress = false;
+
+ return 0;
+
+fail:
+ bnxt_qplib_free_qp_hdr_buf(res, qp);
+fail_rq:
+ bnxt_qplib_free_hwq(res->pdev, &rq->hwq);
+ kfree(rq->swq);
+fail_sq:
+ bnxt_qplib_free_hwq(res->pdev, &sq->hwq);
+ kfree(sq->swq);
+exit:
+ return rc;
+}
+
+int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct sq_send *hw_sq_send_hdr, **hw_sq_send_ptr;
+ struct cmdq_create_qp req;
+ struct creq_create_qp_resp resp;
+ struct bnxt_qplib_pbl *pbl;
+ struct sq_psn_search **psn_search_ptr;
+ unsigned long int psn_search, poff = 0;
+ struct bnxt_qplib_q *sq = &qp->sq;
+ struct bnxt_qplib_q *rq = &qp->rq;
+ struct bnxt_qplib_hwq *xrrq;
+ int i, rc, req_size, psn_sz;
+ u16 cmd_flags = 0, max_ssge;
+ u32 sw_prod, qp_flags = 0;
+
+ RCFW_CMD_PREP(req, CREATE_QP, cmd_flags);
+
+ /* General */
+ req.type = qp->type;
+ req.dpi = cpu_to_le32(qp->dpi->dpi);
+ req.qp_handle = cpu_to_le64(qp->qp_handle);
+
+ /* SQ */
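+ /* Only RC QPs need a PSN search area for per-WQE PSN tracking */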
+ psn_sz = (qp->type == CMDQ_CREATE_QP_TYPE_RC) ?
+ sizeof(struct sq_psn_search) : 0;
+ sq->hwq.max_elements = sq->max_wqe;
+ rc = bnxt_qplib_alloc_init_hwq(res->pdev, &sq->hwq, sq->sglist,
+ sq->nmap, &sq->hwq.max_elements,
+ BNXT_QPLIB_MAX_SQE_ENTRY_SIZE,
+ psn_sz,
+ PAGE_SIZE, HWQ_TYPE_QUEUE);
+ if (rc)
+ goto exit;
+
+ sq->swq = kcalloc(sq->hwq.max_elements, sizeof(*sq->swq), GFP_KERNEL);
+ if (!sq->swq) {
+ rc = -ENOMEM;
+ goto fail_sq;
+ }
+ hw_sq_send_ptr = (struct sq_send **)sq->hwq.pbl_ptr;
+ if (psn_sz) {
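+ /* The PSN search entries are laid out immediately after the SQ WQEs
+ * within the same hardware queue allocation.
+ */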
+ psn_search_ptr = (struct sq_psn_search **)
+ &hw_sq_send_ptr[get_sqe_pg
+ (sq->hwq.max_elements)];
+ psn_search = (unsigned long int)
+ &hw_sq_send_ptr[get_sqe_pg(sq->hwq.max_elements)]
+ [get_sqe_idx(sq->hwq.max_elements)];
+ if (psn_search & ~PAGE_MASK) {
+ /* If the psn_search does not start on a page boundary,
+ * then calculate the offset
+ */
+ poff = (psn_search & ~PAGE_MASK) /
+ BNXT_QPLIB_MAX_PSNE_ENTRY_SIZE;
+ }
+ for (i = 0; i < sq->hwq.max_elements; i++)
+ sq->swq[i].psn_search =
+ &psn_search_ptr[get_psne_pg(i + poff)]
+ [get_psne_idx(i + poff)];
+ }
+ pbl = &sq->hwq.pbl[PBL_LVL_0];
+ req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
+ req.sq_pg_size_sq_lvl =
+ ((sq->hwq.level & CMDQ_CREATE_QP_SQ_LVL_MASK)
+ << CMDQ_CREATE_QP_SQ_LVL_SFT) |
+ (pbl->pg_size == ROCE_PG_SIZE_4K ?
+ CMDQ_CREATE_QP_SQ_PG_SIZE_PG_4K :
+ pbl->pg_size == ROCE_PG_SIZE_8K ?
+ CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8K :
+ pbl->pg_size == ROCE_PG_SIZE_64K ?
+ CMDQ_CREATE_QP_SQ_PG_SIZE_PG_64K :
+ pbl->pg_size == ROCE_PG_SIZE_2M ?
+ CMDQ_CREATE_QP_SQ_PG_SIZE_PG_2M :
+ pbl->pg_size == ROCE_PG_SIZE_8M ?
+ CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8M :
+ pbl->pg_size == ROCE_PG_SIZE_1G ?
+ CMDQ_CREATE_QP_SQ_PG_SIZE_PG_1G :
+ CMDQ_CREATE_QP_SQ_PG_SIZE_PG_4K);
+
+ /* Initialize all SQ WQEs to LOCAL_INVALID (prepares the SQ for HW fetch) */
+ hw_sq_send_ptr = (struct sq_send **)sq->hwq.pbl_ptr;
+ for (sw_prod = 0; sw_prod < sq->hwq.max_elements; sw_prod++) {
+ hw_sq_send_hdr = &hw_sq_send_ptr[get_sqe_pg(sw_prod)]
+ [get_sqe_idx(sw_prod)];
+ hw_sq_send_hdr->wqe_type = SQ_BASE_WQE_TYPE_LOCAL_INVALID;
+ }
+
+ if (qp->scq)
+ req.scq_cid = cpu_to_le32(qp->scq->id);
+
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE;
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED;
+ if (qp->sig_type)
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION;
+
+ /* RQ */
+ if (rq->max_wqe) {
+ rq->hwq.max_elements = rq->max_wqe;
+ rc = bnxt_qplib_alloc_init_hwq(res->pdev, &rq->hwq, rq->sglist,
+ rq->nmap, &rq->hwq.max_elements,
+ BNXT_QPLIB_MAX_RQE_ENTRY_SIZE, 0,
+ PAGE_SIZE, HWQ_TYPE_QUEUE);
+ if (rc)
+ goto fail_sq;
+
+ rq->swq = kcalloc(rq->hwq.max_elements, sizeof(*rq->swq),
+ GFP_KERNEL);
+ if (!rq->swq) {
+ rc = -ENOMEM;
+ goto fail_rq;
+ }
+ pbl = &rq->hwq.pbl[PBL_LVL_0];
+ req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
+ req.rq_pg_size_rq_lvl =
+ ((rq->hwq.level & CMDQ_CREATE_QP_RQ_LVL_MASK) <<
+ CMDQ_CREATE_QP_RQ_LVL_SFT) |
+ (pbl->pg_size == ROCE_PG_SIZE_4K ?
+ CMDQ_CREATE_QP_RQ_PG_SIZE_PG_4K :
+ pbl->pg_size == ROCE_PG_SIZE_8K ?
+ CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8K :
+ pbl->pg_size == ROCE_PG_SIZE_64K ?
+ CMDQ_CREATE_QP_RQ_PG_SIZE_PG_64K :
+ pbl->pg_size == ROCE_PG_SIZE_2M ?
+ CMDQ_CREATE_QP_RQ_PG_SIZE_PG_2M :
+ pbl->pg_size == ROCE_PG_SIZE_8M ?
+ CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8M :
+ pbl->pg_size == ROCE_PG_SIZE_1G ?
+ CMDQ_CREATE_QP_RQ_PG_SIZE_PG_1G :
+ CMDQ_CREATE_QP_RQ_PG_SIZE_PG_4K);
+ }
+
+ if (qp->rcq)
+ req.rcq_cid = cpu_to_le32(qp->rcq->id);
+ req.qp_flags = cpu_to_le32(qp_flags);
+ req.sq_size = cpu_to_le32(sq->hwq.max_elements);
+ req.rq_size = cpu_to_le32(rq->hwq.max_elements);
+ qp->sq_hdr_buf = NULL;
+ qp->rq_hdr_buf = NULL;
+
+ rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp);
+ if (rc)
+ goto fail_rq;
+
+ /* CTRL-22434: Irrespective of the requested SGE count on the SQ,
+ * always create the QP with the maximum number of send SGEs possible
+ * if the requested inline size is greater than 0.
+ */
+ max_ssge = qp->max_inline_data ? 6 : sq->max_sge;
+ req.sq_fwo_sq_sge = cpu_to_le16(
+ ((max_ssge & CMDQ_CREATE_QP_SQ_SGE_MASK)
+ << CMDQ_CREATE_QP_SQ_SGE_SFT) | 0);
+ req.rq_fwo_rq_sge = cpu_to_le16(
+ ((rq->max_sge & CMDQ_CREATE_QP_RQ_SGE_MASK)
+ << CMDQ_CREATE_QP_RQ_SGE_SFT) | 0);
+ /* ORRQ and IRRQ */
+ if (psn_sz) {
+ xrrq = &qp->orrq;
+ xrrq->max_elements =
+ ORD_LIMIT_TO_ORRQ_SLOTS(qp->max_rd_atomic);
+ req_size = xrrq->max_elements *
+ BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE + PAGE_SIZE - 1;
+ req_size &= ~(PAGE_SIZE - 1);
+ rc = bnxt_qplib_alloc_init_hwq(res->pdev, xrrq, NULL, 0,
+ &xrrq->max_elements,
+ BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE,
+ 0, req_size, HWQ_TYPE_CTX);
+ if (rc)
+ goto fail_buf_free;
+ pbl = &xrrq->pbl[PBL_LVL_0];
+ req.orrq_addr = cpu_to_le64(pbl->pg_map_arr[0]);
+
+ xrrq = &qp->irrq;
+ xrrq->max_elements = IRD_LIMIT_TO_IRRQ_SLOTS(
+ qp->max_dest_rd_atomic);
+ req_size = xrrq->max_elements *
+ BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE + PAGE_SIZE - 1;
+ req_size &= ~(PAGE_SIZE - 1);
+
+ rc = bnxt_qplib_alloc_init_hwq(res->pdev, xrrq, NULL, 0,
+ &xrrq->max_elements,
+ BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE,
+ 0, req_size, HWQ_TYPE_CTX);
+ if (rc)
+ goto fail_orrq;
+
+ pbl = &xrrq->pbl[PBL_LVL_0];
+ req.irrq_addr = cpu_to_le64(pbl->pg_map_arr[0]);
+ }
+ req.pd_id = cpu_to_le32(qp->pd->id);
+
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, 0);
+ if (rc)
+ goto fail;
+
+ qp->id = le32_to_cpu(resp.xid);
+ qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
+ sq->flush_in_progress = false;
+ rq->flush_in_progress = false;
+
+ return 0;
+
+fail:
+ if (qp->irrq.max_elements)
+ bnxt_qplib_free_hwq(res->pdev, &qp->irrq);
+fail_orrq:
+ if (qp->orrq.max_elements)
+ bnxt_qplib_free_hwq(res->pdev, &qp->orrq);
+fail_buf_free:
+ bnxt_qplib_free_qp_hdr_buf(res, qp);
+fail_rq:
+ bnxt_qplib_free_hwq(res->pdev, &rq->hwq);
+ kfree(rq->swq);
+fail_sq:
+ bnxt_qplib_free_hwq(res->pdev, &sq->hwq);
+ kfree(sq->swq);
+exit:
+ return rc;
+}
+
+static void __modify_flags_from_init_state(struct bnxt_qplib_qp *qp)
+{
+ switch (qp->state) {
+ case CMDQ_MODIFY_QP_NEW_STATE_RTR:
+ /* INIT->RTR: configure path_mtu to the default 2048 if it is
+ * not requested.
+ */
+ if (!(qp->modify_flags &
+ CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU)) {
+ qp->modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU;
+ qp->path_mtu =
+ CMDQ_MODIFY_QP_PATH_MTU_MTU_2048;
+ }
+ qp->modify_flags &=
+ ~CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID;
+ /* Bono FW requires the max_dest_rd_atomic to be >= 1 */
+ if (qp->max_dest_rd_atomic < 1)
+ qp->max_dest_rd_atomic = 1;
+ qp->modify_flags &= ~CMDQ_MODIFY_QP_MODIFY_MASK_SRC_MAC;
+ /* Bono FW 20.6.5 requires SGID_INDEX configuration */
+ if (!(qp->modify_flags &
+ CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX)) {
+ qp->modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX;
+ qp->ah.sgid_index = 0;
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+static void __modify_flags_from_rtr_state(struct bnxt_qplib_qp *qp)
+{
+ switch (qp->state) {
+ case CMDQ_MODIFY_QP_NEW_STATE_RTS:
+ /* Bono FW requires the max_rd_atomic to be >= 1 */
+ if (qp->max_rd_atomic < 1)
+ qp->max_rd_atomic = 1;
+ /* Bono FW does not allow PKEY_INDEX,
+ * DGID, FLOW_LABEL, SGID_INDEX, HOP_LIMIT,
+ * TRAFFIC_CLASS, DEST_MAC, PATH_MTU, RQ_PSN,
+ * MIN_RNR_TIMER, MAX_DEST_RD_ATOMIC, DEST_QP_ID
+ * modification
+ */
+ qp->modify_flags &=
+ ~(CMDQ_MODIFY_QP_MODIFY_MASK_PKEY |
+ CMDQ_MODIFY_QP_MODIFY_MASK_DGID |
+ CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL |
+ CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX |
+ CMDQ_MODIFY_QP_MODIFY_MASK_HOP_LIMIT |
+ CMDQ_MODIFY_QP_MODIFY_MASK_TRAFFIC_CLASS |
+ CMDQ_MODIFY_QP_MODIFY_MASK_DEST_MAC |
+ CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU |
+ CMDQ_MODIFY_QP_MODIFY_MASK_RQ_PSN |
+ CMDQ_MODIFY_QP_MODIFY_MASK_MIN_RNR_TIMER |
+ CMDQ_MODIFY_QP_MODIFY_MASK_MAX_DEST_RD_ATOMIC |
+ CMDQ_MODIFY_QP_MODIFY_MASK_DEST_QP_ID);
+ break;
+ default:
+ break;
+ }
+}
+
+static void __filter_modify_flags(struct bnxt_qplib_qp *qp)
+{
+ switch (qp->cur_qp_state) {
+ case CMDQ_MODIFY_QP_NEW_STATE_RESET:
+ break;
+ case CMDQ_MODIFY_QP_NEW_STATE_INIT:
+ __modify_flags_from_init_state(qp);
+ break;
+ case CMDQ_MODIFY_QP_NEW_STATE_RTR:
+ __modify_flags_from_rtr_state(qp);
+ break;
+ case CMDQ_MODIFY_QP_NEW_STATE_RTS:
+ break;
+ case CMDQ_MODIFY_QP_NEW_STATE_SQD:
+ break;
+ case CMDQ_MODIFY_QP_NEW_STATE_SQE:
+ break;
+ case CMDQ_MODIFY_QP_NEW_STATE_ERR:
+ break;
+ default:
+ break;
+ }
+}
+
+int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_modify_qp req;
+ struct creq_modify_qp_resp resp;
+ u16 cmd_flags = 0, pkey;
+ u32 temp32[4];
+ u32 bmask;
+ int rc;
+
+ RCFW_CMD_PREP(req, MODIFY_QP, cmd_flags);
+
+ /* Filter out the qp_attr_mask based on the state->new transition */
+ __filter_modify_flags(qp);
+ bmask = qp->modify_flags;
+ req.modify_mask = cpu_to_le32(qp->modify_flags);
+ req.qp_cid = cpu_to_le32(qp->id);
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_STATE) {
+ req.network_type_en_sqd_async_notify_new_state =
+ (qp->state & CMDQ_MODIFY_QP_NEW_STATE_MASK) |
+ (qp->en_sqd_async_notify ?
+ CMDQ_MODIFY_QP_EN_SQD_ASYNC_NOTIFY : 0);
+ }
+ req.network_type_en_sqd_async_notify_new_state |= qp->nw_type;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS)
+ req.access = qp->access;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_PKEY) {
+ if (!bnxt_qplib_get_pkey(res, &res->pkey_tbl,
+ qp->pkey_index, &pkey))
+ req.pkey = cpu_to_le16(pkey);
+ }
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_QKEY)
+ req.qkey = cpu_to_le32(qp->qkey);
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_DGID) {
+ memcpy(temp32, qp->ah.dgid.data, sizeof(struct bnxt_qplib_gid));
+ req.dgid[0] = cpu_to_le32(temp32[0]);
+ req.dgid[1] = cpu_to_le32(temp32[1]);
+ req.dgid[2] = cpu_to_le32(temp32[2]);
+ req.dgid[3] = cpu_to_le32(temp32[3]);
+ }
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL)
+ req.flow_label = cpu_to_le32(qp->ah.flow_label);
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX)
+ req.sgid_index = cpu_to_le16(res->sgid_tbl.hw_id
+ [qp->ah.sgid_index]);
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_HOP_LIMIT)
+ req.hop_limit = qp->ah.hop_limit;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_TRAFFIC_CLASS)
+ req.traffic_class = qp->ah.traffic_class;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_DEST_MAC)
+ memcpy(req.dest_mac, qp->ah.dmac, 6);
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU)
+ req.path_mtu = qp->path_mtu;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_TIMEOUT)
+ req.timeout = qp->timeout;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_RETRY_CNT)
+ req.retry_cnt = qp->retry_cnt;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_RNR_RETRY)
+ req.rnr_retry = qp->rnr_retry;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_MIN_RNR_TIMER)
+ req.min_rnr_timer = qp->min_rnr_timer;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_RQ_PSN)
+ req.rq_psn = cpu_to_le32(qp->rq.psn);
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_SQ_PSN)
+ req.sq_psn = cpu_to_le32(qp->sq.psn);
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_MAX_RD_ATOMIC)
+ req.max_rd_atomic =
+ ORD_LIMIT_TO_ORRQ_SLOTS(qp->max_rd_atomic);
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_MAX_DEST_RD_ATOMIC)
+ req.max_dest_rd_atomic =
+ IRD_LIMIT_TO_IRRQ_SLOTS(qp->max_dest_rd_atomic);
+
+ req.sq_size = cpu_to_le32(qp->sq.hwq.max_elements);
+ req.rq_size = cpu_to_le32(qp->rq.hwq.max_elements);
+ req.sq_sge = cpu_to_le16(qp->sq.max_sge);
+ req.rq_sge = cpu_to_le16(qp->rq.max_sge);
+ req.max_inline_data = cpu_to_le32(qp->max_inline_data);
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_DEST_QP_ID)
+ req.dest_qp_id = cpu_to_le32(qp->dest_qpn);
+
+ req.vlan_pcp_vlan_dei_vlan_id = cpu_to_le16(qp->vlan_id);
+
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, 0);
+ if (rc)
+ return rc;
+ qp->cur_qp_state = qp->state;
+ return 0;
+}
+
+int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_query_qp req;
+ struct creq_query_qp_resp resp;
+ struct bnxt_qplib_rcfw_sbuf *sbuf;
+ struct creq_query_qp_resp_sb *sb;
+ u16 cmd_flags = 0;
+ u32 temp32[4];
+ int i, rc = 0;
+
+ RCFW_CMD_PREP(req, QUERY_QP, cmd_flags);
+
+ sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
+ if (!sbuf)
+ return -ENOMEM;
+ sb = sbuf->sb;
+
+ req.qp_cid = cpu_to_le32(qp->id);
+ req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+ (void *)sbuf, 0);
+ if (rc)
+ goto bail;
+ /* Extract the context from the side buffer */
+ qp->state = sb->en_sqd_async_notify_state &
+ CREQ_QUERY_QP_RESP_SB_STATE_MASK;
+ qp->en_sqd_async_notify = sb->en_sqd_async_notify_state &
+ CREQ_QUERY_QP_RESP_SB_EN_SQD_ASYNC_NOTIFY ?
+ true : false;
+ qp->access = sb->access;
+ qp->pkey_index = le16_to_cpu(sb->pkey);
+ qp->qkey = le32_to_cpu(sb->qkey);
+
+ temp32[0] = le32_to_cpu(sb->dgid[0]);
+ temp32[1] = le32_to_cpu(sb->dgid[1]);
+ temp32[2] = le32_to_cpu(sb->dgid[2]);
+ temp32[3] = le32_to_cpu(sb->dgid[3]);
+ memcpy(qp->ah.dgid.data, temp32, sizeof(qp->ah.dgid.data));
+
+ qp->ah.flow_label = le32_to_cpu(sb->flow_label);
+
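+ /* Translate the hardware SGID index reported by firmware back into
+ * the driver's SGID table index.
+ */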
+ qp->ah.sgid_index = 0;
+ for (i = 0; i < res->sgid_tbl.max; i++) {
+ if (res->sgid_tbl.hw_id[i] == le16_to_cpu(sb->sgid_index)) {
+ qp->ah.sgid_index = i;
+ break;
+ }
+ }
+ if (i == res->sgid_tbl.max)
+ dev_warn(&res->pdev->dev, "QPLIB: SGID not found??");
+
+ qp->ah.hop_limit = sb->hop_limit;
+ qp->ah.traffic_class = sb->traffic_class;
+ memcpy(qp->ah.dmac, sb->dest_mac, 6);
+ qp->ah.vlan_id = (le16_to_cpu(sb->path_mtu_dest_vlan_id) &
+ CREQ_QUERY_QP_RESP_SB_VLAN_ID_MASK) >>
+ CREQ_QUERY_QP_RESP_SB_VLAN_ID_SFT;
+ qp->path_mtu = (le16_to_cpu(sb->path_mtu_dest_vlan_id) &
+ CREQ_QUERY_QP_RESP_SB_PATH_MTU_MASK) >>
+ CREQ_QUERY_QP_RESP_SB_PATH_MTU_SFT;
+ qp->timeout = sb->timeout;
+ qp->retry_cnt = sb->retry_cnt;
+ qp->rnr_retry = sb->rnr_retry;
+ qp->min_rnr_timer = sb->min_rnr_timer;
+ qp->rq.psn = le32_to_cpu(sb->rq_psn);
+ qp->max_rd_atomic = ORRQ_SLOTS_TO_ORD_LIMIT(sb->max_rd_atomic);
+ qp->sq.psn = le32_to_cpu(sb->sq_psn);
+ qp->max_dest_rd_atomic =
+ IRRQ_SLOTS_TO_IRD_LIMIT(sb->max_dest_rd_atomic);
+ qp->sq.max_wqe = qp->sq.hwq.max_elements;
+ qp->rq.max_wqe = qp->rq.hwq.max_elements;
+ qp->sq.max_sge = le16_to_cpu(sb->sq_sge);
+ qp->rq.max_sge = le16_to_cpu(sb->rq_sge);
+ qp->max_inline_data = le32_to_cpu(sb->max_inline_data);
+ qp->dest_qpn = le32_to_cpu(sb->dest_qp_id);
+ memcpy(qp->smac, sb->src_mac, 6);
+ qp->vlan_id = le16_to_cpu(sb->vlan_pcp_vlan_dei_vlan_id);
+bail:
+ bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
+ return rc;
+}
+
+static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp)
+{
+ struct bnxt_qplib_hwq *cq_hwq = &cq->hwq;
+ struct cq_base *hw_cqe, **hw_cqe_ptr;
+ int i;
+
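+ /* Zero the qp_handle of every valid CQE that still references this QP
+ * so that later CQ polling does not touch a destroyed QP.
+ */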
+ for (i = 0; i < cq_hwq->max_elements; i++) {
+ hw_cqe_ptr = (struct cq_base **)cq_hwq->pbl_ptr;
+ hw_cqe = &hw_cqe_ptr[CQE_PG(i)][CQE_IDX(i)];
+ if (!CQE_CMP_VALID(hw_cqe, i, cq_hwq->max_elements))
+ continue;
+ switch (hw_cqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK) {
+ case CQ_BASE_CQE_TYPE_REQ:
+ case CQ_BASE_CQE_TYPE_TERMINAL:
+ {
+ struct cq_req *cqe = (struct cq_req *)hw_cqe;
+
+ if (qp == le64_to_cpu(cqe->qp_handle))
+ cqe->qp_handle = 0;
+ break;
+ }
+ case CQ_BASE_CQE_TYPE_RES_RC:
+ case CQ_BASE_CQE_TYPE_RES_UD:
+ case CQ_BASE_CQE_TYPE_RES_RAWETH_QP1:
+ {
+ struct cq_res_rc *cqe = (struct cq_res_rc *)hw_cqe;
+
+ if (qp == le64_to_cpu(cqe->qp_handle))
+ cqe->qp_handle = 0;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+}
+
+int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_destroy_qp req;
+ struct creq_destroy_qp_resp resp;
+ unsigned long flags;
+ u16 cmd_flags = 0;
+ int rc;
+
+ RCFW_CMD_PREP(req, DESTROY_QP, cmd_flags);
+
+ req.qp_cid = cpu_to_le32(qp->id);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, 0);
+ if (rc)
+ return rc;
+
+ /* Must walk the associated CQs to nullify the QP ptr */
+ spin_lock_irqsave(&qp->scq->hwq.lock, flags);
+
+ __clean_cq(qp->scq, (u64)(unsigned long)qp);
+
+ if (qp->rcq && qp->rcq != qp->scq) {
+ spin_lock(&qp->rcq->hwq.lock);
+ __clean_cq(qp->rcq, (u64)(unsigned long)qp);
+ spin_unlock(&qp->rcq->hwq.lock);
+ }
+
+ spin_unlock_irqrestore(&qp->scq->hwq.lock, flags);
+
+ bnxt_qplib_free_qp_hdr_buf(res, qp);
+ bnxt_qplib_free_hwq(res->pdev, &qp->sq.hwq);
+ kfree(qp->sq.swq);
+
+ bnxt_qplib_free_hwq(res->pdev, &qp->rq.hwq);
+ kfree(qp->rq.swq);
+
+ if (qp->irrq.max_elements)
+ bnxt_qplib_free_hwq(res->pdev, &qp->irrq);
+ if (qp->orrq.max_elements)
+ bnxt_qplib_free_hwq(res->pdev, &qp->orrq);
+
+ return 0;
+}
+
+void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_sge *sge)
+{
+ struct bnxt_qplib_q *sq = &qp->sq;
+ u32 sw_prod;
+
+ memset(sge, 0, sizeof(*sge));
+
+ if (qp->sq_hdr_buf) {
+ sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
+ sge->addr = (dma_addr_t)(qp->sq_hdr_buf_map +
+ sw_prod * qp->sq_hdr_buf_size);
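+ /* Header buffers are posted with the reserved lkey */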
+ sge->lkey = 0xFFFFFFFF;
+ sge->size = qp->sq_hdr_buf_size;
+ return qp->sq_hdr_buf + sw_prod * sge->size;
+ }
+ return NULL;
+}
+
+u32 bnxt_qplib_get_rq_prod_index(struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_q *rq = &qp->rq;
+
+ return HWQ_CMP(rq->hwq.prod, &rq->hwq);
+}
+
+dma_addr_t bnxt_qplib_get_qp_buf_from_index(struct bnxt_qplib_qp *qp, u32 index)
+{
+ return (qp->rq_hdr_buf_map + index * qp->rq_hdr_buf_size);
+}
+
+void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_sge *sge)
+{
+ struct bnxt_qplib_q *rq = &qp->rq;
+ u32 sw_prod;
+
+ memset(sge, 0, sizeof(*sge));
+
+ if (qp->rq_hdr_buf) {
+ sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
+ sge->addr = (dma_addr_t)(qp->rq_hdr_buf_map +
+ sw_prod * qp->rq_hdr_buf_size);
+ sge->lkey = 0xFFFFFFFF;
+ sge->size = qp->rq_hdr_buf_size;
+ return qp->rq_hdr_buf + sw_prod * sge->size;
+ }
+ return NULL;
+}
+
+void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_q *sq = &qp->sq;
+ struct dbr_dbr db_msg = { 0 };
+ u32 sw_prod;
+
+ sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
+
+ db_msg.index = cpu_to_le32((sw_prod << DBR_DBR_INDEX_SFT) &
+ DBR_DBR_INDEX_MASK);
+ db_msg.type_xid =
+ cpu_to_le32(((qp->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) |
+ DBR_DBR_TYPE_SQ);
+ /* Flush all the WQE writes to HW */
+ wmb();
+ __iowrite64_copy(qp->dpi->dbr, &db_msg, sizeof(db_msg) / sizeof(u64));
+}
+
+int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe)
+{
+ struct bnxt_qplib_q *sq = &qp->sq;
+ struct bnxt_qplib_swq *swq;
+ struct sq_send *hw_sq_send_hdr, **hw_sq_send_ptr;
+ struct sq_sge *hw_sge;
+ u32 sw_prod;
+ u8 wqe_size16;
+ int i, rc = 0, data_len = 0, pkt_num = 0;
+ __le32 temp32;
+
+ if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS) {
+ rc = -EINVAL;
+ goto done;
+ }
+
+ if (bnxt_qplib_queue_full(sq)) {
+ dev_err(&sq->hwq.pdev->dev,
+ "QPLIB: prod = %#x cons = %#x qdepth = %#x delta = %#x",
+ sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements,
+ sq->q_full_delta);
+ rc = -ENOMEM;
+ goto done;
+ }
+ sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
+ swq = &sq->swq[sw_prod];
+ swq->wr_id = wqe->wr_id;
+ swq->type = wqe->type;
+ swq->flags = wqe->flags;
+ if (qp->sig_type)
+ swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP;
+ swq->start_psn = sq->psn & BTH_PSN_MASK;
+
+ hw_sq_send_ptr = (struct sq_send **)sq->hwq.pbl_ptr;
+ hw_sq_send_hdr = &hw_sq_send_ptr[get_sqe_pg(sw_prod)]
+ [get_sqe_idx(sw_prod)];
+
+ memset(hw_sq_send_hdr, 0, BNXT_QPLIB_MAX_SQE_ENTRY_SIZE);
+
+ if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) {
+ /* Copy the inline data */
+ if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) {
+ dev_warn(&sq->hwq.pdev->dev,
+ "QPLIB: Inline data length > 96 detected");
+ data_len = BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH;
+ } else {
+ data_len = wqe->inline_len;
+ }
+ memcpy(hw_sq_send_hdr->data, wqe->inline_data, data_len);
+ wqe_size16 = (data_len + 15) >> 4;
+ } else {
+ for (i = 0, hw_sge = (struct sq_sge *)hw_sq_send_hdr->data;
+ i < wqe->num_sge; i++, hw_sge++) {
+ hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr);
+ hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey);
+ hw_sge->size = cpu_to_le32(wqe->sg_list[i].size);
+ data_len += wqe->sg_list[i].size;
+ }
+ /* Each SGE entry = 1 WQE size16 */
+ wqe_size16 = wqe->num_sge;
+		/* HW requires the WQE size to have room for at least one SGE
+		 * even if none was supplied by the ULP
+		 */
+ if (!wqe->num_sge)
+ wqe_size16++;
+ }
+
+ /* Specifics */
+ switch (wqe->type) {
+ case BNXT_QPLIB_SWQE_TYPE_SEND:
+ if (qp->type == CMDQ_CREATE_QP1_TYPE_GSI) {
+ /* Assemble info for Raw Ethertype QPs */
+ struct sq_send_raweth_qp1 *sqe =
+ (struct sq_send_raweth_qp1 *)hw_sq_send_hdr;
+
+ sqe->wqe_type = wqe->type;
+ sqe->flags = wqe->flags;
+ sqe->wqe_size = wqe_size16 +
+ ((offsetof(typeof(*sqe), data) + 15) >> 4);
+ sqe->cfa_action = cpu_to_le16(wqe->rawqp1.cfa_action);
+ sqe->lflags = cpu_to_le16(wqe->rawqp1.lflags);
+ sqe->length = cpu_to_le32(data_len);
+ sqe->cfa_meta = cpu_to_le32((wqe->rawqp1.cfa_meta &
+ SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_MASK) <<
+ SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_SFT);
+
+ break;
+ }
+		/* else, just fall through */
+ case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM:
+ case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV:
+ {
+ struct sq_send *sqe = (struct sq_send *)hw_sq_send_hdr;
+
+ sqe->wqe_type = wqe->type;
+ sqe->flags = wqe->flags;
+ sqe->wqe_size = wqe_size16 +
+ ((offsetof(typeof(*sqe), data) + 15) >> 4);
+ sqe->inv_key_or_imm_data = cpu_to_le32(
+ wqe->send.inv_key);
+ if (qp->type == CMDQ_CREATE_QP_TYPE_UD) {
+ sqe->q_key = cpu_to_le32(wqe->send.q_key);
+ sqe->dst_qp = cpu_to_le32(
+ wqe->send.dst_qp & SQ_SEND_DST_QP_MASK);
+ sqe->length = cpu_to_le32(data_len);
+ sqe->avid = cpu_to_le32(wqe->send.avid &
+ SQ_SEND_AVID_MASK);
+ sq->psn = (sq->psn + 1) & BTH_PSN_MASK;
+ } else {
+ sqe->length = cpu_to_le32(data_len);
+ sqe->dst_qp = 0;
+ sqe->avid = 0;
+ if (qp->mtu)
+ pkt_num = (data_len + qp->mtu - 1) / qp->mtu;
+ if (!pkt_num)
+ pkt_num = 1;
+ sq->psn = (sq->psn + pkt_num) & BTH_PSN_MASK;
+ }
+ break;
+ }
+ case BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE:
+ case BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM:
+ case BNXT_QPLIB_SWQE_TYPE_RDMA_READ:
+ {
+ struct sq_rdma *sqe = (struct sq_rdma *)hw_sq_send_hdr;
+
+ sqe->wqe_type = wqe->type;
+ sqe->flags = wqe->flags;
+ sqe->wqe_size = wqe_size16 +
+ ((offsetof(typeof(*sqe), data) + 15) >> 4);
+ sqe->imm_data = cpu_to_le32(wqe->rdma.inv_key);
+ sqe->length = cpu_to_le32((u32)data_len);
+ sqe->remote_va = cpu_to_le64(wqe->rdma.remote_va);
+ sqe->remote_key = cpu_to_le32(wqe->rdma.r_key);
+ if (qp->mtu)
+ pkt_num = (data_len + qp->mtu - 1) / qp->mtu;
+ if (!pkt_num)
+ pkt_num = 1;
+ sq->psn = (sq->psn + pkt_num) & BTH_PSN_MASK;
+ break;
+ }
+ case BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP:
+ case BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD:
+ {
+ struct sq_atomic *sqe = (struct sq_atomic *)hw_sq_send_hdr;
+
+ sqe->wqe_type = wqe->type;
+ sqe->flags = wqe->flags;
+ sqe->remote_key = cpu_to_le32(wqe->atomic.r_key);
+ sqe->remote_va = cpu_to_le64(wqe->atomic.remote_va);
+ sqe->swap_data = cpu_to_le64(wqe->atomic.swap_data);
+ sqe->cmp_data = cpu_to_le64(wqe->atomic.cmp_data);
+ if (qp->mtu)
+ pkt_num = (data_len + qp->mtu - 1) / qp->mtu;
+ if (!pkt_num)
+ pkt_num = 1;
+ sq->psn = (sq->psn + pkt_num) & BTH_PSN_MASK;
+ break;
+ }
+ case BNXT_QPLIB_SWQE_TYPE_LOCAL_INV:
+ {
+ struct sq_localinvalidate *sqe =
+ (struct sq_localinvalidate *)hw_sq_send_hdr;
+
+ sqe->wqe_type = wqe->type;
+ sqe->flags = wqe->flags;
+ sqe->inv_l_key = cpu_to_le32(wqe->local_inv.inv_l_key);
+
+ break;
+ }
+ case BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR:
+ {
+ struct sq_fr_pmr *sqe = (struct sq_fr_pmr *)hw_sq_send_hdr;
+
+ sqe->wqe_type = wqe->type;
+ sqe->flags = wqe->flags;
+ sqe->access_cntl = wqe->frmr.access_cntl |
+ SQ_FR_PMR_ACCESS_CNTL_LOCAL_WRITE;
+ sqe->zero_based_page_size_log =
+ (wqe->frmr.pg_sz_log & SQ_FR_PMR_PAGE_SIZE_LOG_MASK) <<
+ SQ_FR_PMR_PAGE_SIZE_LOG_SFT |
+ (wqe->frmr.zero_based ? SQ_FR_PMR_ZERO_BASED : 0);
+ sqe->l_key = cpu_to_le32(wqe->frmr.l_key);
+ temp32 = cpu_to_le32(wqe->frmr.length);
+ memcpy(sqe->length, &temp32, sizeof(wqe->frmr.length));
+ sqe->numlevels_pbl_page_size_log =
+ ((wqe->frmr.pbl_pg_sz_log <<
+ SQ_FR_PMR_PBL_PAGE_SIZE_LOG_SFT) &
+ SQ_FR_PMR_PBL_PAGE_SIZE_LOG_MASK) |
+ ((wqe->frmr.levels << SQ_FR_PMR_NUMLEVELS_SFT) &
+ SQ_FR_PMR_NUMLEVELS_MASK);
+
+ for (i = 0; i < wqe->frmr.page_list_len; i++)
+ wqe->frmr.pbl_ptr[i] = cpu_to_le64(
+ wqe->frmr.page_list[i] |
+ PTU_PTE_VALID);
+ sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr);
+ sqe->va = cpu_to_le64(wqe->frmr.va);
+
+ break;
+ }
+ case BNXT_QPLIB_SWQE_TYPE_BIND_MW:
+ {
+ struct sq_bind *sqe = (struct sq_bind *)hw_sq_send_hdr;
+
+ sqe->wqe_type = wqe->type;
+ sqe->flags = wqe->flags;
+ sqe->access_cntl = wqe->bind.access_cntl;
+ sqe->mw_type_zero_based = wqe->bind.mw_type |
+ (wqe->bind.zero_based ? SQ_BIND_ZERO_BASED : 0);
+ sqe->parent_l_key = cpu_to_le32(wqe->bind.parent_l_key);
+ sqe->l_key = cpu_to_le32(wqe->bind.r_key);
+ sqe->va = cpu_to_le64(wqe->bind.va);
+ temp32 = cpu_to_le32(wqe->bind.length);
+ memcpy(&sqe->length, &temp32, sizeof(wqe->bind.length));
+ break;
+ }
+ default:
+ /* Bad wqe, return error */
+ rc = -EINVAL;
+ goto done;
+ }
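+	/* Record the PSN range consumed by this WQE in the PSN search area
+	 * (if present); the HW uses it to map a PSN back to a WQE, e.g. on
+	 * retransmission, for RC traffic.
+	 */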
+ swq->next_psn = sq->psn & BTH_PSN_MASK;
+ if (swq->psn_search) {
+ swq->psn_search->opcode_start_psn = cpu_to_le32(
+ ((swq->start_psn << SQ_PSN_SEARCH_START_PSN_SFT) &
+ SQ_PSN_SEARCH_START_PSN_MASK) |
+ ((wqe->type << SQ_PSN_SEARCH_OPCODE_SFT) &
+ SQ_PSN_SEARCH_OPCODE_MASK));
+ swq->psn_search->flags_next_psn = cpu_to_le32(
+ ((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) &
+ SQ_PSN_SEARCH_NEXT_PSN_MASK));
+ }
+
+ sq->hwq.prod++;
+
+ qp->wqe_cnt++;
+
+done:
+ return rc;
+}
+
+void bnxt_qplib_post_recv_db(struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_q *rq = &qp->rq;
+ struct dbr_dbr db_msg = { 0 };
+ u32 sw_prod;
+
+ sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
+ db_msg.index = cpu_to_le32((sw_prod << DBR_DBR_INDEX_SFT) &
+ DBR_DBR_INDEX_MASK);
+ db_msg.type_xid =
+ cpu_to_le32(((qp->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) |
+ DBR_DBR_TYPE_RQ);
+
+	/* Flush the writes to the HW Rx WQE before ringing the Rx DB */
+ wmb();
+ __iowrite64_copy(qp->dpi->dbr, &db_msg, sizeof(db_msg) / sizeof(u64));
+}
+
+int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe)
+{
+ struct bnxt_qplib_q *rq = &qp->rq;
+ struct rq_wqe *rqe, **rqe_ptr;
+ struct sq_sge *hw_sge;
+ u32 sw_prod;
+ int i, rc = 0;
+
+ if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
+ dev_err(&rq->hwq.pdev->dev,
+ "QPLIB: FP: QP (0x%x) is in the 0x%x state",
+ qp->id, qp->state);
+ rc = -EINVAL;
+ goto done;
+ }
+ if (bnxt_qplib_queue_full(rq)) {
+ dev_err(&rq->hwq.pdev->dev,
+ "QPLIB: FP: QP (0x%x) RQ is full!", qp->id);
+ rc = -EINVAL;
+ goto done;
+ }
+ sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
+ rq->swq[sw_prod].wr_id = wqe->wr_id;
+
+ rqe_ptr = (struct rq_wqe **)rq->hwq.pbl_ptr;
+ rqe = &rqe_ptr[RQE_PG(sw_prod)][RQE_IDX(sw_prod)];
+
+ memset(rqe, 0, BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
+
+ /* Calculate wqe_size16 and data_len */
+ for (i = 0, hw_sge = (struct sq_sge *)rqe->data;
+ i < wqe->num_sge; i++, hw_sge++) {
+ hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr);
+ hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey);
+ hw_sge->size = cpu_to_le32(wqe->sg_list[i].size);
+ }
+ rqe->wqe_type = wqe->type;
+ rqe->flags = wqe->flags;
+ rqe->wqe_size = wqe->num_sge +
+ ((offsetof(typeof(*rqe), data) + 15) >> 4);
+	/* HW requires the WQE size to have room for at least one SGE even if
+	 * none was supplied by the ULP
+	 */
+ if (!wqe->num_sge)
+ rqe->wqe_size++;
+
+ /* Supply the rqe->wr_id index to the wr_id_tbl for now */
+ rqe->wr_id[0] = cpu_to_le32(sw_prod);
+
+ rq->hwq.prod++;
+done:
+ return rc;
+}
+
+/* CQ */
+
+/* Spinlock must be held */
+static void bnxt_qplib_arm_cq_enable(struct bnxt_qplib_cq *cq)
+{
+ struct dbr_dbr db_msg = { 0 };
+
+ db_msg.type_xid =
+ cpu_to_le32(((cq->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) |
+ DBR_DBR_TYPE_CQ_ARMENA);
+ /* Flush memory writes before enabling the CQ */
+ wmb();
+ __iowrite64_copy(cq->dbr_base, &db_msg, sizeof(db_msg) / sizeof(u64));
+}
+
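+/* Ring the CQ doorbell with the current consumer index; arm_type selects
+ * the doorbell operation (e.g. DBR_DBR_TYPE_CQ to acknowledge consumed
+ * CQEs, or DBR_DBR_TYPE_CQ_ARMALL to re-arm the CQ for notifications).
+ */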
+static void bnxt_qplib_arm_cq(struct bnxt_qplib_cq *cq, u32 arm_type)
+{
+ struct bnxt_qplib_hwq *cq_hwq = &cq->hwq;
+ struct dbr_dbr db_msg = { 0 };
+ u32 sw_cons;
+
+ /* Ring DB */
+ sw_cons = HWQ_CMP(cq_hwq->cons, cq_hwq);
+ db_msg.index = cpu_to_le32((sw_cons << DBR_DBR_INDEX_SFT) &
+ DBR_DBR_INDEX_MASK);
+ db_msg.type_xid =
+ cpu_to_le32(((cq->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) |
+ arm_type);
+ /* flush memory writes before arming the CQ */
+ wmb();
+ __iowrite64_copy(cq->dpi->dbr, &db_msg, sizeof(db_msg) / sizeof(u64));
+}
+
+int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_create_cq req;
+ struct creq_create_cq_resp resp;
+ struct bnxt_qplib_pbl *pbl;
+ u16 cmd_flags = 0;
+ int rc;
+
+ cq->hwq.max_elements = cq->max_wqe;
+ rc = bnxt_qplib_alloc_init_hwq(res->pdev, &cq->hwq, cq->sghead,
+ cq->nmap, &cq->hwq.max_elements,
+ BNXT_QPLIB_MAX_CQE_ENTRY_SIZE, 0,
+ PAGE_SIZE, HWQ_TYPE_QUEUE);
+ if (rc)
+ goto exit;
+
+ RCFW_CMD_PREP(req, CREATE_CQ, cmd_flags);
+
+	if (!cq->dpi) {
+		dev_err(&rcfw->pdev->dev,
+			"QPLIB: FP: CREATE_CQ failed due to NULL DPI");
+		rc = -EINVAL;
+		goto fail;
+	}
+ req.dpi = cpu_to_le32(cq->dpi->dpi);
+ req.cq_handle = cpu_to_le64(cq->cq_handle);
+
+ req.cq_size = cpu_to_le32(cq->hwq.max_elements);
+ pbl = &cq->hwq.pbl[PBL_LVL_0];
+ req.pg_size_lvl = cpu_to_le32(
+ ((cq->hwq.level & CMDQ_CREATE_CQ_LVL_MASK) <<
+ CMDQ_CREATE_CQ_LVL_SFT) |
+ (pbl->pg_size == ROCE_PG_SIZE_4K ? CMDQ_CREATE_CQ_PG_SIZE_PG_4K :
+ pbl->pg_size == ROCE_PG_SIZE_8K ? CMDQ_CREATE_CQ_PG_SIZE_PG_8K :
+ pbl->pg_size == ROCE_PG_SIZE_64K ? CMDQ_CREATE_CQ_PG_SIZE_PG_64K :
+ pbl->pg_size == ROCE_PG_SIZE_2M ? CMDQ_CREATE_CQ_PG_SIZE_PG_2M :
+ pbl->pg_size == ROCE_PG_SIZE_8M ? CMDQ_CREATE_CQ_PG_SIZE_PG_8M :
+ pbl->pg_size == ROCE_PG_SIZE_1G ? CMDQ_CREATE_CQ_PG_SIZE_PG_1G :
+ CMDQ_CREATE_CQ_PG_SIZE_PG_4K));
+
+ req.pbl = cpu_to_le64(pbl->pg_map_arr[0]);
+
+ req.cq_fco_cnq_id = cpu_to_le32(
+ (cq->cnq_hw_ring_id & CMDQ_CREATE_CQ_CNQ_ID_MASK) <<
+ CMDQ_CREATE_CQ_CNQ_ID_SFT);
+
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, 0);
+ if (rc)
+ goto fail;
+
+ cq->id = le32_to_cpu(resp.xid);
+ cq->dbr_base = res->dpi_tbl.dbr_bar_reg_iomem;
+ cq->period = BNXT_QPLIB_QUEUE_START_PERIOD;
+ init_waitqueue_head(&cq->waitq);
+
+ bnxt_qplib_arm_cq_enable(cq);
+ return 0;
+
+fail:
+ bnxt_qplib_free_hwq(res->pdev, &cq->hwq);
+exit:
+ return rc;
+}
+
+int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_destroy_cq req;
+ struct creq_destroy_cq_resp resp;
+ u16 cmd_flags = 0;
+ int rc;
+
+ RCFW_CMD_PREP(req, DESTROY_CQ, cmd_flags);
+
+ req.cq_cid = cpu_to_le32(cq->id);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, 0);
+ if (rc)
+ return rc;
+ bnxt_qplib_free_hwq(res->pdev, &cq->hwq);
+ return 0;
+}
+
+static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_cqe **pcqe, int *budget)
+{
+ u32 sw_prod, sw_cons;
+ struct bnxt_qplib_cqe *cqe;
+ int rc = 0;
+
+ /* Now complete all outstanding SQEs with FLUSHED_ERR */
+ sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
+ cqe = *pcqe;
+ while (*budget) {
+ sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
+ if (sw_cons == sw_prod) {
+ sq->flush_in_progress = false;
+ break;
+ }
+ memset(cqe, 0, sizeof(*cqe));
+ cqe->status = CQ_REQ_STATUS_WORK_REQUEST_FLUSHED_ERR;
+ cqe->opcode = CQ_BASE_CQE_TYPE_REQ;
+ cqe->qp_handle = (u64)(unsigned long)qp;
+ cqe->wr_id = sq->swq[sw_cons].wr_id;
+ cqe->src_qp = qp->id;
+ cqe->type = sq->swq[sw_cons].type;
+ cqe++;
+ (*budget)--;
+ sq->hwq.cons++;
+ }
+ *pcqe = cqe;
+ if (!(*budget) && HWQ_CMP(sq->hwq.cons, &sq->hwq) != sw_prod)
+ /* Out of budget */
+ rc = -EAGAIN;
+
+ return rc;
+}
+
+static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp,
+ int opcode, struct bnxt_qplib_cqe **pcqe, int *budget)
+{
+ struct bnxt_qplib_cqe *cqe;
+ u32 sw_prod, sw_cons;
+ int rc = 0;
+
+ /* Flush the rest of the RQ */
+ sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
+ cqe = *pcqe;
+ while (*budget) {
+ sw_cons = HWQ_CMP(rq->hwq.cons, &rq->hwq);
+ if (sw_cons == sw_prod)
+ break;
+ memset(cqe, 0, sizeof(*cqe));
+ cqe->status =
+ CQ_RES_RC_STATUS_WORK_REQUEST_FLUSHED_ERR;
+ cqe->opcode = opcode;
+ cqe->qp_handle = (unsigned long)qp;
+ cqe->wr_id = rq->swq[sw_cons].wr_id;
+ cqe++;
+ (*budget)--;
+ rq->hwq.cons++;
+ }
+ *pcqe = cqe;
+ if (!*budget && HWQ_CMP(rq->hwq.cons, &rq->hwq) != sw_prod)
+ /* Out of budget */
+ rc = -EAGAIN;
+
+ return rc;
+}
+
+/* Note: SQEs are valid from sw_sq_cons up to cqe_sq_cons (exclusive).
+ * CQEs are tracked from sw_cq_cons up to max_elements but are valid only
+ * if VALID=1
+ */
+static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
+ u32 cq_cons, u32 sw_sq_cons, u32 cqe_sq_cons)
+{
+ struct bnxt_qplib_q *sq = &qp->sq;
+ struct bnxt_qplib_swq *swq;
+ u32 peek_sw_cq_cons, peek_raw_cq_cons, peek_sq_cons_idx;
+ struct cq_base *peek_hwcqe, **peek_hw_cqe_ptr;
+ struct cq_req *peek_req_hwcqe;
+ struct bnxt_qplib_qp *peek_qp;
+ struct bnxt_qplib_q *peek_sq;
+ int i, rc = 0;
+
+ /* Normal mode */
+ /* Check for the psn_search marking before completing */
+ swq = &sq->swq[sw_sq_cons];
+ if (swq->psn_search &&
+ le32_to_cpu(swq->psn_search->flags_next_psn) & 0x80000000) {
+ /* Unmark */
+ swq->psn_search->flags_next_psn = cpu_to_le32
+ (le32_to_cpu(swq->psn_search->flags_next_psn)
+ & ~0x80000000);
+ dev_dbg(&cq->hwq.pdev->dev,
+ "FP: Process Req cq_cons=0x%x qp=0x%x sq cons sw=0x%x cqe=0x%x marked!\n",
+ cq_cons, qp->id, sw_sq_cons, cqe_sq_cons);
+ sq->condition = true;
+ sq->send_phantom = true;
+
+ /* TODO: Only ARM if the previous SQE is ARMALL */
+ bnxt_qplib_arm_cq(cq, DBR_DBR_TYPE_CQ_ARMALL);
+
+ rc = -EAGAIN;
+ goto out;
+ }
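+	/* Once the marked WQE has been reached, peek ahead in the CQ for the
+	 * completion of the phantom (fence) WQE before completing any further
+	 * SQ work requests.
+	 */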
+ if (sq->condition) {
+ /* Peek at the completions */
+ peek_raw_cq_cons = cq->hwq.cons;
+ peek_sw_cq_cons = cq_cons;
+ i = cq->hwq.max_elements;
+ while (i--) {
+ peek_sw_cq_cons = HWQ_CMP((peek_sw_cq_cons), &cq->hwq);
+ peek_hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr;
+ peek_hwcqe = &peek_hw_cqe_ptr[CQE_PG(peek_sw_cq_cons)]
+ [CQE_IDX(peek_sw_cq_cons)];
+ /* If the next hwcqe is VALID */
+ if (CQE_CMP_VALID(peek_hwcqe, peek_raw_cq_cons,
+ cq->hwq.max_elements)) {
+ /* If the next hwcqe is a REQ */
+ if ((peek_hwcqe->cqe_type_toggle &
+ CQ_BASE_CQE_TYPE_MASK) ==
+ CQ_BASE_CQE_TYPE_REQ) {
+ peek_req_hwcqe = (struct cq_req *)
+ peek_hwcqe;
+ peek_qp = (struct bnxt_qplib_qp *)
+ ((unsigned long)
+ le64_to_cpu
+ (peek_req_hwcqe->qp_handle));
+ peek_sq = &peek_qp->sq;
+ peek_sq_cons_idx = HWQ_CMP(le16_to_cpu(
+ peek_req_hwcqe->sq_cons_idx) - 1
+ , &sq->hwq);
+ /* If the hwcqe's sq's wr_id matches */
+ if (peek_sq == sq &&
+ sq->swq[peek_sq_cons_idx].wr_id ==
+ BNXT_QPLIB_FENCE_WRID) {
+ /*
+ * Unbreak only if the phantom
+ * comes back
+ */
+ dev_dbg(&cq->hwq.pdev->dev,
+						"FP: Got Phantom CQE");
+ sq->condition = false;
+ sq->single = true;
+ rc = 0;
+ goto out;
+ }
+ }
+ /* Valid but not the phantom, so keep looping */
+ } else {
+ /* Not valid yet, just exit and wait */
+ rc = -EINVAL;
+ goto out;
+ }
+ peek_sw_cq_cons++;
+ peek_raw_cq_cons++;
+ }
+ dev_err(&cq->hwq.pdev->dev,
+ "Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x",
+ cq_cons, qp->id, sw_sq_cons, cqe_sq_cons);
+ rc = -EINVAL;
+ }
+out:
+ return rc;
+}
+
+static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
+ struct cq_req *hwcqe,
+ struct bnxt_qplib_cqe **pcqe, int *budget,
+ u32 cq_cons, struct bnxt_qplib_qp **lib_qp)
+{
+ struct bnxt_qplib_qp *qp;
+ struct bnxt_qplib_q *sq;
+ struct bnxt_qplib_cqe *cqe;
+ u32 sw_sq_cons, cqe_sq_cons;
+ struct bnxt_qplib_swq *swq;
+ int rc = 0;
+
+ qp = (struct bnxt_qplib_qp *)((unsigned long)
+ le64_to_cpu(hwcqe->qp_handle));
+ if (!qp) {
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: FP: Process Req qp is NULL");
+ return -EINVAL;
+ }
+ sq = &qp->sq;
+
+ cqe_sq_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq);
+ if (cqe_sq_cons > sq->hwq.max_elements) {
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: FP: CQ Process req reported ");
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x",
+ cqe_sq_cons, sq->hwq.max_elements);
+ return -EINVAL;
+ }
+ /* If we were in the middle of flushing the SQ, continue */
+ if (sq->flush_in_progress)
+ goto flush;
+
+	/* We must walk the SQ's swq to fabricate CQEs for all previously
+	 * signaled SWQEs (due to CQE aggregation) from the current sq cons
+	 * up to the cqe_sq_cons
+	 */
+ cqe = *pcqe;
+ while (*budget) {
+ sw_sq_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
+ if (sw_sq_cons == cqe_sq_cons)
+ /* Done */
+ break;
+
+ swq = &sq->swq[sw_sq_cons];
+ memset(cqe, 0, sizeof(*cqe));
+ cqe->opcode = CQ_BASE_CQE_TYPE_REQ;
+ cqe->qp_handle = (u64)(unsigned long)qp;
+ cqe->src_qp = qp->id;
+ cqe->wr_id = swq->wr_id;
+ if (cqe->wr_id == BNXT_QPLIB_FENCE_WRID)
+ goto skip;
+ cqe->type = swq->type;
+
+ /* For the last CQE, check for status. For errors, regardless
+ * of the request being signaled or not, it must complete with
+ * the hwcqe error status
+ */
+ if (HWQ_CMP((sw_sq_cons + 1), &sq->hwq) == cqe_sq_cons &&
+ hwcqe->status != CQ_REQ_STATUS_OK) {
+ cqe->status = hwcqe->status;
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: FP: CQ Processed Req ");
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: wr_id[%d] = 0x%llx with status 0x%x",
+ sw_sq_cons, cqe->wr_id, cqe->status);
+ cqe++;
+ (*budget)--;
+ sq->flush_in_progress = true;
+ /* Must block new posting of SQ and RQ */
+ qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
+ sq->condition = false;
+ sq->single = false;
+ } else {
+ if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
+ /* Before we complete, do WA 9060 */
+ if (do_wa9060(qp, cq, cq_cons, sw_sq_cons,
+ cqe_sq_cons)) {
+ *lib_qp = qp;
+ goto out;
+ }
+ cqe->status = CQ_REQ_STATUS_OK;
+ cqe++;
+ (*budget)--;
+ }
+ }
+skip:
+ sq->hwq.cons++;
+ if (sq->single)
+ break;
+ }
+out:
+ *pcqe = cqe;
+ if (HWQ_CMP(sq->hwq.cons, &sq->hwq) != cqe_sq_cons) {
+ /* Out of budget */
+ rc = -EAGAIN;
+ goto done;
+ }
+ /*
+ * Back to normal completion mode only after it has completed all of
+ * the WC for this CQE
+ */
+ sq->single = false;
+ if (!sq->flush_in_progress)
+ goto done;
+flush:
+	/* We must walk the SQ's swq to fabricate CQEs for all previously
+	 * posted SWQEs due to the error CQE that was received
+	 */
+ rc = __flush_sq(sq, qp, pcqe, budget);
+ if (!rc)
+ sq->flush_in_progress = false;
+done:
+ return rc;
+}
+
+static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
+ struct cq_res_rc *hwcqe,
+ struct bnxt_qplib_cqe **pcqe,
+ int *budget)
+{
+ struct bnxt_qplib_qp *qp;
+ struct bnxt_qplib_q *rq;
+ struct bnxt_qplib_cqe *cqe;
+ u32 wr_id_idx;
+ int rc = 0;
+
+ qp = (struct bnxt_qplib_qp *)((unsigned long)
+ le64_to_cpu(hwcqe->qp_handle));
+ if (!qp) {
+ dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq RC qp is NULL");
+ return -EINVAL;
+ }
+ cqe = *pcqe;
+ cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK;
+ cqe->length = le32_to_cpu(hwcqe->length);
+ cqe->invrkey = le32_to_cpu(hwcqe->imm_data_or_inv_r_key);
+ cqe->mr_handle = le64_to_cpu(hwcqe->mr_handle);
+ cqe->flags = le16_to_cpu(hwcqe->flags);
+ cqe->status = hwcqe->status;
+ cqe->qp_handle = (u64)(unsigned long)qp;
+
+ wr_id_idx = le32_to_cpu(hwcqe->srq_or_rq_wr_id) &
+ CQ_RES_RC_SRQ_OR_RQ_WR_ID_MASK;
+ rq = &qp->rq;
+ if (wr_id_idx > rq->hwq.max_elements) {
+ dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process RC ");
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x",
+ wr_id_idx, rq->hwq.max_elements);
+ return -EINVAL;
+ }
+ if (rq->flush_in_progress)
+ goto flush_rq;
+
+ cqe->wr_id = rq->swq[wr_id_idx].wr_id;
+ cqe++;
+ (*budget)--;
+ rq->hwq.cons++;
+ *pcqe = cqe;
+
+ if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
+ rq->flush_in_progress = true;
+flush_rq:
+ rc = __flush_rq(rq, qp, CQ_BASE_CQE_TYPE_RES_RC, pcqe, budget);
+ if (!rc)
+ rq->flush_in_progress = false;
+ }
+ return rc;
+}
+
+static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
+ struct cq_res_ud *hwcqe,
+ struct bnxt_qplib_cqe **pcqe,
+ int *budget)
+{
+ struct bnxt_qplib_qp *qp;
+ struct bnxt_qplib_q *rq;
+ struct bnxt_qplib_cqe *cqe;
+ u32 wr_id_idx;
+ int rc = 0;
+
+ qp = (struct bnxt_qplib_qp *)((unsigned long)
+ le64_to_cpu(hwcqe->qp_handle));
+ if (!qp) {
+ dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq UD qp is NULL");
+ return -EINVAL;
+ }
+ cqe = *pcqe;
+ cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK;
+ cqe->length = le32_to_cpu(hwcqe->length);
+ cqe->invrkey = le32_to_cpu(hwcqe->imm_data);
+ cqe->flags = le16_to_cpu(hwcqe->flags);
+ cqe->status = hwcqe->status;
+ cqe->qp_handle = (u64)(unsigned long)qp;
+ memcpy(cqe->smac, hwcqe->src_mac, 6);
+ wr_id_idx = le32_to_cpu(hwcqe->src_qp_high_srq_or_rq_wr_id)
+ & CQ_RES_UD_SRQ_OR_RQ_WR_ID_MASK;
+ cqe->src_qp = le16_to_cpu(hwcqe->src_qp_low) |
+ ((le32_to_cpu(
+ hwcqe->src_qp_high_srq_or_rq_wr_id) &
+ CQ_RES_UD_SRC_QP_HIGH_MASK) >> 8);
+
+ rq = &qp->rq;
+ if (wr_id_idx > rq->hwq.max_elements) {
+ dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process UD ");
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: wr_id idx %#x exceeded RQ max %#x",
+ wr_id_idx, rq->hwq.max_elements);
+ return -EINVAL;
+ }
+ if (rq->flush_in_progress)
+ goto flush_rq;
+
+ cqe->wr_id = rq->swq[wr_id_idx].wr_id;
+ cqe++;
+ (*budget)--;
+ rq->hwq.cons++;
+ *pcqe = cqe;
+
+ if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
+ rq->flush_in_progress = true;
+flush_rq:
+ rc = __flush_rq(rq, qp, CQ_BASE_CQE_TYPE_RES_UD, pcqe, budget);
+ if (!rc)
+ rq->flush_in_progress = false;
+ }
+ return rc;
+}
+
+bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq)
+{
+ struct cq_base *hw_cqe, **hw_cqe_ptr;
+ unsigned long flags;
+ u32 sw_cons, raw_cons;
+ bool rc = true;
+
+ spin_lock_irqsave(&cq->hwq.lock, flags);
+ raw_cons = cq->hwq.cons;
+ sw_cons = HWQ_CMP(raw_cons, &cq->hwq);
+ hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr;
+ hw_cqe = &hw_cqe_ptr[CQE_PG(sw_cons)][CQE_IDX(sw_cons)];
+
+ /* Check for Valid bit. If the CQE is valid, return false */
+ rc = !CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements);
+ spin_unlock_irqrestore(&cq->hwq.lock, flags);
+ return rc;
+}
+
+static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
+ struct cq_res_raweth_qp1 *hwcqe,
+ struct bnxt_qplib_cqe **pcqe,
+ int *budget)
+{
+ struct bnxt_qplib_qp *qp;
+ struct bnxt_qplib_q *rq;
+ struct bnxt_qplib_cqe *cqe;
+ u32 wr_id_idx;
+ int rc = 0;
+
+ qp = (struct bnxt_qplib_qp *)((unsigned long)
+ le64_to_cpu(hwcqe->qp_handle));
+ if (!qp) {
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: process_cq Raw/QP1 qp is NULL");
+ return -EINVAL;
+ }
+ cqe = *pcqe;
+ cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK;
+ cqe->flags = le16_to_cpu(hwcqe->flags);
+ cqe->qp_handle = (u64)(unsigned long)qp;
+
+ wr_id_idx =
+ le32_to_cpu(hwcqe->raweth_qp1_payload_offset_srq_or_rq_wr_id)
+ & CQ_RES_RAWETH_QP1_SRQ_OR_RQ_WR_ID_MASK;
+ cqe->src_qp = qp->id;
+ if (qp->id == 1 && !cqe->length) {
+ /* Add workaround for the length misdetection */
+ cqe->length = 296;
+ } else {
+ cqe->length = le16_to_cpu(hwcqe->length);
+ }
+ cqe->pkey_index = qp->pkey_index;
+ memcpy(cqe->smac, qp->smac, 6);
+
+ cqe->raweth_qp1_flags = le16_to_cpu(hwcqe->raweth_qp1_flags);
+ cqe->raweth_qp1_flags2 = le32_to_cpu(hwcqe->raweth_qp1_flags2);
+
+ rq = &qp->rq;
+ if (wr_id_idx > rq->hwq.max_elements) {
+ dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process Raw/QP1 RQ wr_id ");
+ dev_err(&cq->hwq.pdev->dev, "QPLIB: ix 0x%x exceeded RQ max 0x%x",
+ wr_id_idx, rq->hwq.max_elements);
+ return -EINVAL;
+ }
+ if (rq->flush_in_progress)
+ goto flush_rq;
+
+ cqe->wr_id = rq->swq[wr_id_idx].wr_id;
+ cqe++;
+ (*budget)--;
+ rq->hwq.cons++;
+ *pcqe = cqe;
+
+ if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
+ rq->flush_in_progress = true;
+flush_rq:
+ rc = __flush_rq(rq, qp, CQ_BASE_CQE_TYPE_RES_RAWETH_QP1, pcqe,
+ budget);
+ if (!rc)
+ rq->flush_in_progress = false;
+ }
+ return rc;
+}
+
+static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
+ struct cq_terminal *hwcqe,
+ struct bnxt_qplib_cqe **pcqe,
+ int *budget)
+{
+ struct bnxt_qplib_qp *qp;
+ struct bnxt_qplib_q *sq, *rq;
+ struct bnxt_qplib_cqe *cqe;
+ u32 sw_cons = 0, cqe_cons;
+ int rc = 0;
+ u8 opcode = 0;
+
+ /* Check the Status */
+ if (hwcqe->status != CQ_TERMINAL_STATUS_OK)
+ dev_warn(&cq->hwq.pdev->dev,
+ "QPLIB: FP: CQ Process Terminal Error status = 0x%x",
+ hwcqe->status);
+
+ qp = (struct bnxt_qplib_qp *)((unsigned long)
+ le64_to_cpu(hwcqe->qp_handle));
+ if (!qp) {
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: FP: CQ Process terminal qp is NULL");
+ return -EINVAL;
+ }
+ /* Must block new posting of SQ and RQ */
+ qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
+
+ sq = &qp->sq;
+ rq = &qp->rq;
+
+ cqe_cons = le16_to_cpu(hwcqe->sq_cons_idx);
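+	/* An sq_cons_idx of 0xFFFF indicates there is nothing to complete on
+	 * the SQ side of this terminal CQE; move on to flushing the RQ.
+	 */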
+ if (cqe_cons == 0xFFFF)
+ goto do_rq;
+
+ if (cqe_cons > sq->hwq.max_elements) {
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: FP: CQ Process terminal reported ");
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x",
+ cqe_cons, sq->hwq.max_elements);
+ goto do_rq;
+ }
+ /* If we were in the middle of flushing, continue */
+ if (sq->flush_in_progress)
+ goto flush_sq;
+
+	/* A terminal CQE can also aggregate prior successful completions, so
+	 * complete all WQEs from the current SQ cons up to the reported
+	 * sq_cons_idx with status OK
+	 */
+ cqe = *pcqe;
+ while (*budget) {
+ sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
+ if (sw_cons == cqe_cons)
+ break;
+ if (sq->swq[sw_cons].flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
+ memset(cqe, 0, sizeof(*cqe));
+ cqe->status = CQ_REQ_STATUS_OK;
+ cqe->opcode = CQ_BASE_CQE_TYPE_REQ;
+ cqe->qp_handle = (u64)(unsigned long)qp;
+ cqe->src_qp = qp->id;
+ cqe->wr_id = sq->swq[sw_cons].wr_id;
+ cqe->type = sq->swq[sw_cons].type;
+ cqe++;
+ (*budget)--;
+ }
+ sq->hwq.cons++;
+ }
+ *pcqe = cqe;
+ if (!(*budget) && sw_cons != cqe_cons) {
+ /* Out of budget */
+ rc = -EAGAIN;
+ goto sq_done;
+ }
+ sq->flush_in_progress = true;
+flush_sq:
+ rc = __flush_sq(sq, qp, pcqe, budget);
+ if (!rc)
+ sq->flush_in_progress = false;
+sq_done:
+ if (rc)
+ return rc;
+do_rq:
+ cqe_cons = le16_to_cpu(hwcqe->rq_cons_idx);
+ if (cqe_cons == 0xFFFF) {
+ goto done;
+ } else if (cqe_cons > rq->hwq.max_elements) {
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: FP: CQ Processed terminal ");
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: reported rq_cons_idx 0x%x exceeds max 0x%x",
+ cqe_cons, rq->hwq.max_elements);
+ goto done;
+ }
+	/* A terminal CQE requires all posted RQEs to complete with FLUSHED_ERR
+	 * from the current rq->cons to the rq->prod, regardless of the
+	 * rq_cons_idx the terminal CQE indicates
+	 */
+ rq->flush_in_progress = true;
+ switch (qp->type) {
+ case CMDQ_CREATE_QP1_TYPE_GSI:
+ opcode = CQ_BASE_CQE_TYPE_RES_RAWETH_QP1;
+ break;
+ case CMDQ_CREATE_QP_TYPE_RC:
+ opcode = CQ_BASE_CQE_TYPE_RES_RC;
+ break;
+ case CMDQ_CREATE_QP_TYPE_UD:
+ opcode = CQ_BASE_CQE_TYPE_RES_UD;
+ break;
+ }
+
+ rc = __flush_rq(rq, qp, opcode, pcqe, budget);
+ if (!rc)
+ rq->flush_in_progress = false;
+done:
+ return rc;
+}
+
+static int bnxt_qplib_cq_process_cutoff(struct bnxt_qplib_cq *cq,
+ struct cq_cutoff *hwcqe)
+{
+ /* Check the Status */
+ if (hwcqe->status != CQ_CUTOFF_STATUS_OK) {
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: FP: CQ Process Cutoff Error status = 0x%x",
+ hwcqe->status);
+ return -EINVAL;
+ }
+ clear_bit(CQ_FLAGS_RESIZE_IN_PROG, &cq->flags);
+ wake_up_interruptible(&cq->waitq);
+
+ return 0;
+}
+
+int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
+ int num_cqes, struct bnxt_qplib_qp **lib_qp)
+{
+ struct cq_base *hw_cqe, **hw_cqe_ptr;
+ unsigned long flags;
+ u32 sw_cons, raw_cons;
+ int budget, rc = 0;
+
+ spin_lock_irqsave(&cq->hwq.lock, flags);
+ raw_cons = cq->hwq.cons;
+ budget = num_cqes;
+
+ while (budget) {
+ sw_cons = HWQ_CMP(raw_cons, &cq->hwq);
+ hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr;
+ hw_cqe = &hw_cqe_ptr[CQE_PG(sw_cons)][CQE_IDX(sw_cons)];
+
+ /* Check for Valid bit */
+ if (!CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements))
+ break;
+
+		/* From the device's respective CQE format to qplib_wc */
+ switch (hw_cqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK) {
+ case CQ_BASE_CQE_TYPE_REQ:
+ rc = bnxt_qplib_cq_process_req(cq,
+ (struct cq_req *)hw_cqe,
+ &cqe, &budget,
+ sw_cons, lib_qp);
+ break;
+ case CQ_BASE_CQE_TYPE_RES_RC:
+ rc = bnxt_qplib_cq_process_res_rc(cq,
+ (struct cq_res_rc *)
+ hw_cqe, &cqe,
+ &budget);
+ break;
+ case CQ_BASE_CQE_TYPE_RES_UD:
+ rc = bnxt_qplib_cq_process_res_ud
+ (cq, (struct cq_res_ud *)hw_cqe, &cqe,
+ &budget);
+ break;
+ case CQ_BASE_CQE_TYPE_RES_RAWETH_QP1:
+ rc = bnxt_qplib_cq_process_res_raweth_qp1
+ (cq, (struct cq_res_raweth_qp1 *)
+ hw_cqe, &cqe, &budget);
+ break;
+ case CQ_BASE_CQE_TYPE_TERMINAL:
+ rc = bnxt_qplib_cq_process_terminal
+ (cq, (struct cq_terminal *)hw_cqe,
+ &cqe, &budget);
+ break;
+ case CQ_BASE_CQE_TYPE_CUT_OFF:
+ bnxt_qplib_cq_process_cutoff
+ (cq, (struct cq_cutoff *)hw_cqe);
+ /* Done processing this CQ */
+ goto exit;
+ default:
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: process_cq unknown type 0x%lx",
+ hw_cqe->cqe_type_toggle &
+ CQ_BASE_CQE_TYPE_MASK);
+ rc = -EINVAL;
+ break;
+ }
+ if (rc < 0) {
+ if (rc == -EAGAIN)
+ break;
+ /* Error while processing the CQE, just skip to the
+ * next one
+ */
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: process_cqe error rc = 0x%x", rc);
+ }
+ raw_cons++;
+ }
+ if (cq->hwq.cons != raw_cons) {
+ cq->hwq.cons = raw_cons;
+ bnxt_qplib_arm_cq(cq, DBR_DBR_TYPE_CQ);
+ }
+exit:
+ spin_unlock_irqrestore(&cq->hwq.lock, flags);
+ return num_cqes - budget;
+}
+
+void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cq->hwq.lock, flags);
+ if (arm_type)
+ bnxt_qplib_arm_cq(cq, arm_type);
+
+ spin_unlock_irqrestore(&cq->hwq.lock, flags);
+}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
new file mode 100644
index 000000000000..19176e06c98a
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -0,0 +1,456 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Fast Path Operators (header)
+ */
+
+#ifndef __BNXT_QPLIB_FP_H__
+#define __BNXT_QPLIB_FP_H__
+
+struct bnxt_qplib_sge {
+ u64 addr;
+ u32 lkey;
+ u32 size;
+};
+
+#define BNXT_QPLIB_MAX_SQE_ENTRY_SIZE sizeof(struct sq_send)
+
+#define SQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_MAX_SQE_ENTRY_SIZE)
+#define SQE_MAX_IDX_PER_PG (SQE_CNT_PER_PG - 1)
+
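+/* Convert a flat SQ index into a page number and an index within that page,
+ * given SQE_CNT_PER_PG entries per page.
+ */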
+static inline u32 get_sqe_pg(u32 val)
+{
+ return ((val & ~SQE_MAX_IDX_PER_PG) / SQE_CNT_PER_PG);
+}
+
+static inline u32 get_sqe_idx(u32 val)
+{
+ return (val & SQE_MAX_IDX_PER_PG);
+}
+
+#define BNXT_QPLIB_MAX_PSNE_ENTRY_SIZE sizeof(struct sq_psn_search)
+
+#define PSNE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_MAX_PSNE_ENTRY_SIZE)
+#define PSNE_MAX_IDX_PER_PG (PSNE_CNT_PER_PG - 1)
+
+static inline u32 get_psne_pg(u32 val)
+{
+ return ((val & ~PSNE_MAX_IDX_PER_PG) / PSNE_CNT_PER_PG);
+}
+
+static inline u32 get_psne_idx(u32 val)
+{
+ return (val & PSNE_MAX_IDX_PER_PG);
+}
+
+#define BNXT_QPLIB_QP_MAX_SGL 6
+
+struct bnxt_qplib_swq {
+ u64 wr_id;
+ u8 type;
+ u8 flags;
+ u32 start_psn;
+ u32 next_psn;
+ struct sq_psn_search *psn_search;
+};
+
+struct bnxt_qplib_swqe {
+ /* General */
+#define BNXT_QPLIB_FENCE_WRID 0x46454E43 /* "FENC" */
+ u64 wr_id;
+ u8 reqs_type;
+ u8 type;
+#define BNXT_QPLIB_SWQE_TYPE_SEND 0
+#define BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM 1
+#define BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV 2
+#define BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE 4
+#define BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM 5
+#define BNXT_QPLIB_SWQE_TYPE_RDMA_READ 6
+#define BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP 8
+#define BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD 11
+#define BNXT_QPLIB_SWQE_TYPE_LOCAL_INV 12
+#define BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR 13
+#define BNXT_QPLIB_SWQE_TYPE_REG_MR 13
+#define BNXT_QPLIB_SWQE_TYPE_BIND_MW 14
+#define BNXT_QPLIB_SWQE_TYPE_RECV 128
+#define BNXT_QPLIB_SWQE_TYPE_RECV_RDMA_IMM 129
+ u8 flags;
+#define BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP BIT(0)
+#define BNXT_QPLIB_SWQE_FLAGS_RD_ATOMIC_FENCE BIT(1)
+#define BNXT_QPLIB_SWQE_FLAGS_UC_FENCE BIT(2)
+#define BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT BIT(3)
+#define BNXT_QPLIB_SWQE_FLAGS_INLINE BIT(4)
+ struct bnxt_qplib_sge sg_list[BNXT_QPLIB_QP_MAX_SGL];
+ int num_sge;
+ /* Max inline data is 96 bytes */
+ u32 inline_len;
+#define BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH 96
+ u8 inline_data[BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH];
+
+ union {
+ /* Send, with imm, inval key */
+ struct {
+ union {
+ __be32 imm_data;
+ u32 inv_key;
+ };
+ u32 q_key;
+ u32 dst_qp;
+ u16 avid;
+ } send;
+
+ /* Send Raw Ethernet and QP1 */
+ struct {
+ u16 lflags;
+ u16 cfa_action;
+ u32 cfa_meta;
+ } rawqp1;
+
+ /* RDMA write, with imm, read */
+ struct {
+ union {
+ __be32 imm_data;
+ u32 inv_key;
+ };
+ u64 remote_va;
+ u32 r_key;
+ } rdma;
+
+ /* Atomic cmp/swap, fetch/add */
+ struct {
+ u64 remote_va;
+ u32 r_key;
+ u64 swap_data;
+ u64 cmp_data;
+ } atomic;
+
+ /* Local Invalidate */
+ struct {
+ u32 inv_l_key;
+ } local_inv;
+
+ /* FR-PMR */
+ struct {
+ u8 access_cntl;
+ u8 pg_sz_log;
+ bool zero_based;
+ u32 l_key;
+ u32 length;
+ u8 pbl_pg_sz_log;
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_4K 0
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_8K 1
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_64K 4
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_256K 6
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_1M 8
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_2M 9
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_4M 10
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_1G 18
+ u8 levels;
+#define PAGE_SHIFT_4K 12
+ __le64 *pbl_ptr;
+ dma_addr_t pbl_dma_ptr;
+ u64 *page_list;
+ u16 page_list_len;
+ u64 va;
+ } frmr;
+
+ /* Bind */
+ struct {
+ u8 access_cntl;
+#define BNXT_QPLIB_BIND_SWQE_ACCESS_LOCAL_WRITE BIT(0)
+#define BNXT_QPLIB_BIND_SWQE_ACCESS_REMOTE_READ BIT(1)
+#define BNXT_QPLIB_BIND_SWQE_ACCESS_REMOTE_WRITE BIT(2)
+#define BNXT_QPLIB_BIND_SWQE_ACCESS_REMOTE_ATOMIC BIT(3)
+#define BNXT_QPLIB_BIND_SWQE_ACCESS_WINDOW_BIND BIT(4)
+ bool zero_based;
+ u8 mw_type;
+ u32 parent_l_key;
+ u32 r_key;
+ u64 va;
+ u32 length;
+ } bind;
+ };
+};
+
+#define BNXT_QPLIB_MAX_RQE_ENTRY_SIZE sizeof(struct rq_wqe)
+
+#define RQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_MAX_RQE_ENTRY_SIZE)
+#define RQE_MAX_IDX_PER_PG (RQE_CNT_PER_PG - 1)
+#define RQE_PG(x) (((x) & ~RQE_MAX_IDX_PER_PG) / RQE_CNT_PER_PG)
+#define RQE_IDX(x) ((x) & RQE_MAX_IDX_PER_PG)
+
+struct bnxt_qplib_q {
+ struct bnxt_qplib_hwq hwq;
+ struct bnxt_qplib_swq *swq;
+ struct scatterlist *sglist;
+ u32 nmap;
+ u32 max_wqe;
+ u16 q_full_delta;
+ u16 max_sge;
+ u32 psn;
+ bool flush_in_progress;
+ bool condition;
+ bool single;
+ bool send_phantom;
+ u32 phantom_wqe_cnt;
+ u32 phantom_cqe_cnt;
+ u32 next_cq_cons;
+};
+
+struct bnxt_qplib_qp {
+ struct bnxt_qplib_pd *pd;
+ struct bnxt_qplib_dpi *dpi;
+ u64 qp_handle;
+ u32 id;
+ u8 type;
+ u8 sig_type;
+ u32 modify_flags;
+ u8 state;
+ u8 cur_qp_state;
+ u32 max_inline_data;
+ u32 mtu;
+ u8 path_mtu;
+ bool en_sqd_async_notify;
+ u16 pkey_index;
+ u32 qkey;
+ u32 dest_qp_id;
+ u8 access;
+ u8 timeout;
+ u8 retry_cnt;
+ u8 rnr_retry;
+ u64 wqe_cnt;
+ u32 min_rnr_timer;
+ u32 max_rd_atomic;
+ u32 max_dest_rd_atomic;
+ u32 dest_qpn;
+ u8 smac[6];
+ u16 vlan_id;
+ u8 nw_type;
+ struct bnxt_qplib_ah ah;
+
+#define BTH_PSN_MASK ((1 << 24) - 1)
+ /* SQ */
+ struct bnxt_qplib_q sq;
+ /* RQ */
+ struct bnxt_qplib_q rq;
+ /* SRQ */
+ struct bnxt_qplib_srq *srq;
+ /* CQ */
+ struct bnxt_qplib_cq *scq;
+ struct bnxt_qplib_cq *rcq;
+ /* IRRQ and ORRQ */
+ struct bnxt_qplib_hwq irrq;
+ struct bnxt_qplib_hwq orrq;
+ /* Header buffer for QP1 */
+ int sq_hdr_buf_size;
+ int rq_hdr_buf_size;
+/*
+ * Buffer space for ETH(14), IP or GRH(40), UDP header(8)
+ * and ib_bth + ib_deth (20).
+ * Max required is 82 when RoCE V2 is enabled
+ */
+#define BNXT_QPLIB_MAX_QP1_SQ_HDR_SIZE_V2 86
+ /* Ethernet header = 14 */
+ /* ib_grh = 40 (provided by MAD) */
+ /* ib_bth + ib_deth = 20 */
+ /* MAD = 256 (provided by MAD) */
+ /* iCRC = 4 */
+#define BNXT_QPLIB_MAX_QP1_RQ_ETH_HDR_SIZE 14
+#define BNXT_QPLIB_MAX_QP1_RQ_HDR_SIZE_V2 512
+#define BNXT_QPLIB_MAX_GRH_HDR_SIZE_IPV4 20
+#define BNXT_QPLIB_MAX_GRH_HDR_SIZE_IPV6 40
+#define BNXT_QPLIB_MAX_QP1_RQ_BDETH_HDR_SIZE 20
+ void *sq_hdr_buf;
+ dma_addr_t sq_hdr_buf_map;
+ void *rq_hdr_buf;
+ dma_addr_t rq_hdr_buf_map;
+};
+
+#define BNXT_QPLIB_MAX_CQE_ENTRY_SIZE sizeof(struct cq_base)
+
+#define CQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_MAX_CQE_ENTRY_SIZE)
+#define CQE_MAX_IDX_PER_PG (CQE_CNT_PER_PG - 1)
+#define CQE_PG(x) (((x) & ~CQE_MAX_IDX_PER_PG) / CQE_CNT_PER_PG)
+#define CQE_IDX(x) ((x) & CQE_MAX_IDX_PER_PG)
+
+#define ROCE_CQE_CMP_V 0
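+/* A CQE is valid when its toggle bit matches the expected phase for the
+ * current pass through the ring; the phase bit (raw_cons & cp_bit, with
+ * cp_bit == max_elements) flips on every wrap, so entries left over from
+ * the previous pass compare as invalid.
+ */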
+#define CQE_CMP_VALID(hdr, raw_cons, cp_bit) \
+ (!!((hdr)->cqe_type_toggle & CQ_BASE_TOGGLE) == \
+ !((raw_cons) & (cp_bit)))
+
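+/* The queue is treated as full when advancing prod by q_full_delta would
+ * catch up with cons; q_full_delta reserves headroom below the actual HWQ
+ * depth.
+ */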
+static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *qplib_q)
+{
+ return HWQ_CMP((qplib_q->hwq.prod + qplib_q->q_full_delta),
+ &qplib_q->hwq) == HWQ_CMP(qplib_q->hwq.cons,
+ &qplib_q->hwq);
+}
+
+struct bnxt_qplib_cqe {
+ u8 status;
+ u8 type;
+ u8 opcode;
+ u32 length;
+ u64 wr_id;
+ union {
+ __be32 immdata;
+ u32 invrkey;
+ };
+ u64 qp_handle;
+ u64 mr_handle;
+ u16 flags;
+ u8 smac[6];
+ u32 src_qp;
+ u16 raweth_qp1_flags;
+ u16 raweth_qp1_errors;
+ u16 raweth_qp1_cfa_code;
+ u32 raweth_qp1_flags2;
+ u32 raweth_qp1_metadata;
+ u8 raweth_qp1_payload_offset;
+ u16 pkey_index;
+};
+
+#define BNXT_QPLIB_QUEUE_START_PERIOD 0x01
+struct bnxt_qplib_cq {
+ struct bnxt_qplib_dpi *dpi;
+ void __iomem *dbr_base;
+ u32 max_wqe;
+ u32 id;
+ u16 count;
+ u16 period;
+ struct bnxt_qplib_hwq hwq;
+ u32 cnq_hw_ring_id;
+ bool resize_in_progress;
+ struct scatterlist *sghead;
+ u32 nmap;
+ u64 cq_handle;
+
+#define CQ_RESIZE_WAIT_TIME_MS 500
+ unsigned long flags;
+#define CQ_FLAGS_RESIZE_IN_PROG 1
+ wait_queue_head_t waitq;
+};
+
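+/* Convert between the advertised IRD/ORD limits and the number of IRRQ/ORRQ
+ * slots the HW context needs for inbound/outbound RDMA read and atomic
+ * operations.
+ */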
+#define BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE sizeof(struct xrrq_irrq)
+#define BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE sizeof(struct xrrq_orrq)
+#define IRD_LIMIT_TO_IRRQ_SLOTS(x) (2 * (x) + 2)
+#define IRRQ_SLOTS_TO_IRD_LIMIT(s) (((s) >> 1) - 1)
+#define ORD_LIMIT_TO_ORRQ_SLOTS(x) ((x) + 1)
+#define ORRQ_SLOTS_TO_ORD_LIMIT(s) ((s) - 1)
+
+#define BNXT_QPLIB_MAX_NQE_ENTRY_SIZE sizeof(struct nq_base)
+
+#define NQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_MAX_NQE_ENTRY_SIZE)
+#define NQE_MAX_IDX_PER_PG (NQE_CNT_PER_PG - 1)
+#define NQE_PG(x) (((x) & ~NQE_MAX_IDX_PER_PG) / NQE_CNT_PER_PG)
+#define NQE_IDX(x) ((x) & NQE_MAX_IDX_PER_PG)
+
+#define NQE_CMP_VALID(hdr, raw_cons, cp_bit) \
+ (!!(le32_to_cpu((hdr)->info63_v[0]) & NQ_BASE_V) == \
+ !((raw_cons) & (cp_bit)))
+
+#define BNXT_QPLIB_NQE_MAX_CNT (128 * 1024)
+
+#define NQ_CONS_PCI_BAR_REGION 2
+#define NQ_DB_KEY_CP (0x2 << CMPL_DOORBELL_KEY_SFT)
+#define NQ_DB_IDX_VALID CMPL_DOORBELL_IDX_VALID
+#define NQ_DB_IRQ_DIS CMPL_DOORBELL_MASK
+#define NQ_DB_CP_FLAGS_REARM (NQ_DB_KEY_CP | \
+ NQ_DB_IDX_VALID)
+#define NQ_DB_CP_FLAGS (NQ_DB_KEY_CP | \
+ NQ_DB_IDX_VALID | \
+ NQ_DB_IRQ_DIS)
+#define NQ_DB_REARM(db, raw_cons, cp_bit) \
+ writel(NQ_DB_CP_FLAGS_REARM | ((raw_cons) & ((cp_bit) - 1)), db)
+#define NQ_DB(db, raw_cons, cp_bit) \
+ writel(NQ_DB_CP_FLAGS | ((raw_cons) & ((cp_bit) - 1)), db)
+
+struct bnxt_qplib_nq {
+ struct pci_dev *pdev;
+
+ int vector;
+ int budget;
+ bool requested;
+ struct tasklet_struct worker;
+ struct bnxt_qplib_hwq hwq;
+
+ u16 bar_reg;
+ u16 bar_reg_off;
+ u16 ring_id;
+ void __iomem *bar_reg_iomem;
+
+ int (*cqn_handler)
+ (struct bnxt_qplib_nq *nq,
+ struct bnxt_qplib_cq *cq);
+ int (*srqn_handler)
+ (struct bnxt_qplib_nq *nq,
+ void *srq,
+ u8 event);
+};
+
+void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq);
+int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
+ int msix_vector, int bar_reg_offset,
+ int (*cqn_handler)(struct bnxt_qplib_nq *nq,
+ struct bnxt_qplib_cq *cq),
+ int (*srqn_handler)(struct bnxt_qplib_nq *nq,
+ void *srq,
+ u8 event));
+int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
+int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
+int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
+int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
+int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
+void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_sge *sge);
+void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_sge *sge);
+u32 bnxt_qplib_get_rq_prod_index(struct bnxt_qplib_qp *qp);
+dma_addr_t bnxt_qplib_get_qp_buf_from_index(struct bnxt_qplib_qp *qp,
+ u32 index);
+void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp);
+int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe);
+void bnxt_qplib_post_recv_db(struct bnxt_qplib_qp *qp);
+int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe);
+int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
+int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
+int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
+ int num, struct bnxt_qplib_qp **qp);
+bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq);
+void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type);
+void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq);
+int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq);
+#endif /* __BNXT_QPLIB_FP_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
new file mode 100644
index 000000000000..16e42754dbec
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -0,0 +1,708 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: RDMA Controller HW interface
+ */
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/prefetch.h>
+#include <linux/delay.h>
+
+#include "roce_hsi.h"
+#include "qplib_res.h"
+#include "qplib_rcfw.h"
+static void bnxt_qplib_service_creq(unsigned long data);
+
+/* Hardware communication channel */
+static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
+{
+ u16 cbit;
+ int rc;
+
+ cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+ rc = wait_event_timeout(rcfw->waitq,
+ !test_bit(cbit, rcfw->cmdq_bitmap),
+ msecs_to_jiffies(RCFW_CMD_WAIT_TIME_MS));
+ return rc ? 0 : -ETIMEDOUT;
+};
+
+static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
+{
+ u32 count = RCFW_BLOCKED_CMD_WAIT_COUNT;
+ u16 cbit;
+
+ cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+ if (!test_bit(cbit, rcfw->cmdq_bitmap))
+ goto done;
+ do {
+		mdelay(1); /* 1 msec */
+ bnxt_qplib_service_creq((unsigned long)rcfw);
+ } while (test_bit(cbit, rcfw->cmdq_bitmap) && --count);
+done:
+ return count ? 0 : -ETIMEDOUT;
+};
+
+static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
+ struct creq_base *resp, void *sb, u8 is_block)
+{
+ struct bnxt_qplib_cmdqe *cmdqe, **cmdq_ptr;
+ struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq;
+ struct bnxt_qplib_crsq *crsqe;
+ u32 sw_prod, cmdq_prod;
+ unsigned long flags;
+ u32 size, opcode;
+ u16 cookie, cbit;
+ int pg, idx;
+ u8 *preq;
+
+ opcode = req->opcode;
+ if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
+ (opcode != CMDQ_BASE_OPCODE_QUERY_FUNC &&
+ opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW)) {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: RCFW not initialized, reject opcode 0x%x",
+ opcode);
+ return -EINVAL;
+ }
+
+ if (test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
+ opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: RCFW already initialized!");
+ return -EINVAL;
+ }
+
+	/* Cmdq entries are in 16-byte units; each request can consume one or
+	 * more cmdqe
+	 */
+ spin_lock_irqsave(&cmdq->lock, flags);
+ if (req->cmd_size >= HWQ_FREE_SLOTS(cmdq)) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: RCFW: CMDQ is full!");
+ spin_unlock_irqrestore(&cmdq->lock, flags);
+ return -EAGAIN;
+ }
+
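+	/* Allocate a cookie for this command: the cookie (modulo
+	 * RCFW_MAX_OUTSTANDING_CMD) indexes the outstanding-command bitmap
+	 * and the completion table, and the blocking flag is encoded in the
+	 * cookie so the completion path does not wake the waitqueue for
+	 * blocked callers.
+	 */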
+ cookie = rcfw->seq_num & RCFW_MAX_COOKIE_VALUE;
+ cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+ if (is_block)
+ cookie |= RCFW_CMD_IS_BLOCKING;
+
+ set_bit(cbit, rcfw->cmdq_bitmap);
+ req->cookie = cpu_to_le16(cookie);
+ crsqe = &rcfw->crsqe_tbl[cbit];
+ if (crsqe->resp) {
+ spin_unlock_irqrestore(&cmdq->lock, flags);
+ return -EBUSY;
+ }
+ memset(resp, 0, sizeof(*resp));
+ crsqe->resp = (struct creq_qp_event *)resp;
+ crsqe->resp->cookie = req->cookie;
+ crsqe->req_size = req->cmd_size;
+ if (req->resp_size && sb) {
+ struct bnxt_qplib_rcfw_sbuf *sbuf = sb;
+
+ req->resp_addr = cpu_to_le64(sbuf->dma_addr);
+ req->resp_size = (sbuf->size + BNXT_QPLIB_CMDQE_UNITS - 1) /
+ BNXT_QPLIB_CMDQE_UNITS;
+ }
+
+ cmdq_ptr = (struct bnxt_qplib_cmdqe **)cmdq->pbl_ptr;
+ preq = (u8 *)req;
+ size = req->cmd_size * BNXT_QPLIB_CMDQE_UNITS;
+ do {
+ pg = 0;
+ idx = 0;
+
+ /* Locate the next cmdq slot */
+ sw_prod = HWQ_CMP(cmdq->prod, cmdq);
+ cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod)][get_cmdq_idx(sw_prod)];
+ if (!cmdqe) {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: RCFW request failed with no cmdqe!");
+ goto done;
+ }
+ /* Copy a segment of the req cmd to the cmdq */
+ memset(cmdqe, 0, sizeof(*cmdqe));
+ memcpy(cmdqe, preq, min_t(u32, size, sizeof(*cmdqe)));
+ preq += min_t(u32, size, sizeof(*cmdqe));
+ size -= min_t(u32, size, sizeof(*cmdqe));
+ cmdq->prod++;
+ rcfw->seq_num++;
+ } while (size > 0);
+
+ rcfw->seq_num++;
+
+ cmdq_prod = cmdq->prod;
+ if (rcfw->flags & FIRMWARE_FIRST_FLAG) {
+ /* The very first doorbell write
+ * is required to set this flag
+ * which prompts the FW to reset
+ * its internal pointers
+ */
+ cmdq_prod |= FIRMWARE_FIRST_FLAG;
+ rcfw->flags &= ~FIRMWARE_FIRST_FLAG;
+ }
+
+ /* ring CMDQ DB */
+ wmb();
+ writel(cmdq_prod, rcfw->cmdq_bar_reg_iomem +
+ rcfw->cmdq_bar_reg_prod_off);
+ writel(RCFW_CMDQ_TRIG_VAL, rcfw->cmdq_bar_reg_iomem +
+ rcfw->cmdq_bar_reg_trig_off);
+done:
+ spin_unlock_irqrestore(&cmdq->lock, flags);
+ /* Return the CREQ response pointer */
+ return 0;
+}
+
+int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
+ struct cmdq_base *req,
+ struct creq_base *resp,
+ void *sb, u8 is_block)
+{
+ struct creq_qp_event *evnt = (struct creq_qp_event *)resp;
+ u16 cookie;
+ u8 opcode, retry_cnt = 0xFF;
+ int rc = 0;
+
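+	/* Retry submission while the CMDQ is full (-EAGAIN) or the cookie
+	 * slot is still busy (-EBUSY); on success, wait for the CREQ
+	 * completion either by polling (blocking callers) or on the
+	 * waitqueue.
+	 */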
+ do {
+ opcode = req->opcode;
+ rc = __send_message(rcfw, req, resp, sb, is_block);
+ cookie = le16_to_cpu(req->cookie) & RCFW_MAX_COOKIE_VALUE;
+ if (!rc)
+ break;
+
+ if (!retry_cnt || (rc != -EAGAIN && rc != -EBUSY)) {
+ /* send failed */
+ dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x send failed",
+ cookie, opcode);
+ return rc;
+ }
+ is_block ? mdelay(1) : usleep_range(500, 1000);
+
+ } while (retry_cnt--);
+
+ if (is_block)
+ rc = __block_for_resp(rcfw, cookie);
+ else
+ rc = __wait_for_resp(rcfw, cookie);
+ if (rc) {
+ /* timed out */
+ dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x timedout (%d)msec",
+ cookie, opcode, RCFW_CMD_WAIT_TIME_MS);
+ return rc;
+ }
+
+ if (evnt->status) {
+ /* failed with status */
+ dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x status %#x",
+ cookie, opcode, evnt->status);
+ rc = -EFAULT;
+ }
+
+ return rc;
+}
+/* Completions */
+static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw,
+ struct creq_func_event *func_event)
+{
+ switch (func_event->event) {
+ case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CQ_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_TQM_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCS_ERROR:
+ /* SRQ ctx error, call srq_handler??
+ * But there's no SRQ handle!
+ */
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCC_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCM_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_TIM_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_VF_COMM_REQUEST:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_RESOURCE_EXHAUSTED:
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
+ struct creq_qp_event *qp_event)
+{
+ struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq;
+ struct bnxt_qplib_crsq *crsqe;
+ unsigned long flags;
+ u16 cbit, blocked = 0;
+ u16 cookie;
+ __le16 mcookie;
+
+ switch (qp_event->event) {
+ case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
+ dev_dbg(&rcfw->pdev->dev,
+ "QPLIB: Received QP error notification");
+ break;
+ default:
+ /* Command Response */
+ spin_lock_irqsave(&cmdq->lock, flags);
+ cookie = le16_to_cpu(qp_event->cookie);
+ mcookie = qp_event->cookie;
+ blocked = cookie & RCFW_CMD_IS_BLOCKING;
+ cookie &= RCFW_MAX_COOKIE_VALUE;
+ cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+ crsqe = &rcfw->crsqe_tbl[cbit];
+ if (crsqe->resp &&
+ crsqe->resp->cookie == mcookie) {
+ memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
+ crsqe->resp = NULL;
+ } else {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: CMD %s resp->cookie = %#x, evnt->cookie = %#x",
+ crsqe->resp ? "mismatch" : "collision",
+ crsqe->resp ? crsqe->resp->cookie : 0, mcookie);
+ }
+ if (!test_and_clear_bit(cbit, rcfw->cmdq_bitmap))
+ dev_warn(&rcfw->pdev->dev,
+ "QPLIB: CMD bit %d was not requested", cbit);
+ cmdq->cons += crsqe->req_size;
+ crsqe->req_size = 0;
+
+ if (!blocked)
+ wake_up(&rcfw->waitq);
+ spin_unlock_irqrestore(&cmdq->lock, flags);
+ }
+ return 0;
+}
+
+/* SP - CREQ Completion handlers */
+static void bnxt_qplib_service_creq(unsigned long data)
+{
+ struct bnxt_qplib_rcfw *rcfw = (struct bnxt_qplib_rcfw *)data;
+ struct bnxt_qplib_hwq *creq = &rcfw->creq;
+ struct creq_base *creqe, **creq_ptr;
+ u32 sw_cons, raw_cons;
+ unsigned long flags;
+ u32 type, budget = CREQ_ENTRY_POLL_BUDGET;
+
+ /* Service the CREQ until budget is over */
+ spin_lock_irqsave(&creq->lock, flags);
+ raw_cons = creq->cons;
+ while (budget > 0) {
+ sw_cons = HWQ_CMP(raw_cons, creq);
+ creq_ptr = (struct creq_base **)creq->pbl_ptr;
+ creqe = &creq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)];
+ if (!CREQ_CMP_VALID(creqe, raw_cons, creq->max_elements))
+ break;
+
+ type = creqe->type & CREQ_BASE_TYPE_MASK;
+ switch (type) {
+ case CREQ_BASE_TYPE_QP_EVENT:
+ bnxt_qplib_process_qp_event
+ (rcfw, (struct creq_qp_event *)creqe);
+ rcfw->creq_qp_event_processed++;
+ break;
+ case CREQ_BASE_TYPE_FUNC_EVENT:
+ if (!bnxt_qplib_process_func_event
+ (rcfw, (struct creq_func_event *)creqe))
+ rcfw->creq_func_event_processed++;
+ else
+ dev_warn
+ (&rcfw->pdev->dev, "QPLIB:aeqe:%#x Not handled",
+ type);
+ break;
+ default:
+ dev_warn(&rcfw->pdev->dev, "QPLIB: creqe with ");
+ dev_warn(&rcfw->pdev->dev,
+ "QPLIB: op_event = 0x%x not handled", type);
+ break;
+ }
+ raw_cons++;
+ budget--;
+ }
+
+ if (creq->cons != raw_cons) {
+ creq->cons = raw_cons;
+ CREQ_DB_REARM(rcfw->creq_bar_reg_iomem, raw_cons,
+ creq->max_elements);
+ }
+ spin_unlock_irqrestore(&creq->lock, flags);
+}
+
+static irqreturn_t bnxt_qplib_creq_irq(int irq, void *dev_instance)
+{
+ struct bnxt_qplib_rcfw *rcfw = dev_instance;
+ struct bnxt_qplib_hwq *creq = &rcfw->creq;
+ struct creq_base **creq_ptr;
+ u32 sw_cons;
+
+ /* Prefetch the CREQ element */
+ sw_cons = HWQ_CMP(creq->cons, creq);
+ creq_ptr = (struct creq_base **)rcfw->creq.pbl_ptr;
+ prefetch(&creq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)]);
+
+ tasklet_schedule(&rcfw->worker);
+
+ return IRQ_HANDLED;
+}
+
+/* RCFW */
+int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw)
+{
+ struct cmdq_deinitialize_fw req;
+ struct creq_deinitialize_fw_resp resp;
+ u16 cmd_flags = 0;
+ int rc;
+
+ RCFW_CMD_PREP(req, DEINITIALIZE_FW, cmd_flags);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+ NULL, 0);
+ if (rc)
+ return rc;
+
+ clear_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags);
+ return 0;
+}
+
+static int __get_pbl_pg_idx(struct bnxt_qplib_pbl *pbl)
+{
+ return (pbl->pg_size == ROCE_PG_SIZE_4K ?
+ CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_4K :
+ pbl->pg_size == ROCE_PG_SIZE_8K ?
+ CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_8K :
+ pbl->pg_size == ROCE_PG_SIZE_64K ?
+ CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_64K :
+ pbl->pg_size == ROCE_PG_SIZE_2M ?
+ CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_2M :
+ pbl->pg_size == ROCE_PG_SIZE_8M ?
+ CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_8M :
+ pbl->pg_size == ROCE_PG_SIZE_1G ?
+ CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_1G :
+ CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_4K);
+}
+
+int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_ctx *ctx, int is_virtfn)
+{
+ struct cmdq_initialize_fw req;
+ struct creq_initialize_fw_resp resp;
+ u16 cmd_flags = 0, level;
+ int rc;
+
+ RCFW_CMD_PREP(req, INITIALIZE_FW, cmd_flags);
+
+ /*
+ * VFs do not need to set up the HW context area; the PF
+ * sets it up on their behalf, so skip the HW programming here.
+ */
+ if (is_virtfn)
+ goto skip_ctx_setup;
+
+ level = ctx->qpc_tbl.level;
+ req.qpc_pg_size_qpc_lvl = (level << CMDQ_INITIALIZE_FW_QPC_LVL_SFT) |
+ __get_pbl_pg_idx(&ctx->qpc_tbl.pbl[level]);
+ level = ctx->mrw_tbl.level;
+ req.mrw_pg_size_mrw_lvl = (level << CMDQ_INITIALIZE_FW_MRW_LVL_SFT) |
+ __get_pbl_pg_idx(&ctx->mrw_tbl.pbl[level]);
+ level = ctx->srqc_tbl.level;
+ req.srq_pg_size_srq_lvl = (level << CMDQ_INITIALIZE_FW_SRQ_LVL_SFT) |
+ __get_pbl_pg_idx(&ctx->srqc_tbl.pbl[level]);
+ level = ctx->cq_tbl.level;
+ req.cq_pg_size_cq_lvl = (level << CMDQ_INITIALIZE_FW_CQ_LVL_SFT) |
+ __get_pbl_pg_idx(&ctx->cq_tbl.pbl[level]);
+ level = ctx->tim_tbl.level;
+ req.tim_pg_size_tim_lvl = (level << CMDQ_INITIALIZE_FW_TIM_LVL_SFT) |
+ __get_pbl_pg_idx(&ctx->tim_tbl.pbl[level]);
+ level = ctx->tqm_pde_level;
+ req.tqm_pg_size_tqm_lvl = (level << CMDQ_INITIALIZE_FW_TQM_LVL_SFT) |
+ __get_pbl_pg_idx(&ctx->tqm_pde.pbl[level]);
+
+ req.qpc_page_dir =
+ cpu_to_le64(ctx->qpc_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
+ req.mrw_page_dir =
+ cpu_to_le64(ctx->mrw_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
+ req.srq_page_dir =
+ cpu_to_le64(ctx->srqc_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
+ req.cq_page_dir =
+ cpu_to_le64(ctx->cq_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
+ req.tim_page_dir =
+ cpu_to_le64(ctx->tim_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
+ req.tqm_page_dir =
+ cpu_to_le64(ctx->tqm_pde.pbl[PBL_LVL_0].pg_map_arr[0]);
+
+ req.number_of_qp = cpu_to_le32(ctx->qpc_tbl.max_elements);
+ req.number_of_mrw = cpu_to_le32(ctx->mrw_tbl.max_elements);
+ req.number_of_srq = cpu_to_le32(ctx->srqc_tbl.max_elements);
+ req.number_of_cq = cpu_to_le32(ctx->cq_tbl.max_elements);
+
+ req.max_qp_per_vf = cpu_to_le32(ctx->vf_res.max_qp_per_vf);
+ req.max_mrw_per_vf = cpu_to_le32(ctx->vf_res.max_mrw_per_vf);
+ req.max_srq_per_vf = cpu_to_le32(ctx->vf_res.max_srq_per_vf);
+ req.max_cq_per_vf = cpu_to_le32(ctx->vf_res.max_cq_per_vf);
+ req.max_gid_per_vf = cpu_to_le32(ctx->vf_res.max_gid_per_vf);
+
+skip_ctx_setup:
+ req.stat_ctx_id = cpu_to_le32(ctx->stats.fw_id);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+ NULL, 0);
+ if (rc)
+ return rc;
+ set_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags);
+ return 0;
+}
+
+void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
+{
+ kfree(rcfw->crsqe_tbl);
+ bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->cmdq);
+ bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->creq);
+ rcfw->pdev = NULL;
+}
+
+int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
+ struct bnxt_qplib_rcfw *rcfw)
+{
+ rcfw->pdev = pdev;
+ rcfw->creq.max_elements = BNXT_QPLIB_CREQE_MAX_CNT;
+ if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->creq, NULL, 0,
+ &rcfw->creq.max_elements,
+ BNXT_QPLIB_CREQE_UNITS, 0, PAGE_SIZE,
+ HWQ_TYPE_L2_CMPL)) {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: HW channel CREQ allocation failed");
+ goto fail;
+ }
+ rcfw->cmdq.max_elements = BNXT_QPLIB_CMDQE_MAX_CNT;
+ if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->cmdq, NULL, 0,
+ &rcfw->cmdq.max_elements,
+ BNXT_QPLIB_CMDQE_UNITS, 0, PAGE_SIZE,
+ HWQ_TYPE_CTX)) {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: HW channel CMDQ allocation failed");
+ goto fail;
+ }
+
+ rcfw->crsqe_tbl = kcalloc(rcfw->cmdq.max_elements,
+ sizeof(*rcfw->crsqe_tbl), GFP_KERNEL);
+ if (!rcfw->crsqe_tbl)
+ goto fail;
+
+ return 0;
+
+fail:
+ bnxt_qplib_free_rcfw_channel(rcfw);
+ return -ENOMEM;
+}
+
+void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
+{
+ unsigned long indx;
+
+ /* Make sure the HW channel is stopped! */
+ synchronize_irq(rcfw->vector);
+ tasklet_disable(&rcfw->worker);
+ tasklet_kill(&rcfw->worker);
+
+ if (rcfw->requested) {
+ free_irq(rcfw->vector, rcfw);
+ rcfw->requested = false;
+ }
+ if (rcfw->cmdq_bar_reg_iomem)
+ iounmap(rcfw->cmdq_bar_reg_iomem);
+ rcfw->cmdq_bar_reg_iomem = NULL;
+
+ if (rcfw->creq_bar_reg_iomem)
+ iounmap(rcfw->creq_bar_reg_iomem);
+ rcfw->creq_bar_reg_iomem = NULL;
+
+ /* bmap_size is in bytes; find_first_bit() expects a size in bits */
+ indx = find_first_bit(rcfw->cmdq_bitmap, rcfw->bmap_size * BITS_PER_BYTE);
+ if (indx != rcfw->bmap_size * BITS_PER_BYTE)
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: disabling RCFW with pending cmd-bit %lx", indx);
+ kfree(rcfw->cmdq_bitmap);
+ rcfw->bmap_size = 0;
+
+ rcfw->aeq_handler = NULL;
+ rcfw->vector = 0;
+}
+
+int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
+ struct bnxt_qplib_rcfw *rcfw,
+ int msix_vector,
+ int cp_bar_reg_off, int virt_fn,
+ int (*aeq_handler)(struct bnxt_qplib_rcfw *,
+ struct creq_func_event *))
+{
+ resource_size_t res_base;
+ struct cmdq_init init;
+ u16 bmap_size;
+ int rc;
+
+ /* General */
+ rcfw->seq_num = 0;
+ rcfw->flags = FIRMWARE_FIRST_FLAG;
+ bmap_size = BITS_TO_LONGS(RCFW_MAX_OUTSTANDING_CMD) *
+ sizeof(unsigned long);
+ rcfw->cmdq_bitmap = kzalloc(bmap_size, GFP_KERNEL);
+ if (!rcfw->cmdq_bitmap)
+ return -ENOMEM;
+ rcfw->bmap_size = bmap_size;
+
+ /* CMDQ */
+ rcfw->cmdq_bar_reg = RCFW_COMM_PCI_BAR_REGION;
+ res_base = pci_resource_start(pdev, rcfw->cmdq_bar_reg);
+ if (!res_base)
+ return -ENOMEM;
+
+ rcfw->cmdq_bar_reg_iomem = ioremap_nocache(res_base +
+ RCFW_COMM_BASE_OFFSET,
+ RCFW_COMM_SIZE);
+ if (!rcfw->cmdq_bar_reg_iomem) {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: CMDQ BAR region %d mapping failed",
+ rcfw->cmdq_bar_reg);
+ return -ENOMEM;
+ }
+
+ rcfw->cmdq_bar_reg_prod_off = virt_fn ? RCFW_VF_COMM_PROD_OFFSET :
+ RCFW_PF_COMM_PROD_OFFSET;
+
+ rcfw->cmdq_bar_reg_trig_off = RCFW_COMM_TRIG_OFFSET;
+
+ /* CREQ */
+ rcfw->creq_bar_reg = RCFW_COMM_CONS_PCI_BAR_REGION;
+ res_base = pci_resource_start(pdev, rcfw->creq_bar_reg);
+ if (!res_base)
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: CREQ BAR region %d resc start is 0!",
+ rcfw->creq_bar_reg);
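+ /* Only the 4-byte CREQ consumer doorbell register is mapped here */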
+ rcfw->creq_bar_reg_iomem = ioremap_nocache(res_base + cp_bar_reg_off,
+ 4);
+ if (!rcfw->creq_bar_reg_iomem) {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: CREQ BAR region %d mapping failed",
+ rcfw->creq_bar_reg);
+ return -ENOMEM;
+ }
+ rcfw->creq_qp_event_processed = 0;
+ rcfw->creq_func_event_processed = 0;
+
+ rcfw->vector = msix_vector;
+ if (aeq_handler)
+ rcfw->aeq_handler = aeq_handler;
+
+ tasklet_init(&rcfw->worker, bnxt_qplib_service_creq,
+ (unsigned long)rcfw);
+
+ rcfw->requested = false;
+ rc = request_irq(rcfw->vector, bnxt_qplib_creq_irq, 0,
+ "bnxt_qplib_creq", rcfw);
+ if (rc) {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: Failed to request IRQ for CREQ rc = 0x%x", rc);
+ bnxt_qplib_disable_rcfw_channel(rcfw);
+ return rc;
+ }
+ rcfw->requested = true;
+
+ init_waitqueue_head(&rcfw->waitq);
+
+ CREQ_DB_REARM(rcfw->creq_bar_reg_iomem, 0, rcfw->creq.max_elements);
+
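+ /* Build the CMDQ init block: base PBL address, queue size and PBL
+ * level, and the CREQ ring that reports command completions.
+ */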
+ init.cmdq_pbl = cpu_to_le64(rcfw->cmdq.pbl[PBL_LVL_0].pg_map_arr[0]);
+ init.cmdq_size_cmdq_lvl = cpu_to_le16(
+ ((BNXT_QPLIB_CMDQE_MAX_CNT << CMDQ_INIT_CMDQ_SIZE_SFT) &
+ CMDQ_INIT_CMDQ_SIZE_MASK) |
+ ((rcfw->cmdq.level << CMDQ_INIT_CMDQ_LVL_SFT) &
+ CMDQ_INIT_CMDQ_LVL_MASK));
+ init.creq_ring_id = cpu_to_le16(rcfw->creq_ring_id);
+
+ /* Write to the Bono mailbox register */
+ __iowrite32_copy(rcfw->cmdq_bar_reg_iomem, &init, sizeof(init) / 4);
+ return 0;
+}
+
+struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf(
+ struct bnxt_qplib_rcfw *rcfw,
+ u32 size)
+{
+ struct bnxt_qplib_rcfw_sbuf *sbuf;
+
+ sbuf = kzalloc(sizeof(*sbuf), GFP_ATOMIC);
+ if (!sbuf)
+ return NULL;
+
+ sbuf->size = size;
+ sbuf->sb = dma_zalloc_coherent(&rcfw->pdev->dev, sbuf->size,
+ &sbuf->dma_addr, GFP_ATOMIC);
+ if (!sbuf->sb)
+ goto bail;
+
+ return sbuf;
+bail:
+ kfree(sbuf);
+ return NULL;
+}
+
+void bnxt_qplib_rcfw_free_sbuf(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_rcfw_sbuf *sbuf)
+{
+ if (sbuf->sb)
+ dma_free_coherent(&rcfw->pdev->dev, sbuf->size,
+ sbuf->sb, sbuf->dma_addr);
+ kfree(sbuf);
+}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
new file mode 100644
index 000000000000..09ce121770cd
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -0,0 +1,210 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: RDMA Controller HW interface (header)
+ */
+
+#ifndef __BNXT_QPLIB_RCFW_H__
+#define __BNXT_QPLIB_RCFW_H__
+
+#define RCFW_CMDQ_TRIG_VAL 1
+#define RCFW_COMM_PCI_BAR_REGION 0
+#define RCFW_COMM_CONS_PCI_BAR_REGION 2
+#define RCFW_COMM_BASE_OFFSET 0x600
+#define RCFW_PF_COMM_PROD_OFFSET 0xc
+#define RCFW_VF_COMM_PROD_OFFSET 0xc
+#define RCFW_COMM_TRIG_OFFSET 0x100
+#define RCFW_COMM_SIZE 0x104
+
+#define RCFW_DBR_PCI_BAR_REGION 2
+
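+/* Zero the request, fill in the opcode and the request size in 16-byte
+ * CMDQE units, and set the command flags.
+ */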
+#define RCFW_CMD_PREP(req, CMD, cmd_flags) \
+ do { \
+ memset(&(req), 0, sizeof((req))); \
+ (req).opcode = CMDQ_BASE_OPCODE_##CMD; \
+ (req).cmd_size = (sizeof((req)) + \
+ BNXT_QPLIB_CMDQE_UNITS - 1) / \
+ BNXT_QPLIB_CMDQE_UNITS; \
+ (req).flags = cpu_to_le16(cmd_flags); \
+ } while (0)
+
+#define RCFW_CMD_WAIT_TIME_MS 20000 /* 20 Seconds timeout */
+
+/* CMDQ elements */
+#define BNXT_QPLIB_CMDQE_MAX_CNT 256
+#define BNXT_QPLIB_CMDQE_UNITS sizeof(struct bnxt_qplib_cmdqe)
+#define BNXT_QPLIB_CMDQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_CMDQE_UNITS)
+
+#define MAX_CMDQ_IDX (BNXT_QPLIB_CMDQE_MAX_CNT - 1)
+#define MAX_CMDQ_IDX_PER_PG (BNXT_QPLIB_CMDQE_CNT_PER_PG - 1)
+
+#define RCFW_MAX_OUTSTANDING_CMD BNXT_QPLIB_CMDQE_MAX_CNT
+#define RCFW_MAX_COOKIE_VALUE 0x7FFF
+#define RCFW_CMD_IS_BLOCKING 0x8000
+#define RCFW_BLOCKED_CMD_WAIT_COUNT 0x4E20
+
+/* The CMDQ contains a fixed number of 16-byte slots */
+struct bnxt_qplib_cmdqe {
+ u8 data[16];
+};
+
+static inline u32 get_cmdq_pg(u32 val)
+{
+ return (val & ~MAX_CMDQ_IDX_PER_PG) / BNXT_QPLIB_CMDQE_CNT_PER_PG;
+}
+
+static inline u32 get_cmdq_idx(u32 val)
+{
+ return val & MAX_CMDQ_IDX_PER_PG;
+}
+
+/* Each CRSQ buffer is 1024 bytes */
+struct bnxt_qplib_crsbe {
+ u8 data[1024];
+};
+
+/* CREQ */
+/* Allocate 1 per QP for async error notification for now */
+#define BNXT_QPLIB_CREQE_MAX_CNT (64 * 1024)
+#define BNXT_QPLIB_CREQE_UNITS 16 /* 16-Bytes per prod unit */
+#define BNXT_QPLIB_CREQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_CREQE_UNITS)
+
+#define MAX_CREQ_IDX (BNXT_QPLIB_CREQE_MAX_CNT - 1)
+#define MAX_CREQ_IDX_PER_PG (BNXT_QPLIB_CREQE_CNT_PER_PG - 1)
+
+static inline u32 get_creq_pg(u32 val)
+{
+ return (val & ~MAX_CREQ_IDX_PER_PG) / BNXT_QPLIB_CREQE_CNT_PER_PG;
+}
+
+static inline u32 get_creq_idx(u32 val)
+{
+ return val & MAX_CREQ_IDX_PER_PG;
+}
+
+#define BNXT_QPLIB_CREQE_PER_PG (PAGE_SIZE / sizeof(struct creq_base))
+
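+/* A CREQ entry is valid when its V bit matches the phase expected for the
+ * current pass over the ring.
+ */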
+#define CREQ_CMP_VALID(hdr, raw_cons, cp_bit) \
+ (!!((hdr)->v & CREQ_BASE_V) == \
+ !((raw_cons) & (cp_bit)))
+
+#define CREQ_DB_KEY_CP (0x2 << CMPL_DOORBELL_KEY_SFT)
+#define CREQ_DB_IDX_VALID CMPL_DOORBELL_IDX_VALID
+#define CREQ_DB_IRQ_DIS CMPL_DOORBELL_MASK
+#define CREQ_DB_CP_FLAGS_REARM (CREQ_DB_KEY_CP | \
+ CREQ_DB_IDX_VALID)
+#define CREQ_DB_CP_FLAGS (CREQ_DB_KEY_CP | \
+ CREQ_DB_IDX_VALID | \
+ CREQ_DB_IRQ_DIS)
+#define CREQ_DB_REARM(db, raw_cons, cp_bit) \
+ writel(CREQ_DB_CP_FLAGS_REARM | ((raw_cons) & ((cp_bit) - 1)), db)
+#define CREQ_DB(db, raw_cons, cp_bit) \
+ writel(CREQ_DB_CP_FLAGS | ((raw_cons) & ((cp_bit) - 1)), db)
+
+#define CREQ_ENTRY_POLL_BUDGET 0x100
+
+/* HWQ */
+
+struct bnxt_qplib_crsq {
+ struct creq_qp_event *resp;
+ u32 req_size;
+};
+
+struct bnxt_qplib_rcfw_sbuf {
+ void *sb;
+ dma_addr_t dma_addr;
+ u32 size;
+};
+
+/* RCFW Communication Channels */
+struct bnxt_qplib_rcfw {
+ struct pci_dev *pdev;
+ int vector;
+ struct tasklet_struct worker;
+ bool requested;
+ unsigned long *cmdq_bitmap;
+ u32 bmap_size;
+ unsigned long flags;
+#define FIRMWARE_INITIALIZED_FLAG 1
+#define FIRMWARE_FIRST_FLAG BIT(31)
+ wait_queue_head_t waitq;
+ int (*aeq_handler)(struct bnxt_qplib_rcfw *,
+ struct creq_func_event *);
+ u32 seq_num;
+
+ /* Bar region info */
+ void __iomem *cmdq_bar_reg_iomem;
+ u16 cmdq_bar_reg;
+ u16 cmdq_bar_reg_prod_off;
+ u16 cmdq_bar_reg_trig_off;
+ u16 creq_ring_id;
+ u16 creq_bar_reg;
+ void __iomem *creq_bar_reg_iomem;
+
+ /* Cmd-Resp and Async Event notification queue */
+ struct bnxt_qplib_hwq creq;
+ u64 creq_qp_event_processed;
+ u64 creq_func_event_processed;
+
+ /* Actual Cmd and Resp Queues */
+ struct bnxt_qplib_hwq cmdq;
+ struct bnxt_qplib_crsq *crsqe_tbl;
+};
+
+void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
+int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
+ struct bnxt_qplib_rcfw *rcfw);
+void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
+int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
+ struct bnxt_qplib_rcfw *rcfw,
+ int msix_vector,
+ int cp_bar_reg_off, int virt_fn,
+ int (*aeq_handler)
+ (struct bnxt_qplib_rcfw *,
+ struct creq_func_event *));
+
+struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf(
+ struct bnxt_qplib_rcfw *rcfw,
+ u32 size);
+void bnxt_qplib_rcfw_free_sbuf(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_rcfw_sbuf *sbuf);
+int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
+ struct cmdq_base *req, struct creq_base *resp,
+ void *sbuf, u8 is_block);
+
+int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw);
+int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_ctx *ctx, int is_virtfn);
+#endif /* __BNXT_QPLIB_RCFW_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
new file mode 100644
index 000000000000..62447b3badec
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -0,0 +1,825 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: QPLib resource manager
+ */
+
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/inetdevice.h>
+#include <linux/dma-mapping.h>
+#include <linux/if_vlan.h>
+#include "roce_hsi.h"
+#include "qplib_res.h"
+#include "qplib_sp.h"
+#include "qplib_rcfw.h"
+
+static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_stats *stats);
+static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_stats *stats);
+
+/* PBL */
+static void __free_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
+ bool is_umem)
+{
+ int i;
+
+ if (!is_umem) {
+ for (i = 0; i < pbl->pg_count; i++) {
+ if (pbl->pg_arr[i])
+ dma_free_coherent(&pdev->dev, pbl->pg_size,
+ (void *)((unsigned long)
+ pbl->pg_arr[i] &
+ PAGE_MASK),
+ pbl->pg_map_arr[i]);
+ else
+ dev_warn(&pdev->dev,
+ "QPLIB: PBL free pg_arr[%d] empty?!",
+ i);
+ pbl->pg_arr[i] = NULL;
+ }
+ }
+ kfree(pbl->pg_arr);
+ pbl->pg_arr = NULL;
+ kfree(pbl->pg_map_arr);
+ pbl->pg_map_arr = NULL;
+ pbl->pg_count = 0;
+ pbl->pg_size = 0;
+}
+
+static int __alloc_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
+ struct scatterlist *sghead, u32 pages, u32 pg_size)
+{
+ struct scatterlist *sg;
+ bool is_umem = false;
+ int i;
+
+ /* page ptr arrays */
+ pbl->pg_arr = kcalloc(pages, sizeof(void *), GFP_KERNEL);
+ if (!pbl->pg_arr)
+ return -ENOMEM;
+
+ pbl->pg_map_arr = kcalloc(pages, sizeof(dma_addr_t), GFP_KERNEL);
+ if (!pbl->pg_map_arr) {
+ kfree(pbl->pg_arr);
+ pbl->pg_arr = NULL;
+ return -ENOMEM;
+ }
+ pbl->pg_count = 0;
+ pbl->pg_size = pg_size;
+
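+ /* Without a user SG list, DMA-allocate the pages; otherwise map the
+ * umem SG entries directly.
+ */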
+ if (!sghead) {
+ for (i = 0; i < pages; i++) {
+ pbl->pg_arr[i] = dma_alloc_coherent(&pdev->dev,
+ pbl->pg_size,
+ &pbl->pg_map_arr[i],
+ GFP_KERNEL);
+ if (!pbl->pg_arr[i])
+ goto fail;
+ memset(pbl->pg_arr[i], 0, pbl->pg_size);
+ pbl->pg_count++;
+ }
+ } else {
+ i = 0;
+ is_umem = true;
+ for_each_sg(sghead, sg, pages, i) {
+ pbl->pg_map_arr[i] = sg_dma_address(sg);
+ pbl->pg_arr[i] = sg_virt(sg);
+ if (!pbl->pg_arr[i])
+ goto fail;
+
+ pbl->pg_count++;
+ }
+ }
+
+ return 0;
+
+fail:
+ __free_pbl(pdev, pbl, is_umem);
+ return -ENOMEM;
+}
+
+/* HWQ */
+void bnxt_qplib_free_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq)
+{
+ int i;
+
+ if (!hwq->max_elements)
+ return;
+ if (hwq->level >= PBL_LVL_MAX)
+ return;
+
+ for (i = 0; i < hwq->level + 1; i++) {
+ if (i == hwq->level)
+ __free_pbl(pdev, &hwq->pbl[i], hwq->is_user);
+ else
+ __free_pbl(pdev, &hwq->pbl[i], false);
+ }
+
+ hwq->level = PBL_LVL_MAX;
+ hwq->max_elements = 0;
+ hwq->element_size = 0;
+ hwq->prod = 0;
+ hwq->cons = 0;
+ hwq->cp_bit = 0;
+}
+
+/* All HWQs are power of 2 in size */
+int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq,
+ struct scatterlist *sghead, int nmap,
+ u32 *elements, u32 element_size, u32 aux,
+ u32 pg_size, enum bnxt_qplib_hwq_type hwq_type)
+{
+ u32 pages, slots, size, aux_pages = 0, aux_size = 0;
+ dma_addr_t *src_phys_ptr, **dst_virt_ptr;
+ int i, rc;
+
+ hwq->level = PBL_LVL_MAX;
+
+ slots = roundup_pow_of_two(*elements);
+ if (aux) {
+ aux_size = roundup_pow_of_two(aux);
+ aux_pages = (slots * aux_size) / pg_size;
+ if ((slots * aux_size) % pg_size)
+ aux_pages++;
+ }
+ size = roundup_pow_of_two(element_size);
+
+ if (!sghead) {
+ hwq->is_user = false;
+ pages = (slots * size) / pg_size + aux_pages;
+ if ((slots * size) % pg_size)
+ pages++;
+ if (!pages)
+ return -EINVAL;
+ } else {
+ hwq->is_user = true;
+ pages = nmap;
+ }
+
+ /* Alloc the 1st memory block; can be a PDL/PTL/PBL */
+ if (sghead && (pages == MAX_PBL_LVL_0_PGS))
+ rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_0], sghead,
+ pages, pg_size);
+ else
+ rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_0], NULL, 1, pg_size);
+ if (rc)
+ goto fail;
+
+ hwq->level = PBL_LVL_0;
+
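+ /* More than one page of elements needs one or two levels of indirection */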
+ if (pages > MAX_PBL_LVL_0_PGS) {
+ if (pages > MAX_PBL_LVL_1_PGS) {
+ /* 2 levels of indirection */
+ rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_1], NULL,
+ MAX_PBL_LVL_1_PGS_FOR_LVL_2, pg_size);
+ if (rc)
+ goto fail;
+ /* Fill in lvl0 PBL */
+ dst_virt_ptr =
+ (dma_addr_t **)hwq->pbl[PBL_LVL_0].pg_arr;
+ src_phys_ptr = hwq->pbl[PBL_LVL_1].pg_map_arr;
+ for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; i++)
+ dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
+ src_phys_ptr[i] | PTU_PDE_VALID;
+ hwq->level = PBL_LVL_1;
+
+ rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_2], sghead,
+ pages, pg_size);
+ if (rc)
+ goto fail;
+
+ /* Fill in lvl1 PBL */
+ dst_virt_ptr =
+ (dma_addr_t **)hwq->pbl[PBL_LVL_1].pg_arr;
+ src_phys_ptr = hwq->pbl[PBL_LVL_2].pg_map_arr;
+ for (i = 0; i < hwq->pbl[PBL_LVL_2].pg_count; i++) {
+ dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
+ src_phys_ptr[i] | PTU_PTE_VALID;
+ }
+ if (hwq_type == HWQ_TYPE_QUEUE) {
+ /* Find the last pg of the size */
+ i = hwq->pbl[PBL_LVL_2].pg_count;
+ dst_virt_ptr[PTR_PG(i - 1)][PTR_IDX(i - 1)] |=
+ PTU_PTE_LAST;
+ if (i > 1)
+ dst_virt_ptr[PTR_PG(i - 2)]
+ [PTR_IDX(i - 2)] |=
+ PTU_PTE_NEXT_TO_LAST;
+ }
+ hwq->level = PBL_LVL_2;
+ } else {
+ u32 flag = hwq_type == HWQ_TYPE_L2_CMPL ? 0 :
+ PTU_PTE_VALID;
+
+ /* 1 level of indirection */
+ rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_1], sghead,
+ pages, pg_size);
+ if (rc)
+ goto fail;
+ /* Fill in lvl0 PBL */
+ dst_virt_ptr =
+ (dma_addr_t **)hwq->pbl[PBL_LVL_0].pg_arr;
+ src_phys_ptr = hwq->pbl[PBL_LVL_1].pg_map_arr;
+ for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; i++) {
+ dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
+ src_phys_ptr[i] | flag;
+ }
+ if (hwq_type == HWQ_TYPE_QUEUE) {
+ /* Find the last pg of the size */
+ i = hwq->pbl[PBL_LVL_1].pg_count;
+ dst_virt_ptr[PTR_PG(i - 1)][PTR_IDX(i - 1)] |=
+ PTU_PTE_LAST;
+ if (i > 1)
+ dst_virt_ptr[PTR_PG(i - 2)]
+ [PTR_IDX(i - 2)] |=
+ PTU_PTE_NEXT_TO_LAST;
+ }
+ hwq->level = PBL_LVL_1;
+ }
+ }
+ hwq->pdev = pdev;
+ spin_lock_init(&hwq->lock);
+ hwq->prod = 0;
+ hwq->cons = 0;
+ *elements = hwq->max_elements = slots;
+ hwq->element_size = size;
+
+ /* For direct access to the elements */
+ hwq->pbl_ptr = hwq->pbl[hwq->level].pg_arr;
+ hwq->pbl_dma_ptr = hwq->pbl[hwq->level].pg_map_arr;
+
+ return 0;
+
+fail:
+ bnxt_qplib_free_hwq(pdev, hwq);
+ return -ENOMEM;
+}
+
+/* Context Tables */
+void bnxt_qplib_free_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_ctx *ctx)
+{
+ int i;
+
+ bnxt_qplib_free_hwq(pdev, &ctx->qpc_tbl);
+ bnxt_qplib_free_hwq(pdev, &ctx->mrw_tbl);
+ bnxt_qplib_free_hwq(pdev, &ctx->srqc_tbl);
+ bnxt_qplib_free_hwq(pdev, &ctx->cq_tbl);
+ bnxt_qplib_free_hwq(pdev, &ctx->tim_tbl);
+ for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
+ bnxt_qplib_free_hwq(pdev, &ctx->tqm_tbl[i]);
+ bnxt_qplib_free_hwq(pdev, &ctx->tqm_pde);
+ bnxt_qplib_free_stats_ctx(pdev, &ctx->stats);
+}
+
+/*
+ * Routine: bnxt_qplib_alloc_ctx
+ * Description:
+ * Context tables are memories that are used by the chip FW.
+ * The 6 tables defined are:
+ * QPC ctx - holds QP states
+ * MRW ctx - holds memory region and window
+ * SRQ ctx - holds shared RQ states
+ * CQ ctx - holds completion queue states
+ * TQM ctx - holds Tx Queue Manager context
+ * TIM ctx - holds timer context
+ * Depending on the size of the table requested, either a 1-page Page
+ * Buffer List or a 1- to 2-level indirection Page Directory List plus
+ * one PBL is used.
+ * The tables are laid out as follows:
+ * For 0 < ctx size <= 1 PAGE, 0 levels of indirection are used
+ * For 1 PAGE < ctx size <= 512 entries, 1 level of indirection is used
+ * For 512 entries < ctx size <= MAX, 2 levels of indirection are used
+ * Returns:
+ * 0 on success, else -ERRORS
+ */
+int bnxt_qplib_alloc_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_ctx *ctx,
+ bool virt_fn)
+{
+ int i, j, k, rc = 0;
+ int fnz_idx = -1;
+ __le64 **pbl_ptr;
+
+ if (virt_fn)
+ goto stats_alloc;
+
+ /* QPC Tables */
+ ctx->qpc_tbl.max_elements = ctx->qpc_count;
+ rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->qpc_tbl, NULL, 0,
+ &ctx->qpc_tbl.max_elements,
+ BNXT_QPLIB_MAX_QP_CTX_ENTRY_SIZE, 0,
+ PAGE_SIZE, HWQ_TYPE_CTX);
+ if (rc)
+ goto fail;
+
+ /* MRW Tables */
+ ctx->mrw_tbl.max_elements = ctx->mrw_count;
+ rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->mrw_tbl, NULL, 0,
+ &ctx->mrw_tbl.max_elements,
+ BNXT_QPLIB_MAX_MRW_CTX_ENTRY_SIZE, 0,
+ PAGE_SIZE, HWQ_TYPE_CTX);
+ if (rc)
+ goto fail;
+
+ /* SRQ Tables */
+ ctx->srqc_tbl.max_elements = ctx->srqc_count;
+ rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->srqc_tbl, NULL, 0,
+ &ctx->srqc_tbl.max_elements,
+ BNXT_QPLIB_MAX_SRQ_CTX_ENTRY_SIZE, 0,
+ PAGE_SIZE, HWQ_TYPE_CTX);
+ if (rc)
+ goto fail;
+
+ /* CQ Tables */
+ ctx->cq_tbl.max_elements = ctx->cq_count;
+ rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->cq_tbl, NULL, 0,
+ &ctx->cq_tbl.max_elements,
+ BNXT_QPLIB_MAX_CQ_CTX_ENTRY_SIZE, 0,
+ PAGE_SIZE, HWQ_TYPE_CTX);
+ if (rc)
+ goto fail;
+
+ /* TQM Buffer */
+ ctx->tqm_pde.max_elements = 512;
+ rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tqm_pde, NULL, 0,
+ &ctx->tqm_pde.max_elements, sizeof(u64),
+ 0, PAGE_SIZE, HWQ_TYPE_CTX);
+ if (rc)
+ goto fail;
+
+ for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) {
+ if (!ctx->tqm_count[i])
+ continue;
+ ctx->tqm_tbl[i].max_elements = ctx->qpc_count *
+ ctx->tqm_count[i];
+ rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tqm_tbl[i], NULL, 0,
+ &ctx->tqm_tbl[i].max_elements, 1,
+ 0, PAGE_SIZE, HWQ_TYPE_CTX);
+ if (rc)
+ goto fail;
+ }
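+ /* Point each TQM ring's block of PDE slots at its page directory
+ * (or at its single data page for small rings).
+ */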
+ pbl_ptr = (__le64 **)ctx->tqm_pde.pbl_ptr;
+ for (i = 0, j = 0; i < MAX_TQM_ALLOC_REQ;
+ i++, j += MAX_TQM_ALLOC_BLK_SIZE) {
+ if (!ctx->tqm_tbl[i].max_elements)
+ continue;
+ if (fnz_idx == -1)
+ fnz_idx = i;
+ switch (ctx->tqm_tbl[i].level) {
+ case PBL_LVL_2:
+ for (k = 0; k < ctx->tqm_tbl[i].pbl[PBL_LVL_1].pg_count;
+ k++)
+ pbl_ptr[PTR_PG(j + k)][PTR_IDX(j + k)] =
+ cpu_to_le64(
+ ctx->tqm_tbl[i].pbl[PBL_LVL_1].pg_map_arr[k]
+ | PTU_PTE_VALID);
+ break;
+ case PBL_LVL_1:
+ case PBL_LVL_0:
+ default:
+ pbl_ptr[PTR_PG(j)][PTR_IDX(j)] = cpu_to_le64(
+ ctx->tqm_tbl[i].pbl[PBL_LVL_0].pg_map_arr[0] |
+ PTU_PTE_VALID);
+ break;
+ }
+ }
+ if (fnz_idx == -1)
+ fnz_idx = 0;
+ ctx->tqm_pde_level = ctx->tqm_tbl[fnz_idx].level == PBL_LVL_2 ?
+ PBL_LVL_2 : ctx->tqm_tbl[fnz_idx].level + 1;
+
+ /* TIM Buffer */
+ ctx->tim_tbl.max_elements = ctx->qpc_count * 16;
+ rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tim_tbl, NULL, 0,
+ &ctx->tim_tbl.max_elements, 1,
+ 0, PAGE_SIZE, HWQ_TYPE_CTX);
+ if (rc)
+ goto fail;
+
+stats_alloc:
+ /* Stats */
+ rc = bnxt_qplib_alloc_stats_ctx(pdev, &ctx->stats);
+ if (rc)
+ goto fail;
+
+ return 0;
+
+fail:
+ bnxt_qplib_free_ctx(pdev, ctx);
+ return rc;
+}
+
+/* GUID */
+void bnxt_qplib_get_guid(u8 *dev_addr, u8 *guid)
+{
+ u8 mac[ETH_ALEN];
+
+ /* MAC-48 to EUI-64 mapping */
+ memcpy(mac, dev_addr, ETH_ALEN);
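+ /* Flip the universal/local bit of the first octet and insert
+ * 0xFF:0xFE between the upper and lower MAC halves.
+ */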
+ guid[0] = mac[0] ^ 2;
+ guid[1] = mac[1];
+ guid[2] = mac[2];
+ guid[3] = 0xff;
+ guid[4] = 0xfe;
+ guid[5] = mac[3];
+ guid[6] = mac[4];
+ guid[7] = mac[5];
+}
+
+static void bnxt_qplib_free_sgid_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_sgid_tbl *sgid_tbl)
+{
+ kfree(sgid_tbl->tbl);
+ kfree(sgid_tbl->hw_id);
+ kfree(sgid_tbl->ctx);
+ sgid_tbl->tbl = NULL;
+ sgid_tbl->hw_id = NULL;
+ sgid_tbl->ctx = NULL;
+ sgid_tbl->max = 0;
+ sgid_tbl->active = 0;
+}
+
+static int bnxt_qplib_alloc_sgid_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_sgid_tbl *sgid_tbl,
+ u16 max)
+{
+ sgid_tbl->tbl = kcalloc(max, sizeof(struct bnxt_qplib_gid), GFP_KERNEL);
+ if (!sgid_tbl->tbl)
+ return -ENOMEM;
+
+ sgid_tbl->hw_id = kcalloc(max, sizeof(u16), GFP_KERNEL);
+ if (!sgid_tbl->hw_id)
+ goto out_free1;
+
+ sgid_tbl->ctx = kcalloc(max, sizeof(void *), GFP_KERNEL);
+ if (!sgid_tbl->ctx)
+ goto out_free2;
+
+ sgid_tbl->max = max;
+ return 0;
+out_free2:
+ kfree(sgid_tbl->hw_id);
+ sgid_tbl->hw_id = NULL;
+out_free1:
+ kfree(sgid_tbl->tbl);
+ sgid_tbl->tbl = NULL;
+ return -ENOMEM;
+}
+
+static void bnxt_qplib_cleanup_sgid_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_sgid_tbl *sgid_tbl)
+{
+ int i;
+
+ for (i = 0; i < sgid_tbl->max; i++) {
+ if (memcmp(&sgid_tbl->tbl[i], &bnxt_qplib_gid_zero,
+ sizeof(bnxt_qplib_gid_zero)))
+ bnxt_qplib_del_sgid(sgid_tbl, &sgid_tbl->tbl[i], true);
+ }
+ memset(sgid_tbl->tbl, 0, sizeof(struct bnxt_qplib_gid) * sgid_tbl->max);
+ memset(sgid_tbl->hw_id, -1, sizeof(u16) * sgid_tbl->max);
+ sgid_tbl->active = 0;
+}
+
+static void bnxt_qplib_init_sgid_tbl(struct bnxt_qplib_sgid_tbl *sgid_tbl,
+ struct net_device *netdev)
+{
+ memset(sgid_tbl->tbl, 0, sizeof(struct bnxt_qplib_gid) * sgid_tbl->max);
+ memset(sgid_tbl->hw_id, -1, sizeof(u16) * sgid_tbl->max);
+}
+
+static void bnxt_qplib_free_pkey_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pkey_tbl *pkey_tbl)
+{
+ if (!pkey_tbl->tbl)
+ dev_dbg(&res->pdev->dev, "QPLIB: PKEY tbl not present");
+ else
+ kfree(pkey_tbl->tbl);
+
+ pkey_tbl->tbl = NULL;
+ pkey_tbl->max = 0;
+ pkey_tbl->active = 0;
+}
+
+static int bnxt_qplib_alloc_pkey_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pkey_tbl *pkey_tbl,
+ u16 max)
+{
+ pkey_tbl->tbl = kcalloc(max, sizeof(u16), GFP_KERNEL);
+ if (!pkey_tbl->tbl)
+ return -ENOMEM;
+
+ pkey_tbl->max = max;
+ return 0;
+}
+
+/* PDs */
+int bnxt_qplib_alloc_pd(struct bnxt_qplib_pd_tbl *pdt, struct bnxt_qplib_pd *pd)
+{
+ u32 bit_num;
+
+ bit_num = find_first_bit(pdt->tbl, pdt->max);
+ if (bit_num == pdt->max)
+ return -ENOMEM;
+
+ /* Found unused PD */
+ clear_bit(bit_num, pdt->tbl);
+ pd->id = bit_num;
+ return 0;
+}
+
+int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pd_tbl *pdt,
+ struct bnxt_qplib_pd *pd)
+{
+ if (test_and_set_bit(pd->id, pdt->tbl)) {
+ dev_warn(&res->pdev->dev, "Freeing an unused PD? pdn = %d",
+ pd->id);
+ return -EINVAL;
+ }
+ pd->id = 0;
+ return 0;
+}
+
+static void bnxt_qplib_free_pd_tbl(struct bnxt_qplib_pd_tbl *pdt)
+{
+ kfree(pdt->tbl);
+ pdt->tbl = NULL;
+ pdt->max = 0;
+}
+
+static int bnxt_qplib_alloc_pd_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pd_tbl *pdt,
+ u32 max)
+{
+ u32 bytes;
+
+ bytes = max >> 3;
+ if (!bytes)
+ bytes = 1;
+ pdt->tbl = kmalloc(bytes, GFP_KERNEL);
+ if (!pdt->tbl)
+ return -ENOMEM;
+
+ pdt->max = max;
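+ /* A set bit marks a free PD id; start with every id available */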
+ memset((u8 *)pdt->tbl, 0xFF, bytes);
+
+ return 0;
+}
+
+/* DPIs */
+int bnxt_qplib_alloc_dpi(struct bnxt_qplib_dpi_tbl *dpit,
+ struct bnxt_qplib_dpi *dpi,
+ void *app)
+{
+ u32 bit_num;
+
+ bit_num = find_first_bit(dpit->tbl, dpit->max);
+ if (bit_num == dpit->max)
+ return -ENOMEM;
+
+ /* Found unused DPI */
+ clear_bit(bit_num, dpit->tbl);
+ dpit->app_tbl[bit_num] = app;
+
+ dpi->dpi = bit_num;
+ dpi->dbr = dpit->dbr_bar_reg_iomem + (bit_num * PAGE_SIZE);
+ dpi->umdbr = dpit->unmapped_dbr + (bit_num * PAGE_SIZE);
+
+ return 0;
+}
+
+int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_dpi_tbl *dpit,
+ struct bnxt_qplib_dpi *dpi)
+{
+ if (dpi->dpi >= dpit->max) {
+ dev_warn(&res->pdev->dev, "Invalid DPI? dpi = %d", dpi->dpi);
+ return -EINVAL;
+ }
+ if (test_and_set_bit(dpi->dpi, dpit->tbl)) {
+ dev_warn(&res->pdev->dev, "Freeing an unused DPI? dpi = %d",
+ dpi->dpi);
+ return -EINVAL;
+ }
+ if (dpit->app_tbl)
+ dpit->app_tbl[dpi->dpi] = NULL;
+ memset(dpi, 0, sizeof(*dpi));
+
+ return 0;
+}
+
+static void bnxt_qplib_free_dpi_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_dpi_tbl *dpit)
+{
+ kfree(dpit->tbl);
+ kfree(dpit->app_tbl);
+ if (dpit->dbr_bar_reg_iomem)
+ pci_iounmap(res->pdev, dpit->dbr_bar_reg_iomem);
+ memset(dpit, 0, sizeof(*dpit));
+}
+
+static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_dpi_tbl *dpit,
+ u32 dbr_offset)
+{
+ u32 dbr_bar_reg = RCFW_DBR_PCI_BAR_REGION;
+ resource_size_t bar_reg_base;
+ u32 dbr_len, bytes;
+
+ if (dpit->dbr_bar_reg_iomem) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: DBR BAR region %d already mapped", dbr_bar_reg);
+ return -EALREADY;
+ }
+
+ bar_reg_base = pci_resource_start(res->pdev, dbr_bar_reg);
+ if (!bar_reg_base) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: BAR region %d resc start failed", dbr_bar_reg);
+ return -ENOMEM;
+ }
+
+ dbr_len = pci_resource_len(res->pdev, dbr_bar_reg) - dbr_offset;
+ if (!dbr_len || ((dbr_len & (PAGE_SIZE - 1)) != 0)) {
+ dev_err(&res->pdev->dev, "QPLIB: Invalid DBR length %d",
+ dbr_len);
+ return -ENOMEM;
+ }
+
+ dpit->dbr_bar_reg_iomem = ioremap_nocache(bar_reg_base + dbr_offset,
+ dbr_len);
+ if (!dpit->dbr_bar_reg_iomem) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: FP: DBR BAR region %d mapping failed",
+ dbr_bar_reg);
+ return -ENOMEM;
+ }
+
+ dpit->unmapped_dbr = bar_reg_base + dbr_offset;
+ dpit->max = dbr_len / PAGE_SIZE;
+
+ dpit->app_tbl = kcalloc(dpit->max, sizeof(void *), GFP_KERNEL);
+ if (!dpit->app_tbl) {
+ pci_iounmap(res->pdev, dpit->dbr_bar_reg_iomem);
+ dev_err(&res->pdev->dev,
+ "QPLIB: DPI app tbl allocation failed");
+ return -ENOMEM;
+ }
+
+ bytes = dpit->max >> 3;
+ if (!bytes)
+ bytes = 1;
+
+ dpit->tbl = kmalloc(bytes, GFP_KERNEL);
+ if (!dpit->tbl) {
+ pci_iounmap(res->pdev, dpit->dbr_bar_reg_iomem);
+ kfree(dpit->app_tbl);
+ dpit->app_tbl = NULL;
+ dev_err(&res->pdev->dev,
+ "QPLIB: DPI tbl allocation failed for size = %d",
+ bytes);
+ return -ENOMEM;
+ }
+
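+ /* As with PDs, a set bit marks a free DPI */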
+ memset((u8 *)dpit->tbl, 0xFF, bytes);
+
+ return 0;
+}
+
+/* PKEYs */
+static void bnxt_qplib_cleanup_pkey_tbl(struct bnxt_qplib_pkey_tbl *pkey_tbl)
+{
+ memset(pkey_tbl->tbl, 0, sizeof(u16) * pkey_tbl->max);
+ pkey_tbl->active = 0;
+}
+
+static void bnxt_qplib_init_pkey_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pkey_tbl *pkey_tbl)
+{
+ u16 pkey = 0xFFFF;
+
+ memset(pkey_tbl->tbl, 0, sizeof(u16) * pkey_tbl->max);
+
+ /* pkey default = 0xFFFF */
+ bnxt_qplib_add_pkey(res, pkey_tbl, &pkey, false);
+}
+
+/* Stats */
+static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_stats *stats)
+{
+ if (stats->dma) {
+ dma_free_coherent(&pdev->dev, stats->size,
+ stats->dma, stats->dma_map);
+ }
+ memset(stats, 0, sizeof(*stats));
+ stats->fw_id = -1;
+}
+
+static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_stats *stats)
+{
+ memset(stats, 0, sizeof(*stats));
+ stats->fw_id = -1;
+ stats->size = sizeof(struct ctx_hw_stats);
+ stats->dma = dma_alloc_coherent(&pdev->dev, stats->size,
+ &stats->dma_map, GFP_KERNEL);
+ if (!stats->dma) {
+ dev_err(&pdev->dev, "QPLIB: Stats DMA allocation failed");
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+void bnxt_qplib_cleanup_res(struct bnxt_qplib_res *res)
+{
+ bnxt_qplib_cleanup_pkey_tbl(&res->pkey_tbl);
+ bnxt_qplib_cleanup_sgid_tbl(res, &res->sgid_tbl);
+}
+
+int bnxt_qplib_init_res(struct bnxt_qplib_res *res)
+{
+ bnxt_qplib_init_sgid_tbl(&res->sgid_tbl, res->netdev);
+ bnxt_qplib_init_pkey_tbl(res, &res->pkey_tbl);
+
+ return 0;
+}
+
+void bnxt_qplib_free_res(struct bnxt_qplib_res *res)
+{
+ bnxt_qplib_free_pkey_tbl(res, &res->pkey_tbl);
+ bnxt_qplib_free_sgid_tbl(res, &res->sgid_tbl);
+ bnxt_qplib_free_pd_tbl(&res->pd_tbl);
+ bnxt_qplib_free_dpi_tbl(res, &res->dpi_tbl);
+
+ res->netdev = NULL;
+ res->pdev = NULL;
+}
+
+int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev,
+ struct net_device *netdev,
+ struct bnxt_qplib_dev_attr *dev_attr)
+{
+ int rc = 0;
+
+ res->pdev = pdev;
+ res->netdev = netdev;
+
+ rc = bnxt_qplib_alloc_sgid_tbl(res, &res->sgid_tbl, dev_attr->max_sgid);
+ if (rc)
+ goto fail;
+
+ rc = bnxt_qplib_alloc_pkey_tbl(res, &res->pkey_tbl, dev_attr->max_pkey);
+ if (rc)
+ goto fail;
+
+ rc = bnxt_qplib_alloc_pd_tbl(res, &res->pd_tbl, dev_attr->max_pd);
+ if (rc)
+ goto fail;
+
+ rc = bnxt_qplib_alloc_dpi_tbl(res, &res->dpi_tbl, dev_attr->l2_db_size);
+ if (rc)
+ goto fail;
+
+ return 0;
+fail:
+ bnxt_qplib_free_res(res);
+ return rc;
+}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
new file mode 100644
index 000000000000..2e4855509719
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -0,0 +1,227 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: QPLib resource manager (header)
+ */
+
+#ifndef __BNXT_QPLIB_RES_H__
+#define __BNXT_QPLIB_RES_H__
+
+extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero;
+
+#define PTR_CNT_PER_PG (PAGE_SIZE / sizeof(void *))
+#define PTR_MAX_IDX_PER_PG (PTR_CNT_PER_PG - 1)
+#define PTR_PG(x) (((x) & ~PTR_MAX_IDX_PER_PG) / PTR_CNT_PER_PG)
+#define PTR_IDX(x) ((x) & PTR_MAX_IDX_PER_PG)
+
+#define HWQ_CMP(idx, hwq) ((idx) & ((hwq)->max_elements - 1))
+
+#define HWQ_FREE_SLOTS(hwq) (hwq->max_elements - \
+ ((HWQ_CMP(hwq->prod, hwq)\
+ - HWQ_CMP(hwq->cons, hwq))\
+ & (hwq->max_elements - 1)))
+enum bnxt_qplib_hwq_type {
+ HWQ_TYPE_CTX,
+ HWQ_TYPE_QUEUE,
+ HWQ_TYPE_L2_CMPL
+};
+
+#define MAX_PBL_LVL_0_PGS 1
+#define MAX_PBL_LVL_1_PGS 512
+#define MAX_PBL_LVL_1_PGS_SHIFT 9
+#define MAX_PBL_LVL_1_PGS_FOR_LVL_2 256
+#define MAX_PBL_LVL_2_PGS (256 * 512)
+
+enum bnxt_qplib_pbl_lvl {
+ PBL_LVL_0,
+ PBL_LVL_1,
+ PBL_LVL_2,
+ PBL_LVL_MAX
+};
+
+#define ROCE_PG_SIZE_4K (4 * 1024)
+#define ROCE_PG_SIZE_8K (8 * 1024)
+#define ROCE_PG_SIZE_64K (64 * 1024)
+#define ROCE_PG_SIZE_2M (2 * 1024 * 1024)
+#define ROCE_PG_SIZE_8M (8 * 1024 * 1024)
+#define ROCE_PG_SIZE_1G (1024 * 1024 * 1024)
+
+struct bnxt_qplib_pbl {
+ u32 pg_count;
+ u32 pg_size;
+ void **pg_arr;
+ dma_addr_t *pg_map_arr;
+};
+
+struct bnxt_qplib_hwq {
+ struct pci_dev *pdev;
+ /* lock to protect qplib_hwq */
+ spinlock_t lock;
+ struct bnxt_qplib_pbl pbl[PBL_LVL_MAX];
+ enum bnxt_qplib_pbl_lvl level; /* 0, 1, or 2 */
+ /* ptr for easy access to the PBL entries */
+ void **pbl_ptr;
+ /* ptr for easy access to the dma_addr */
+ dma_addr_t *pbl_dma_ptr;
+ u32 max_elements;
+ u16 element_size; /* Size of each entry */
+
+ u32 prod; /* raw */
+ u32 cons; /* raw */
+ u8 cp_bit;
+ u8 is_user;
+};
+
+/* Tables */
+struct bnxt_qplib_pd_tbl {
+ unsigned long *tbl;
+ u32 max;
+};
+
+struct bnxt_qplib_sgid_tbl {
+ struct bnxt_qplib_gid *tbl;
+ u16 *hw_id;
+ u16 max;
+ u16 active;
+ void *ctx;
+};
+
+struct bnxt_qplib_pkey_tbl {
+ u16 *tbl;
+ u16 max;
+ u16 active;
+};
+
+struct bnxt_qplib_dpi {
+ u32 dpi;
+ void __iomem *dbr;
+ u64 umdbr;
+};
+
+struct bnxt_qplib_dpi_tbl {
+ void **app_tbl;
+ unsigned long *tbl;
+ u16 max;
+ void __iomem *dbr_bar_reg_iomem;
+ u64 unmapped_dbr;
+};
+
+struct bnxt_qplib_stats {
+ dma_addr_t dma_map;
+ void *dma;
+ u32 size;
+ u32 fw_id;
+};
+
+struct bnxt_qplib_vf_res {
+ u32 max_qp_per_vf;
+ u32 max_mrw_per_vf;
+ u32 max_srq_per_vf;
+ u32 max_cq_per_vf;
+ u32 max_gid_per_vf;
+};
+
+#define BNXT_QPLIB_MAX_QP_CTX_ENTRY_SIZE 448
+#define BNXT_QPLIB_MAX_SRQ_CTX_ENTRY_SIZE 64
+#define BNXT_QPLIB_MAX_CQ_CTX_ENTRY_SIZE 64
+#define BNXT_QPLIB_MAX_MRW_CTX_ENTRY_SIZE 128
+
+struct bnxt_qplib_ctx {
+ u32 qpc_count;
+ struct bnxt_qplib_hwq qpc_tbl;
+ u32 mrw_count;
+ struct bnxt_qplib_hwq mrw_tbl;
+ u32 srqc_count;
+ struct bnxt_qplib_hwq srqc_tbl;
+ u32 cq_count;
+ struct bnxt_qplib_hwq cq_tbl;
+ struct bnxt_qplib_hwq tim_tbl;
+#define MAX_TQM_ALLOC_REQ 32
+#define MAX_TQM_ALLOC_BLK_SIZE 8
+ u8 tqm_count[MAX_TQM_ALLOC_REQ];
+ struct bnxt_qplib_hwq tqm_pde;
+ u32 tqm_pde_level;
+ struct bnxt_qplib_hwq tqm_tbl[MAX_TQM_ALLOC_REQ];
+ struct bnxt_qplib_stats stats;
+ struct bnxt_qplib_vf_res vf_res;
+};
+
+struct bnxt_qplib_res {
+ struct pci_dev *pdev;
+ struct net_device *netdev;
+
+ struct bnxt_qplib_rcfw *rcfw;
+
+ struct bnxt_qplib_pd_tbl pd_tbl;
+ struct bnxt_qplib_sgid_tbl sgid_tbl;
+ struct bnxt_qplib_pkey_tbl pkey_tbl;
+ struct bnxt_qplib_dpi_tbl dpi_tbl;
+};
+
+#define to_bnxt_qplib(ptr, type, member) \
+ container_of(ptr, type, member)
+
+struct bnxt_qplib_pd;
+struct bnxt_qplib_dev_attr;
+
+void bnxt_qplib_free_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq);
+int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq,
+ struct scatterlist *sl, int nmap, u32 *elements,
+ u32 elements_per_page, u32 aux, u32 pg_size,
+ enum bnxt_qplib_hwq_type hwq_type);
+void bnxt_qplib_get_guid(u8 *dev_addr, u8 *guid);
+int bnxt_qplib_alloc_pd(struct bnxt_qplib_pd_tbl *pd_tbl,
+ struct bnxt_qplib_pd *pd);
+int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pd_tbl *pd_tbl,
+ struct bnxt_qplib_pd *pd);
+int bnxt_qplib_alloc_dpi(struct bnxt_qplib_dpi_tbl *dpit,
+ struct bnxt_qplib_dpi *dpi,
+ void *app);
+int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_dpi_tbl *dpi_tbl,
+ struct bnxt_qplib_dpi *dpi);
+void bnxt_qplib_cleanup_res(struct bnxt_qplib_res *res);
+int bnxt_qplib_init_res(struct bnxt_qplib_res *res);
+void bnxt_qplib_free_res(struct bnxt_qplib_res *res);
+int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev,
+ struct net_device *netdev,
+ struct bnxt_qplib_dev_attr *dev_attr);
+void bnxt_qplib_free_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_ctx *ctx);
+int bnxt_qplib_alloc_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_ctx *ctx,
+ bool virt_fn);
+#endif /* __BNXT_QPLIB_RES_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
new file mode 100644
index 000000000000..ef91ab786dd4
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -0,0 +1,687 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Slow Path Operators
+ */
+
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/pci.h>
+
+#include "roce_hsi.h"
+
+#include "qplib_res.h"
+#include "qplib_rcfw.h"
+#include "qplib_sp.h"
+
+const struct bnxt_qplib_gid bnxt_qplib_gid_zero = {{ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 } };
+
+/* Device */
+
+static bool bnxt_qplib_is_atomic_cap(struct bnxt_qplib_rcfw *rcfw)
+{
+ int rc;
+ u16 pcie_ctl2;
+
+ rc = pcie_capability_read_word(rcfw->pdev, PCI_EXP_DEVCTL2,
+ &pcie_ctl2);
+ if (rc)
+ return false;
+ return !!(pcie_ctl2 & PCI_EXP_DEVCTL2_ATOMIC_REQ);
+}
+
+int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_dev_attr *attr)
+{
+ struct cmdq_query_func req;
+ struct creq_query_func_resp resp;
+ struct bnxt_qplib_rcfw_sbuf *sbuf;
+ struct creq_query_func_resp_sb *sb;
+ u16 cmd_flags = 0;
+ u32 temp;
+ u8 *tqm_alloc;
+ int i, rc = 0;
+
+ RCFW_CMD_PREP(req, QUERY_FUNC, cmd_flags);
+
+ sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
+ if (!sbuf) {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: SP: QUERY_FUNC alloc side buffer failed");
+ return -ENOMEM;
+ }
+
+ sb = sbuf->sb;
+ req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+ (void *)sbuf, 0);
+ if (rc)
+ goto bail;
+
+ /* Extract the context from the side buffer */
+ attr->max_qp = le32_to_cpu(sb->max_qp);
+ /* The max_qp value the FW reports for a PF does not include QP1 */
+ attr->max_qp += 1;
+ attr->max_qp_rd_atom =
+ sb->max_qp_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ?
+ BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_rd_atom;
+ attr->max_qp_init_rd_atom =
+ sb->max_qp_init_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ?
+ BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_init_rd_atom;
+ attr->max_qp_wqes = le16_to_cpu(sb->max_qp_wr);
+ /*
+ * 128 WQEs need to be reserved for the HW (8916), so do not
+ * report the full maximum.
+ */
+ attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS;
+ attr->max_qp_sges = sb->max_sge;
+ attr->max_cq = le32_to_cpu(sb->max_cq);
+ attr->max_cq_wqes = le32_to_cpu(sb->max_cqe);
+ attr->max_cq_sges = attr->max_qp_sges;
+ attr->max_mr = le32_to_cpu(sb->max_mr);
+ attr->max_mw = le32_to_cpu(sb->max_mw);
+
+ attr->max_mr_size = le64_to_cpu(sb->max_mr_size);
+ attr->max_pd = 64 * 1024;
+ attr->max_raw_ethy_qp = le32_to_cpu(sb->max_raw_eth_qp);
+ attr->max_ah = le32_to_cpu(sb->max_ah);
+
+ attr->max_fmr = le32_to_cpu(sb->max_fmr);
+ attr->max_map_per_fmr = sb->max_map_per_fmr;
+
+ attr->max_srq = le16_to_cpu(sb->max_srq);
+ attr->max_srq_wqes = le32_to_cpu(sb->max_srq_wr) - 1;
+ attr->max_srq_sges = sb->max_srq_sge;
+ /* Bono only reports 1 PKEY for now, but it can support > 1 */
+ attr->max_pkey = le32_to_cpu(sb->max_pkeys);
+
+ attr->max_inline_data = le32_to_cpu(sb->max_inline_data);
+ attr->l2_db_size = (sb->l2_db_space_size + 1) * PAGE_SIZE;
+ attr->max_sgid = le32_to_cpu(sb->max_gid);
+
+ strlcpy(attr->fw_ver, "20.6.28.0", sizeof(attr->fw_ver));
+
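+ /* Each 32-bit word of tqm_alloc_reqs packs four 8-bit TQM allocation entries */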
+ for (i = 0; i < MAX_TQM_ALLOC_REQ / 4; i++) {
+ temp = le32_to_cpu(sb->tqm_alloc_reqs[i]);
+ tqm_alloc = (u8 *)&temp;
+ attr->tqm_alloc_reqs[i * 4] = *tqm_alloc;
+ attr->tqm_alloc_reqs[i * 4 + 1] = *(++tqm_alloc);
+ attr->tqm_alloc_reqs[i * 4 + 2] = *(++tqm_alloc);
+ attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc);
+ }
+
+ attr->is_atomic = bnxt_qplib_is_atomic_cap(rcfw);
+bail:
+ bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
+ return rc;
+}
+
+/* SGID */
+int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_sgid_tbl *sgid_tbl, int index,
+ struct bnxt_qplib_gid *gid)
+{
+ if (index >= sgid_tbl->max) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: Index %d exceeded SGID table max (%d)",
+ index, sgid_tbl->max);
+ return -EINVAL;
+ }
+ memcpy(gid, &sgid_tbl->tbl[index], sizeof(*gid));
+ return 0;
+}
+
+int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
+ struct bnxt_qplib_gid *gid, bool update)
+{
+ struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl,
+ struct bnxt_qplib_res,
+ sgid_tbl);
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ int index;
+
+ if (!sgid_tbl) {
+ dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated");
+ return -EINVAL;
+ }
+ /* Do we need a sgid_lock here? */
+ if (!sgid_tbl->active) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: SGID table has no active entries");
+ return -ENOMEM;
+ }
+ for (index = 0; index < sgid_tbl->max; index++) {
+ if (!memcmp(&sgid_tbl->tbl[index], gid, sizeof(*gid)))
+ break;
+ }
+ if (index == sgid_tbl->max) {
+ dev_warn(&res->pdev->dev, "GID not found in the SGID table");
+ return 0;
+ }
+ /* Remove GID from the SGID table */
+ if (update) {
+ struct cmdq_delete_gid req;
+ struct creq_delete_gid_resp resp;
+ u16 cmd_flags = 0;
+ int rc;
+
+ RCFW_CMD_PREP(req, DELETE_GID, cmd_flags);
+ if (sgid_tbl->hw_id[index] == 0xFFFF) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: GID entry contains an invalid HW id");
+ return -EINVAL;
+ }
+ req.gid_index = cpu_to_le16(sgid_tbl->hw_id[index]);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, 0);
+ if (rc)
+ return rc;
+ }
+ memcpy(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero,
+ sizeof(bnxt_qplib_gid_zero));
+ sgid_tbl->active--;
+ dev_dbg(&res->pdev->dev,
+ "QPLIB: SGID deleted hw_id[0x%x] = 0x%x active = 0x%x",
+ index, sgid_tbl->hw_id[index], sgid_tbl->active);
+ sgid_tbl->hw_id[index] = (u16)-1;
+
+ /* unlock */
+ return 0;
+}
+
+int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
+ struct bnxt_qplib_gid *gid, u8 *smac, u16 vlan_id,
+ bool update, u32 *index)
+{
+ struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl,
+ struct bnxt_qplib_res,
+ sgid_tbl);
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ int i, free_idx;
+
+ if (!sgid_tbl) {
+ dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated");
+ return -EINVAL;
+ }
+ /* Do we need a sgid_lock here? */
+ if (sgid_tbl->active == sgid_tbl->max) {
+ dev_err(&res->pdev->dev, "QPLIB: SGID table is full");
+ return -ENOMEM;
+ }
+ free_idx = sgid_tbl->max;
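+ /* Search for a duplicate GID while remembering the first free slot */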
+ for (i = 0; i < sgid_tbl->max; i++) {
+ if (!memcmp(&sgid_tbl->tbl[i], gid, sizeof(*gid))) {
+ dev_dbg(&res->pdev->dev,
+ "QPLIB: SGID entry already exists at index %d",
+ i);
+ *index = i;
+ return -EALREADY;
+ } else if (!memcmp(&sgid_tbl->tbl[i], &bnxt_qplib_gid_zero,
+ sizeof(bnxt_qplib_gid_zero)) &&
+ free_idx == sgid_tbl->max) {
+ free_idx = i;
+ }
+ }
+ if (free_idx == sgid_tbl->max) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: SGID table is FULL but count is not MAX??");
+ return -ENOMEM;
+ }
+ if (update) {
+ struct cmdq_add_gid req;
+ struct creq_add_gid_resp resp;
+ u16 cmd_flags = 0;
+ u32 temp32[4];
+ u16 temp16[3];
+ int rc;
+
+ RCFW_CMD_PREP(req, ADD_GID, cmd_flags);
+
+ memcpy(temp32, gid->data, sizeof(struct bnxt_qplib_gid));
+ req.gid[0] = cpu_to_be32(temp32[3]);
+ req.gid[1] = cpu_to_be32(temp32[2]);
+ req.gid[2] = cpu_to_be32(temp32[1]);
+ req.gid[3] = cpu_to_be32(temp32[0]);
+ if (vlan_id != 0xFFFF)
+ req.vlan = cpu_to_le16((vlan_id &
+ CMDQ_ADD_GID_VLAN_VLAN_ID_MASK) |
+ CMDQ_ADD_GID_VLAN_TPID_TPID_8100 |
+ CMDQ_ADD_GID_VLAN_VLAN_EN);
+
+ /* MAC in network format */
+ memcpy(temp16, smac, 6);
+ req.src_mac[0] = cpu_to_be16(temp16[0]);
+ req.src_mac[1] = cpu_to_be16(temp16[1]);
+ req.src_mac[2] = cpu_to_be16(temp16[2]);
+
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, 0);
+ if (rc)
+ return rc;
+ sgid_tbl->hw_id[free_idx] = le32_to_cpu(resp.xid);
+ }
+ /* Add GID to the sgid_tbl */
+ memcpy(&sgid_tbl->tbl[free_idx], gid, sizeof(*gid));
+ sgid_tbl->active++;
+ dev_dbg(&res->pdev->dev,
+ "QPLIB: SGID added hw_id[0x%x] = 0x%x active = 0x%x",
+ free_idx, sgid_tbl->hw_id[free_idx], sgid_tbl->active);
+
+ *index = free_idx;
+ /* unlock */
+ return 0;
+}
+
+/* pkeys */
+int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 index,
+ u16 *pkey)
+{
+ if (index == 0xFFFF) {
+ *pkey = 0xFFFF;
+ return 0;
+ }
+ if (index >= pkey_tbl->max) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: Index %d exceeded PKEY table max (%d)",
+ index, pkey_tbl->max);
+ return -EINVAL;
+ }
+ memcpy(pkey, &pkey_tbl->tbl[index], sizeof(*pkey));
+ return 0;
+}
+
+int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey,
+ bool update)
+{
+ int i, rc = 0;
+
+ if (!pkey_tbl) {
+ dev_err(&res->pdev->dev, "QPLIB: PKEY table not allocated");
+ return -EINVAL;
+ }
+
+ /* Do we need a pkey_lock here? */
+ if (!pkey_tbl->active) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: PKEY table has no active entries");
+ return -ENOMEM;
+ }
+ for (i = 0; i < pkey_tbl->max; i++) {
+ if (!memcmp(&pkey_tbl->tbl[i], pkey, sizeof(*pkey)))
+ break;
+ }
+ if (i == pkey_tbl->max) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: PKEY 0x%04x not found in the pkey table",
+ *pkey);
+ return -ENOMEM;
+ }
+ memset(&pkey_tbl->tbl[i], 0, sizeof(*pkey));
+ pkey_tbl->active--;
+
+ /* unlock */
+ return rc;
+}
+
+int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey,
+ bool update)
+{
+ int i, free_idx, rc = 0;
+
+ if (!pkey_tbl) {
+ dev_err(&res->pdev->dev, "QPLIB: PKEY table not allocated");
+ return -EINVAL;
+ }
+
+ /* Do we need a pkey_lock here? */
+ if (pkey_tbl->active == pkey_tbl->max) {
+ dev_err(&res->pdev->dev, "QPLIB: PKEY table is full");
+ return -ENOMEM;
+ }
+ free_idx = pkey_tbl->max;
+ for (i = 0; i < pkey_tbl->max; i++) {
+ if (!memcmp(&pkey_tbl->tbl[i], pkey, sizeof(*pkey)))
+ return -EALREADY;
+ else if (!pkey_tbl->tbl[i] && free_idx == pkey_tbl->max)
+ free_idx = i;
+ }
+ if (free_idx == pkey_tbl->max) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: PKEY table is FULL but count is not MAX??");
+ return -ENOMEM;
+ }
+ /* Add PKEY to the pkey_tbl */
+ memcpy(&pkey_tbl->tbl[free_idx], pkey, sizeof(*pkey));
+ pkey_tbl->active++;
+
+ /* unlock */
+ return rc;
+}
+
+/* AH */
+int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_create_ah req;
+ struct creq_create_ah_resp resp;
+ u16 cmd_flags = 0;
+ u32 temp32[4];
+ u16 temp16[3];
+ int rc;
+
+ RCFW_CMD_PREP(req, CREATE_AH, cmd_flags);
+
+ memcpy(temp32, ah->dgid.data, sizeof(struct bnxt_qplib_gid));
+ req.dgid[0] = cpu_to_le32(temp32[0]);
+ req.dgid[1] = cpu_to_le32(temp32[1]);
+ req.dgid[2] = cpu_to_le32(temp32[2]);
+ req.dgid[3] = cpu_to_le32(temp32[3]);
+
+ req.type = ah->nw_type;
+ req.hop_limit = ah->hop_limit;
+ req.sgid_index = cpu_to_le16(res->sgid_tbl.hw_id[ah->sgid_index]);
+ req.dest_vlan_id_flow_label = cpu_to_le32((ah->flow_label &
+ CMDQ_CREATE_AH_FLOW_LABEL_MASK) |
+ CMDQ_CREATE_AH_DEST_VLAN_ID_MASK);
+ req.pd_id = cpu_to_le32(ah->pd->id);
+ req.traffic_class = ah->traffic_class;
+
+ /* MAC in network format */
+ memcpy(temp16, ah->dmac, 6);
+ req.dest_mac[0] = cpu_to_le16(temp16[0]);
+ req.dest_mac[1] = cpu_to_le16(temp16[1]);
+ req.dest_mac[2] = cpu_to_le16(temp16[2]);
+
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+ NULL, 1);
+ if (rc)
+ return rc;
+
+ ah->id = le32_to_cpu(resp.xid);
+ return 0;
+}
+
+int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_destroy_ah req;
+ struct creq_destroy_ah_resp resp;
+ u16 cmd_flags = 0;
+ int rc;
+
+ /* Clean up the AH table in the device */
+ RCFW_CMD_PREP(req, DESTROY_AH, cmd_flags);
+
+ req.ah_cid = cpu_to_le32(ah->id);
+
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+ NULL, 1);
+ if (rc)
+ return rc;
+ return 0;
+}
+
+/* MRW */
+int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_deallocate_key req;
+ struct creq_deallocate_key_resp resp;
+ u16 cmd_flags = 0;
+ int rc;
+
+ if (mrw->lkey == 0xFFFFFFFF) {
+ dev_info(&res->pdev->dev,
+ "QPLIB: SP: Free a reserved lkey MRW");
+ return 0;
+ }
+
+ RCFW_CMD_PREP(req, DEALLOCATE_KEY, cmd_flags);
+
+ req.mrw_flags = mrw->type;
+
+ if ((mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1) ||
+ (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A) ||
+ (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B))
+ req.key = cpu_to_le32(mrw->rkey);
+ else
+ req.key = cpu_to_le32(mrw->lkey);
+
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+ NULL, 0);
+ if (rc)
+ return rc;
+
+ /* Free the qplib's MRW memory */
+ if (mrw->hwq.max_elements)
+ bnxt_qplib_free_hwq(res->pdev, &mrw->hwq);
+
+ return 0;
+}
+
+int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_allocate_mrw req;
+ struct creq_allocate_mrw_resp resp;
+ u16 cmd_flags = 0;
+ unsigned long tmp;
+ int rc;
+
+ RCFW_CMD_PREP(req, ALLOCATE_MRW, cmd_flags);
+
+ req.pd_id = cpu_to_le32(mrw->pd->id);
+ req.mrw_flags = mrw->type;
+ if ((mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR &&
+ mrw->flags & BNXT_QPLIB_FR_PMR) ||
+ mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A ||
+ mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B)
+ req.access = CMDQ_ALLOCATE_MRW_ACCESS_CONSUMER_OWNED_KEY;
+ tmp = (unsigned long)mrw;
+ req.mrw_handle = cpu_to_le64(tmp);
+
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, 0);
+ if (rc)
+ return rc;
+
+ if ((mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1) ||
+ (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A) ||
+ (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B))
+ mrw->rkey = le32_to_cpu(resp.xid);
+ else
+ mrw->lkey = le32_to_cpu(resp.xid);
+ return 0;
+}
+
+int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
+ bool block)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_deregister_mr req;
+ struct creq_deregister_mr_resp resp;
+ u16 cmd_flags = 0;
+ int rc;
+
+ RCFW_CMD_PREP(req, DEREGISTER_MR, cmd_flags);
+
+ req.lkey = cpu_to_le32(mrw->lkey);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, block);
+ if (rc)
+ return rc;
+
+ /* Free the qplib's MR memory */
+ if (mrw->hwq.max_elements) {
+ mrw->va = 0;
+ mrw->total_size = 0;
+ bnxt_qplib_free_hwq(res->pdev, &mrw->hwq);
+ }
+
+ return 0;
+}
+
+int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
+ u64 *pbl_tbl, int num_pbls, bool block)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_register_mr req;
+ struct creq_register_mr_resp resp;
+ u16 cmd_flags = 0, level;
+ int pg_ptrs, pages, i, rc;
+ dma_addr_t **pbl_ptr;
+ u32 pg_size;
+
+ if (num_pbls) {
+ pg_ptrs = roundup_pow_of_two(num_pbls);
+ pages = pg_ptrs >> MAX_PBL_LVL_1_PGS_SHIFT;
+ if (!pages)
+ pages++;
+
+ if (pages > MAX_PBL_LVL_1_PGS) {
+ dev_err(&res->pdev->dev, "QPLIB: SP: Reg MR pages ");
+ dev_err(&res->pdev->dev,
+ "requested (0x%x) exceeded max (0x%x)",
+ pages, MAX_PBL_LVL_1_PGS);
+ return -ENOMEM;
+ }
+ /* Free the hwq if it already exists, must be a rereg */
+ if (mr->hwq.max_elements)
+ bnxt_qplib_free_hwq(res->pdev, &mr->hwq);
+
+ mr->hwq.max_elements = pages;
+ rc = bnxt_qplib_alloc_init_hwq(res->pdev, &mr->hwq, NULL, 0,
+ &mr->hwq.max_elements,
+ PAGE_SIZE, 0, PAGE_SIZE,
+ HWQ_TYPE_CTX);
+ if (rc) {
+ dev_err(&res->pdev->dev,
+ "SP: Reg MR memory allocation failed");
+ return -ENOMEM;
+ }
+ /* Write to the hwq */
+ pbl_ptr = (dma_addr_t **)mr->hwq.pbl_ptr;
+ for (i = 0; i < num_pbls; i++)
+ pbl_ptr[PTR_PG(i)][PTR_IDX(i)] =
+ (pbl_tbl[i] & PAGE_MASK) | PTU_PTE_VALID;
+ }
+
+ RCFW_CMD_PREP(req, REGISTER_MR, cmd_flags);
+
+ /* Configure the request */
+ if (mr->hwq.level == PBL_LVL_MAX) {
+ level = 0;
+ req.pbl = 0;
+ pg_size = PAGE_SIZE;
+ } else {
+ level = mr->hwq.level + 1;
+ req.pbl = cpu_to_le64(mr->hwq.pbl[PBL_LVL_0].pg_map_arr[0]);
+ pg_size = mr->hwq.pbl[PBL_LVL_0].pg_size;
+ }
+ req.log2_pg_size_lvl = (level << CMDQ_REGISTER_MR_LVL_SFT) |
+ ((ilog2(pg_size) <<
+ CMDQ_REGISTER_MR_LOG2_PG_SIZE_SFT) &
+ CMDQ_REGISTER_MR_LOG2_PG_SIZE_MASK);
+ req.access = (mr->flags & 0xFFFF);
+ req.va = cpu_to_le64(mr->va);
+ req.key = cpu_to_le32(mr->lkey);
+ req.mr_size = cpu_to_le64(mr->total_size);
+
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, block);
+ if (rc)
+ goto fail;
+
+ return 0;
+
+fail:
+ if (mr->hwq.max_elements)
+ bnxt_qplib_free_hwq(res->pdev, &mr->hwq);
+ return rc;
+}
+
+int bnxt_qplib_alloc_fast_reg_page_list(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_frpl *frpl,
+ int max_pg_ptrs)
+{
+ int pg_ptrs, pages, rc;
+
+ /* Re-calculate the max to fit the HWQ allocation model */
+ pg_ptrs = roundup_pow_of_two(max_pg_ptrs);
+ pages = pg_ptrs >> MAX_PBL_LVL_1_PGS_SHIFT;
+ if (!pages)
+ pages++;
+
+ if (pages > MAX_PBL_LVL_1_PGS)
+ return -ENOMEM;
+
+ frpl->hwq.max_elements = pages;
+ rc = bnxt_qplib_alloc_init_hwq(res->pdev, &frpl->hwq, NULL, 0,
+ &frpl->hwq.max_elements, PAGE_SIZE, 0,
+ PAGE_SIZE, HWQ_TYPE_CTX);
+ if (!rc)
+ frpl->max_pg_ptrs = pg_ptrs;
+
+ return rc;
+}
+
+int bnxt_qplib_free_fast_reg_page_list(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_frpl *frpl)
+{
+ bnxt_qplib_free_hwq(res->pdev, &frpl->hwq);
+ return 0;
+}
+
+int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_map_tc_to_cos req;
+ struct creq_map_tc_to_cos_resp resp;
+ u16 cmd_flags = 0;
+ int rc = 0;
+
+ RCFW_CMD_PREP(req, MAP_TC_TO_COS, cmd_flags);
+ req.cos0 = cpu_to_le16(cids[0]);
+ req.cos1 = cpu_to_le16(cids[1]);
+
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, 0);
+ return rc;
+}
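For orientation, here is a minimal sketch (not part of the patch) of how a verbs-layer caller might drive the AH slow-path helpers defined above; the function name and the field values are illustrative assumptions, and the real consumer in this series is ib_verbs.c.

/* Illustrative sketch only -- not part of the patch. */
static int example_ah_lifecycle(struct bnxt_qplib_res *res,
				struct bnxt_qplib_pd *pd,
				struct bnxt_qplib_gid *dgid, u8 *dmac)
{
	struct bnxt_qplib_ah ah = {};
	int rc;

	ah.pd = pd;
	ah.dgid = *dgid;		/* destination GID */
	memcpy(ah.dmac, dmac, 6);	/* destination MAC */
	ah.sgid_index = 0;		/* index into res->sgid_tbl */
	ah.hop_limit = 64;
	ah.traffic_class = 0;
	/* ah.nw_type selects RoCE v1/v2 and is filled in by the verbs layer */

	rc = bnxt_qplib_create_ah(res, &ah);	/* posts CMDQ CREATE_AH */
	if (rc)
		return rc;

	/* ah.id now carries the AVID used in UD send WQEs */

	return bnxt_qplib_destroy_ah(res, &ah);	/* posts CMDQ DESTROY_AH */
}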
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
new file mode 100644
index 000000000000..2ce7e2a32cf0
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -0,0 +1,165 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Slow Path Operators (header)
+ *
+ */
+
+#ifndef __BNXT_QPLIB_SP_H__
+#define __BNXT_QPLIB_SP_H__
+
+#define BNXT_QPLIB_RESERVED_QP_WRS 128
+
+#define PCI_EXP_DEVCTL2_ATOMIC_REQ 0x0040
+
+struct bnxt_qplib_dev_attr {
+ char fw_ver[32];
+ u16 max_sgid;
+ u16 max_mrw;
+ u32 max_qp;
+#define BNXT_QPLIB_MAX_OUT_RD_ATOM 126
+ u32 max_qp_rd_atom;
+ u32 max_qp_init_rd_atom;
+ u32 max_qp_wqes;
+ u32 max_qp_sges;
+ u32 max_cq;
+ u32 max_cq_wqes;
+ u32 max_cq_sges;
+ u32 max_mr;
+ u64 max_mr_size;
+ u32 max_pd;
+ u32 max_mw;
+ u32 max_raw_ethy_qp;
+ u32 max_ah;
+ u32 max_fmr;
+ u32 max_map_per_fmr;
+ u32 max_srq;
+ u32 max_srq_wqes;
+ u32 max_srq_sges;
+ u32 max_pkey;
+ u32 max_inline_data;
+ u32 l2_db_size;
+ u8 tqm_alloc_reqs[MAX_TQM_ALLOC_REQ];
+ bool is_atomic;
+};
+
+struct bnxt_qplib_pd {
+ u32 id;
+};
+
+struct bnxt_qplib_gid {
+ u8 data[16];
+};
+
+struct bnxt_qplib_ah {
+ struct bnxt_qplib_gid dgid;
+ struct bnxt_qplib_pd *pd;
+ u32 id;
+ u8 sgid_index;
+ /* For Query AH if the HW table and SW table are different */
+ u8 host_sgid_index;
+ u8 traffic_class;
+ u32 flow_label;
+ u8 hop_limit;
+ u8 sl;
+ u8 dmac[6];
+ u16 vlan_id;
+ u8 nw_type;
+};
+
+struct bnxt_qplib_mrw {
+ struct bnxt_qplib_pd *pd;
+ int type;
+ u32 flags;
+#define BNXT_QPLIB_FR_PMR 0x80000000
+ u32 lkey;
+ u32 rkey;
+#define BNXT_QPLIB_RSVD_LKEY 0xFFFFFFFF
+ u64 va;
+ u64 total_size;
+ u32 npages;
+ u64 mr_handle;
+ struct bnxt_qplib_hwq hwq;
+};
+
+struct bnxt_qplib_frpl {
+ int max_pg_ptrs;
+ struct bnxt_qplib_hwq hwq;
+};
+
+#define BNXT_QPLIB_ACCESS_LOCAL_WRITE BIT(0)
+#define BNXT_QPLIB_ACCESS_REMOTE_READ BIT(1)
+#define BNXT_QPLIB_ACCESS_REMOTE_WRITE BIT(2)
+#define BNXT_QPLIB_ACCESS_REMOTE_ATOMIC BIT(3)
+#define BNXT_QPLIB_ACCESS_MW_BIND BIT(4)
+#define BNXT_QPLIB_ACCESS_ZERO_BASED BIT(5)
+#define BNXT_QPLIB_ACCESS_ON_DEMAND BIT(6)
+
+int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_sgid_tbl *sgid_tbl, int index,
+ struct bnxt_qplib_gid *gid);
+int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
+ struct bnxt_qplib_gid *gid, bool update);
+int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
+ struct bnxt_qplib_gid *gid, u8 *mac, u16 vlan_id,
+ bool update, u32 *index);
+int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 index,
+ u16 *pkey);
+int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey,
+ bool update);
+int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey,
+ bool update);
+int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_dev_attr *attr);
+int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah);
+int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah);
+int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_mrw *mrw);
+int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
+ bool block);
+int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
+ u64 *pbl_tbl, int num_pbls, bool block);
+int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr);
+int bnxt_qplib_alloc_fast_reg_mr(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_mrw *mr, int max);
+int bnxt_qplib_alloc_fast_reg_page_list(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_frpl *frpl, int max);
+int bnxt_qplib_free_fast_reg_page_list(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_frpl *frpl);
+int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids);
+#endif /* __BNXT_QPLIB_SP_H__*/
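The BNXT_QPLIB_ACCESS_* bits above mirror the IB core access flags by name, so the verbs layer only needs a straight translation; a hedged sketch of such a helper follows (the helper name is hypothetical, the IB_ACCESS_* / IB_ZERO_BASED flags come from rdma/ib_verbs.h).

/* Hypothetical helper, for illustration only. */
static u32 example_to_qplib_access(int ib_acc_flags)
{
	u32 qflags = 0;

	if (ib_acc_flags & IB_ACCESS_LOCAL_WRITE)
		qflags |= BNXT_QPLIB_ACCESS_LOCAL_WRITE;
	if (ib_acc_flags & IB_ACCESS_REMOTE_READ)
		qflags |= BNXT_QPLIB_ACCESS_REMOTE_READ;
	if (ib_acc_flags & IB_ACCESS_REMOTE_WRITE)
		qflags |= BNXT_QPLIB_ACCESS_REMOTE_WRITE;
	if (ib_acc_flags & IB_ACCESS_REMOTE_ATOMIC)
		qflags |= BNXT_QPLIB_ACCESS_REMOTE_ATOMIC;
	if (ib_acc_flags & IB_ACCESS_MW_BIND)
		qflags |= BNXT_QPLIB_ACCESS_MW_BIND;
	if (ib_acc_flags & IB_ZERO_BASED)
		qflags |= BNXT_QPLIB_ACCESS_ZERO_BASED;
	if (ib_acc_flags & IB_ACCESS_ON_DEMAND)
		qflags |= BNXT_QPLIB_ACCESS_ON_DEMAND;

	return qflags;
}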
diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
new file mode 100644
index 000000000000..fc23477ac52f
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
@@ -0,0 +1,2821 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: RoCE HSI File - Autogenerated
+ */
+
+#ifndef __BNXT_RE_HSI_H__
+#define __BNXT_RE_HSI_H__
+
+/* include bnxt_hsi.h from bnxt_en driver */
+#include "bnxt_hsi.h"
+
+/* CMP Door Bell Format (4 bytes) */
+struct cmpl_doorbell {
+ __le32 key_mask_valid_idx;
+ #define CMPL_DOORBELL_IDX_MASK 0xffffffUL
+ #define CMPL_DOORBELL_IDX_SFT 0
+ #define CMPL_DOORBELL_RESERVED_MASK 0x3000000UL
+ #define CMPL_DOORBELL_RESERVED_SFT 24
+ #define CMPL_DOORBELL_IDX_VALID 0x4000000UL
+ #define CMPL_DOORBELL_MASK 0x8000000UL
+ #define CMPL_DOORBELL_KEY_MASK 0xf0000000UL
+ #define CMPL_DOORBELL_KEY_SFT 28
+ #define CMPL_DOORBELL_KEY_CMPL (0x2UL << 28)
+};
+
+/* Status Door Bell Format (4 bytes) */
+struct status_doorbell {
+ __le32 key_idx;
+ #define STATUS_DOORBELL_IDX_MASK 0xffffffUL
+ #define STATUS_DOORBELL_IDX_SFT 0
+ #define STATUS_DOORBELL_RESERVED_MASK 0xf000000UL
+ #define STATUS_DOORBELL_RESERVED_SFT 24
+ #define STATUS_DOORBELL_KEY_MASK 0xf0000000UL
+ #define STATUS_DOORBELL_KEY_SFT 28
+ #define STATUS_DOORBELL_KEY_STAT (0x3UL << 28)
+};
+
+/* RoCE Host Structures */
+
+/* Doorbell Structures */
+/* 64b Doorbell Format (8 bytes) */
+struct dbr_dbr {
+ __le32 index;
+ #define DBR_DBR_INDEX_MASK 0xfffffUL
+ #define DBR_DBR_INDEX_SFT 0
+ #define DBR_DBR_RESERVED12_MASK 0xfff00000UL
+ #define DBR_DBR_RESERVED12_SFT 20
+ __le32 type_xid;
+ #define DBR_DBR_XID_MASK 0xfffffUL
+ #define DBR_DBR_XID_SFT 0
+ #define DBR_DBR_RESERVED8_MASK 0xff00000UL
+ #define DBR_DBR_RESERVED8_SFT 20
+ #define DBR_DBR_TYPE_MASK 0xf0000000UL
+ #define DBR_DBR_TYPE_SFT 28
+ #define DBR_DBR_TYPE_SQ (0x0UL << 28)
+ #define DBR_DBR_TYPE_RQ (0x1UL << 28)
+ #define DBR_DBR_TYPE_SRQ (0x2UL << 28)
+ #define DBR_DBR_TYPE_SRQ_ARM (0x3UL << 28)
+ #define DBR_DBR_TYPE_CQ (0x4UL << 28)
+ #define DBR_DBR_TYPE_CQ_ARMSE (0x5UL << 28)
+ #define DBR_DBR_TYPE_CQ_ARMALL (0x6UL << 28)
+ #define DBR_DBR_TYPE_CQ_ARMENA (0x7UL << 28)
+ #define DBR_DBR_TYPE_SRQ_ARMENA (0x8UL << 28)
+ #define DBR_DBR_TYPE_CQ_CUTOFF_ACK (0x9UL << 28)
+ #define DBR_DBR_TYPE_NULL (0xfUL << 28)
+};
+
+/* 32b Doorbell Format (4 bytes) */
+struct dbr_dbr32 {
+ __le32 type_abs_incr_xid;
+ #define DBR_DBR32_XID_MASK 0xfffffUL
+ #define DBR_DBR32_XID_SFT 0
+ #define DBR_DBR32_RESERVED4_MASK 0xf00000UL
+ #define DBR_DBR32_RESERVED4_SFT 20
+ #define DBR_DBR32_INCR_MASK 0xf000000UL
+ #define DBR_DBR32_INCR_SFT 24
+ #define DBR_DBR32_ABS 0x10000000UL
+ #define DBR_DBR32_TYPE_MASK 0xe0000000UL
+ #define DBR_DBR32_TYPE_SFT 29
+ #define DBR_DBR32_TYPE_SQ (0x0UL << 29)
+};
+
+/* SQ WQE Structures */
+/* Base SQ WQE (8 bytes) */
+struct sq_base {
+ u8 wqe_type;
+ #define SQ_BASE_WQE_TYPE_SEND 0x0UL
+ #define SQ_BASE_WQE_TYPE_SEND_W_IMMEAD 0x1UL
+ #define SQ_BASE_WQE_TYPE_SEND_W_INVALID 0x2UL
+ #define SQ_BASE_WQE_TYPE_WRITE_WQE 0x4UL
+ #define SQ_BASE_WQE_TYPE_WRITE_W_IMMEAD 0x5UL
+ #define SQ_BASE_WQE_TYPE_READ_WQE 0x6UL
+ #define SQ_BASE_WQE_TYPE_ATOMIC_CS 0x8UL
+ #define SQ_BASE_WQE_TYPE_ATOMIC_FA 0xbUL
+ #define SQ_BASE_WQE_TYPE_LOCAL_INVALID 0xcUL
+ #define SQ_BASE_WQE_TYPE_FR_PMR 0xdUL
+ #define SQ_BASE_WQE_TYPE_BIND 0xeUL
+ u8 unused_0[7];
+};
+
+/* WQE SGE (16 bytes) */
+struct sq_sge {
+ __le64 va_or_pa;
+ __le32 l_key;
+ __le32 size;
+};
+
+/* PSN Search Structure (8 bytes) */
+struct sq_psn_search {
+ __le32 opcode_start_psn;
+ #define SQ_PSN_SEARCH_START_PSN_MASK 0xffffffUL
+ #define SQ_PSN_SEARCH_START_PSN_SFT 0
+ #define SQ_PSN_SEARCH_OPCODE_MASK 0xff000000UL
+ #define SQ_PSN_SEARCH_OPCODE_SFT 24
+ __le32 flags_next_psn;
+ #define SQ_PSN_SEARCH_NEXT_PSN_MASK 0xffffffUL
+ #define SQ_PSN_SEARCH_NEXT_PSN_SFT 0
+ #define SQ_PSN_SEARCH_FLAGS_MASK 0xff000000UL
+ #define SQ_PSN_SEARCH_FLAGS_SFT 24
+};
+
+/* Send SQ WQE (40 bytes) */
+struct sq_send {
+ u8 wqe_type;
+ #define SQ_SEND_WQE_TYPE_SEND 0x0UL
+ #define SQ_SEND_WQE_TYPE_SEND_W_IMMEAD 0x1UL
+ #define SQ_SEND_WQE_TYPE_SEND_W_INVALID 0x2UL
+ u8 flags;
+ #define SQ_SEND_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_SEND_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_SEND_FLAGS_UC_FENCE 0x4UL
+ #define SQ_SEND_FLAGS_SE 0x8UL
+ #define SQ_SEND_FLAGS_INLINE 0x10UL
+ u8 wqe_size;
+ u8 reserved8_1;
+ __le32 inv_key_or_imm_data;
+ __le32 length;
+ __le32 q_key;
+ __le32 dst_qp;
+ #define SQ_SEND_DST_QP_MASK 0xffffffUL
+ #define SQ_SEND_DST_QP_SFT 0
+ #define SQ_SEND_RESERVED8_2_MASK 0xff000000UL
+ #define SQ_SEND_RESERVED8_2_SFT 24
+ __le32 avid;
+ #define SQ_SEND_AVID_MASK 0xfffffUL
+ #define SQ_SEND_AVID_SFT 0
+ #define SQ_SEND_RESERVED_AVID_MASK 0xfff00000UL
+ #define SQ_SEND_RESERVED_AVID_SFT 20
+ __le64 reserved64;
+ __le32 data[24];
+};
+
+/* Send Raw Ethernet and QP1 SQ WQE (40 bytes) */
+struct sq_send_raweth_qp1 {
+ u8 wqe_type;
+ #define SQ_SEND_RAWETH_QP1_WQE_TYPE_SEND 0x0UL
+ u8 flags;
+ #define SQ_SEND_RAWETH_QP1_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_SEND_RAWETH_QP1_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_SEND_RAWETH_QP1_FLAGS_UC_FENCE 0x4UL
+ #define SQ_SEND_RAWETH_QP1_FLAGS_SE 0x8UL
+ #define SQ_SEND_RAWETH_QP1_FLAGS_INLINE 0x10UL
+ u8 wqe_size;
+ u8 reserved8;
+ __le16 lflags;
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_TCP_UDP_CHKSUM 0x1UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_IP_CHKSUM 0x2UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_NOCRC 0x4UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_STAMP 0x8UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_T_IP_CHKSUM 0x10UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_RESERVED1_1 0x20UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_RESERVED1_2 0x40UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_RESERVED1_3 0x80UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_ROCE_CRC 0x100UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_FCOE_CRC 0x200UL
+ __le16 cfa_action;
+ __le32 length;
+ __le32 reserved32_1;
+ __le32 cfa_meta;
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_MASK 0xfffUL
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_SFT 0
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_DE 0x1000UL
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_PRI_MASK 0xe000UL
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_PRI_SFT 13
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_MASK 0x70000UL
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_SFT 16
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPID88A8 (0x0UL << 16)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPID8100 (0x1UL << 16)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPID9100 (0x2UL << 16)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPID9200 (0x3UL << 16)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPID9300 (0x4UL << 16)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPIDCFG (0x5UL << 16)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_LAST \
+ SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPIDCFG
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_RESERVED_MASK 0xff80000UL
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_RESERVED_SFT 19
+ #define SQ_SEND_RAWETH_QP1_CFA_META_KEY_MASK 0xf0000000UL
+ #define SQ_SEND_RAWETH_QP1_CFA_META_KEY_SFT 28
+ #define SQ_SEND_RAWETH_QP1_CFA_META_KEY_NONE (0x0UL << 28)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_KEY_VLAN_TAG (0x1UL << 28)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_KEY_LAST \
+ SQ_SEND_RAWETH_QP1_CFA_META_KEY_VLAN_TAG
+ __le32 reserved32_2;
+ __le64 reserved64;
+ __le32 data[24];
+};
+
+/* RDMA SQ WQE (40 bytes) */
+struct sq_rdma {
+ u8 wqe_type;
+ #define SQ_RDMA_WQE_TYPE_WRITE_WQE 0x4UL
+ #define SQ_RDMA_WQE_TYPE_WRITE_W_IMMEAD 0x5UL
+ #define SQ_RDMA_WQE_TYPE_READ_WQE 0x6UL
+ u8 flags;
+ #define SQ_RDMA_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_RDMA_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_RDMA_FLAGS_UC_FENCE 0x4UL
+ #define SQ_RDMA_FLAGS_SE 0x8UL
+ #define SQ_RDMA_FLAGS_INLINE 0x10UL
+ u8 wqe_size;
+ u8 reserved8;
+ __le32 imm_data;
+ __le32 length;
+ __le32 reserved32_1;
+ __le64 remote_va;
+ __le32 remote_key;
+ __le32 reserved32_2;
+ __le32 data[24];
+};
+
+/* Atomic SQ WQE (40 bytes) */
+struct sq_atomic {
+ u8 wqe_type;
+ #define SQ_ATOMIC_WQE_TYPE_ATOMIC_CS 0x8UL
+ #define SQ_ATOMIC_WQE_TYPE_ATOMIC_FA 0xbUL
+ u8 flags;
+ #define SQ_ATOMIC_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_ATOMIC_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_ATOMIC_FLAGS_UC_FENCE 0x4UL
+ #define SQ_ATOMIC_FLAGS_SE 0x8UL
+ #define SQ_ATOMIC_FLAGS_INLINE 0x10UL
+ __le16 reserved16;
+ __le32 remote_key;
+ __le64 remote_va;
+ __le64 swap_data;
+ __le64 cmp_data;
+ __le32 data[24];
+};
+
+/* Local Invalidate SQ WQE (40 bytes) */
+struct sq_localinvalidate {
+ u8 wqe_type;
+ #define SQ_LOCALINVALIDATE_WQE_TYPE_LOCAL_INVALID 0xcUL
+ u8 flags;
+ #define SQ_LOCALINVALIDATE_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_LOCALINVALIDATE_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_LOCALINVALIDATE_FLAGS_UC_FENCE 0x4UL
+ #define SQ_LOCALINVALIDATE_FLAGS_SE 0x8UL
+ #define SQ_LOCALINVALIDATE_FLAGS_INLINE 0x10UL
+ __le16 reserved16;
+ __le32 inv_l_key;
+ __le64 reserved64;
+ __le32 reserved128[4];
+ __le32 data[24];
+};
+
+/* FR-PMR SQ WQE (40 bytes) */
+struct sq_fr_pmr {
+ u8 wqe_type;
+ #define SQ_FR_PMR_WQE_TYPE_FR_PMR 0xdUL
+ u8 flags;
+ #define SQ_FR_PMR_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_FR_PMR_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_FR_PMR_FLAGS_UC_FENCE 0x4UL
+ #define SQ_FR_PMR_FLAGS_SE 0x8UL
+ #define SQ_FR_PMR_FLAGS_INLINE 0x10UL
+ u8 access_cntl;
+ #define SQ_FR_PMR_ACCESS_CNTL_LOCAL_WRITE 0x1UL
+ #define SQ_FR_PMR_ACCESS_CNTL_REMOTE_READ 0x2UL
+ #define SQ_FR_PMR_ACCESS_CNTL_REMOTE_WRITE 0x4UL
+ #define SQ_FR_PMR_ACCESS_CNTL_REMOTE_ATOMIC 0x8UL
+ #define SQ_FR_PMR_ACCESS_CNTL_WINDOW_BIND 0x10UL
+ u8 zero_based_page_size_log;
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_MASK 0x1fUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_SFT 0
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_4K 0x0UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_8K 0x1UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_64K 0x4UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_256K 0x6UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_1M 0x8UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_2M 0x9UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_4M 0xaUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_1G 0x12UL
+ #define SQ_FR_PMR_ZERO_BASED 0x20UL
+ #define SQ_FR_PMR_RESERVED2_MASK 0xc0UL
+ #define SQ_FR_PMR_RESERVED2_SFT 6
+ __le32 l_key;
+ u8 length[5];
+ u8 reserved8_1;
+ u8 reserved8_2;
+ u8 numlevels_pbl_page_size_log;
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_MASK 0x1fUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_SFT 0
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_4K 0x0UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_8K 0x1UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_64K 0x4UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_256K 0x6UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_1M 0x8UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_2M 0x9UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_4M 0xaUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_1G 0x12UL
+ #define SQ_FR_PMR_RESERVED1 0x20UL
+ #define SQ_FR_PMR_NUMLEVELS_MASK 0xc0UL
+ #define SQ_FR_PMR_NUMLEVELS_SFT 6
+ #define SQ_FR_PMR_NUMLEVELS_PHYSICAL (0x0UL << 6)
+ #define SQ_FR_PMR_NUMLEVELS_LAYER1 (0x1UL << 6)
+ #define SQ_FR_PMR_NUMLEVELS_LAYER2 (0x2UL << 6)
+ __le64 pblptr;
+ __le64 va;
+ __le32 data[24];
+};
+
+/* Bind SQ WQE (40 bytes) */
+struct sq_bind {
+ u8 wqe_type;
+ #define SQ_BIND_WQE_TYPE_BIND 0xeUL
+ u8 flags;
+ #define SQ_BIND_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_BIND_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_BIND_FLAGS_UC_FENCE 0x4UL
+ #define SQ_BIND_FLAGS_SE 0x8UL
+ #define SQ_BIND_FLAGS_INLINE 0x10UL
+ u8 access_cntl;
+ #define SQ_BIND_ACCESS_CNTL_LOCAL_WRITE 0x1UL
+ #define SQ_BIND_ACCESS_CNTL_REMOTE_READ 0x2UL
+ #define SQ_BIND_ACCESS_CNTL_REMOTE_WRITE 0x4UL
+ #define SQ_BIND_ACCESS_CNTL_REMOTE_ATOMIC 0x8UL
+ #define SQ_BIND_ACCESS_CNTL_WINDOW_BIND 0x10UL
+ u8 reserved8_1;
+ u8 mw_type_zero_based;
+ #define SQ_BIND_ZERO_BASED 0x1UL
+ #define SQ_BIND_MW_TYPE 0x2UL
+ #define SQ_BIND_MW_TYPE_TYPE1 (0x0UL << 1)
+ #define SQ_BIND_MW_TYPE_TYPE2 (0x1UL << 1)
+ #define SQ_BIND_RESERVED6_MASK 0xfcUL
+ #define SQ_BIND_RESERVED6_SFT 2
+ u8 reserved8_2;
+ __le16 reserved16;
+ __le32 parent_l_key;
+ __le32 l_key;
+ __le64 va;
+ u8 length[5];
+ u8 data_reserved24[99];
+ #define SQ_BIND_RESERVED24_MASK 0xffffff00UL
+ #define SQ_BIND_RESERVED24_SFT 8
+ #define SQ_BIND_DATA_MASK 0xffffffffUL
+ #define SQ_BIND_DATA_SFT 0
+};
+
+/* RQ/SRQ WQE Structures */
+/* RQ/SRQ WQE (40 bytes) */
+struct rq_wqe {
+ u8 wqe_type;
+ #define RQ_WQE_WQE_TYPE_RCV 0x80UL
+ u8 flags;
+ u8 wqe_size;
+ u8 reserved8;
+ __le32 reserved32;
+ __le32 wr_id[2];
+ #define RQ_WQE_WR_ID_MASK 0xfffffUL
+ #define RQ_WQE_WR_ID_SFT 0
+ #define RQ_WQE_RESERVED44_MASK 0xfff00000UL
+ #define RQ_WQE_RESERVED44_SFT 20
+ __le32 reserved128[4];
+ __le32 data[24];
+};
+
+/* CQ CQE Structures */
+/* Base CQE (32 bytes) */
+struct cq_base {
+ __le64 reserved64_1;
+ __le64 reserved64_2;
+ __le64 reserved64_3;
+ u8 cqe_type_toggle;
+ #define CQ_BASE_TOGGLE 0x1UL
+ #define CQ_BASE_CQE_TYPE_MASK 0x1eUL
+ #define CQ_BASE_CQE_TYPE_SFT 1
+ #define CQ_BASE_CQE_TYPE_REQ (0x0UL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_RC (0x1UL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_UD (0x2UL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_RAWETH_QP1 (0x3UL << 1)
+ #define CQ_BASE_CQE_TYPE_TERMINAL (0xeUL << 1)
+ #define CQ_BASE_CQE_TYPE_CUT_OFF (0xfUL << 1)
+ #define CQ_BASE_RESERVED3_MASK 0xe0UL
+ #define CQ_BASE_RESERVED3_SFT 5
+ u8 status;
+ __le16 reserved16;
+ __le32 reserved32;
+};
+
+/* Requester CQ CQE (32 bytes) */
+struct cq_req {
+ __le64 qp_handle;
+ __le16 sq_cons_idx;
+ __le16 reserved16_1;
+ __le32 reserved32_2;
+ __le64 reserved64;
+ u8 cqe_type_toggle;
+ #define CQ_REQ_TOGGLE 0x1UL
+ #define CQ_REQ_CQE_TYPE_MASK 0x1eUL
+ #define CQ_REQ_CQE_TYPE_SFT 1
+ #define CQ_REQ_CQE_TYPE_REQ (0x0UL << 1)
+ #define CQ_REQ_RESERVED3_MASK 0xe0UL
+ #define CQ_REQ_RESERVED3_SFT 5
+ u8 status;
+ #define CQ_REQ_STATUS_OK 0x0UL
+ #define CQ_REQ_STATUS_BAD_RESPONSE_ERR 0x1UL
+ #define CQ_REQ_STATUS_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_REQ_STATUS_LOCAL_QP_OPERATION_ERR 0x3UL
+ #define CQ_REQ_STATUS_LOCAL_PROTECTION_ERR 0x4UL
+ #define CQ_REQ_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
+ #define CQ_REQ_STATUS_REMOTE_INVALID_REQUEST_ERR 0x6UL
+ #define CQ_REQ_STATUS_REMOTE_ACCESS_ERR 0x7UL
+ #define CQ_REQ_STATUS_REMOTE_OPERATION_ERR 0x8UL
+ #define CQ_REQ_STATUS_RNR_NAK_RETRY_CNT_ERR 0x9UL
+ #define CQ_REQ_STATUS_TRANSPORT_RETRY_CNT_ERR 0xaUL
+ #define CQ_REQ_STATUS_WORK_REQUEST_FLUSHED_ERR 0xbUL
+ __le16 reserved16_2;
+ __le32 reserved32_1;
+};
+
+/* Responder RC CQE (32 bytes) */
+struct cq_res_rc {
+ __le32 length;
+ __le32 imm_data_or_inv_r_key;
+ __le64 qp_handle;
+ __le64 mr_handle;
+ u8 cqe_type_toggle;
+ #define CQ_RES_RC_TOGGLE 0x1UL
+ #define CQ_RES_RC_CQE_TYPE_MASK 0x1eUL
+ #define CQ_RES_RC_CQE_TYPE_SFT 1
+ #define CQ_RES_RC_CQE_TYPE_RES_RC (0x1UL << 1)
+ #define CQ_RES_RC_RESERVED3_MASK 0xe0UL
+ #define CQ_RES_RC_RESERVED3_SFT 5
+ u8 status;
+ #define CQ_RES_RC_STATUS_OK 0x0UL
+ #define CQ_RES_RC_STATUS_LOCAL_ACCESS_ERROR 0x1UL
+ #define CQ_RES_RC_STATUS_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_RES_RC_STATUS_LOCAL_PROTECTION_ERR 0x3UL
+ #define CQ_RES_RC_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
+ #define CQ_RES_RC_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
+ #define CQ_RES_RC_STATUS_REMOTE_INVALID_REQUEST_ERR 0x6UL
+ #define CQ_RES_RC_STATUS_WORK_REQUEST_FLUSHED_ERR 0x7UL
+ #define CQ_RES_RC_STATUS_HW_FLUSH_ERR 0x8UL
+ __le16 flags;
+ #define CQ_RES_RC_FLAGS_SRQ 0x1UL
+ #define CQ_RES_RC_FLAGS_SRQ_RQ (0x0UL << 0)
+ #define CQ_RES_RC_FLAGS_SRQ_SRQ (0x1UL << 0)
+ #define CQ_RES_RC_FLAGS_SRQ_LAST CQ_RES_RC_FLAGS_SRQ_SRQ
+ #define CQ_RES_RC_FLAGS_IMM 0x2UL
+ #define CQ_RES_RC_FLAGS_INV 0x4UL
+ #define CQ_RES_RC_FLAGS_RDMA 0x8UL
+ #define CQ_RES_RC_FLAGS_RDMA_SEND (0x0UL << 3)
+ #define CQ_RES_RC_FLAGS_RDMA_RDMA_WRITE (0x1UL << 3)
+ #define CQ_RES_RC_FLAGS_RDMA_LAST CQ_RES_RC_FLAGS_RDMA_RDMA_WRITE
+ __le32 srq_or_rq_wr_id;
+ #define CQ_RES_RC_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL
+ #define CQ_RES_RC_SRQ_OR_RQ_WR_ID_SFT 0
+ #define CQ_RES_RC_RESERVED12_MASK 0xfff00000UL
+ #define CQ_RES_RC_RESERVED12_SFT 20
+};
+
+/* Responder UD CQE (32 bytes) */
+struct cq_res_ud {
+ __le32 length;
+ #define CQ_RES_UD_LENGTH_MASK 0x3fffUL
+ #define CQ_RES_UD_LENGTH_SFT 0
+ #define CQ_RES_UD_RESERVED18_MASK 0xffffc000UL
+ #define CQ_RES_UD_RESERVED18_SFT 14
+ __le32 imm_data;
+ __le64 qp_handle;
+ __le16 src_mac[3];
+ __le16 src_qp_low;
+ u8 cqe_type_toggle;
+ #define CQ_RES_UD_TOGGLE 0x1UL
+ #define CQ_RES_UD_CQE_TYPE_MASK 0x1eUL
+ #define CQ_RES_UD_CQE_TYPE_SFT 1
+ #define CQ_RES_UD_CQE_TYPE_RES_UD (0x2UL << 1)
+ #define CQ_RES_UD_RESERVED3_MASK 0xe0UL
+ #define CQ_RES_UD_RESERVED3_SFT 5
+ u8 status;
+ #define CQ_RES_UD_STATUS_OK 0x0UL
+ #define CQ_RES_UD_STATUS_LOCAL_ACCESS_ERROR 0x1UL
+ #define CQ_RES_UD_STATUS_HW_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_RES_UD_STATUS_LOCAL_PROTECTION_ERR 0x3UL
+ #define CQ_RES_UD_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
+ #define CQ_RES_UD_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
+ #define CQ_RES_UD_STATUS_WORK_REQUEST_FLUSHED_ERR 0x7UL
+ #define CQ_RES_UD_STATUS_HW_FLUSH_ERR 0x8UL
+ __le16 flags;
+ #define CQ_RES_UD_FLAGS_SRQ 0x1UL
+ #define CQ_RES_UD_FLAGS_SRQ_RQ (0x0UL << 0)
+ #define CQ_RES_UD_FLAGS_SRQ_SRQ (0x1UL << 0)
+ #define CQ_RES_UD_FLAGS_SRQ_LAST CQ_RES_UD_FLAGS_SRQ_SRQ
+ #define CQ_RES_UD_FLAGS_IMM 0x2UL
+ #define CQ_RES_UD_FLAGS_ROCE_IP_VER_MASK 0xcUL
+ #define CQ_RES_UD_FLAGS_ROCE_IP_VER_SFT 2
+ #define CQ_RES_UD_FLAGS_ROCE_IP_VER_V1 (0x0UL << 2)
+ #define CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV4 (0x2UL << 2)
+ #define CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV6 (0x3UL << 2)
+ #define CQ_RES_UD_FLAGS_ROCE_IP_VER_LAST \
+ CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV6
+ __le32 src_qp_high_srq_or_rq_wr_id;
+ #define CQ_RES_UD_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL
+ #define CQ_RES_UD_SRQ_OR_RQ_WR_ID_SFT 0
+ #define CQ_RES_UD_RESERVED4_MASK 0xf00000UL
+ #define CQ_RES_UD_RESERVED4_SFT 20
+ #define CQ_RES_UD_SRC_QP_HIGH_MASK 0xff000000UL
+ #define CQ_RES_UD_SRC_QP_HIGH_SFT 24
+};
+
+/* Responder RawEth and QP1 CQE (32 bytes) */
+struct cq_res_raweth_qp1 {
+ __le16 length;
+ #define CQ_RES_RAWETH_QP1_LENGTH_MASK 0x3fffUL
+ #define CQ_RES_RAWETH_QP1_LENGTH_SFT 0
+ #define CQ_RES_RAWETH_QP1_RESERVED2_MASK 0xc000UL
+ #define CQ_RES_RAWETH_QP1_RESERVED2_SFT 14
+ __le16 raweth_qp1_flags;
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ERROR 0x1UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_RESERVED5_1_MASK 0x3eUL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_RESERVED5_1_SFT 1
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_MASK 0x3c0UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_SFT 6
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_NOT_KNOWN (0x0UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_IP (0x1UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_TCP (0x2UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_UDP (0x3UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_FCOE (0x4UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_ROCE (0x5UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_ICMP (0x7UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_PTP_WO_TIMESTAMP \
+ (0x8UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_PTP_W_TIMESTAMP \
+ (0x9UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_LAST \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_PTP_W_TIMESTAMP
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_MASK 0x3ffUL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_SFT 0
+ #define CQ_RES_RAWETH_QP1_RESERVED6_MASK 0xfc00UL
+ #define CQ_RES_RAWETH_QP1_RESERVED6_SFT 10
+ __le16 raweth_qp1_errors;
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_RESERVED4_MASK 0xfUL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_RESERVED4_SFT 0
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_IP_CS_ERROR 0x10UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_L4_CS_ERROR 0x20UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_IP_CS_ERROR 0x40UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_L4_CS_ERROR 0x80UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_CRC_ERROR 0x100UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_MASK 0xe00UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_SFT 9
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_NO_ERROR \
+ (0x0UL << 9)
+ #define \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_VERSION \
+ (0x1UL << 9)
+ #define \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_HDR_LEN \
+ (0x2UL << 9)
+ #define \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_TUNNEL_TOTAL_ERROR \
+ (0x3UL << 9)
+ #define \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_IP_TOTAL_ERROR \
+ (0x4UL << 9)
+ #define \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_UDP_TOTAL_ERROR \
+ (0x5UL << 9)
+ #define \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_TTL \
+ (0x6UL << 9)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_LAST \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_TTL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_MASK 0xf000UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_SFT 12
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_NO_ERROR \
+ (0x0UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L3_BAD_VERSION \
+ (0x1UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L3_BAD_HDR_LEN \
+ (0x2UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L3_BAD_TTL \
+ (0x3UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_IP_TOTAL_ERROR \
+ (0x4UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_UDP_TOTAL_ERROR \
+ (0x5UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_HDR_LEN \
+ (0x6UL << 12)
+ #define \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_HDR_LEN_TOO_SMALL\
+ (0x7UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_OPT_LEN \
+ (0x8UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_LAST \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_OPT_LEN
+ __le16 raweth_qp1_cfa_code;
+ __le64 qp_handle;
+ __le32 raweth_qp1_flags2;
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_IP_CS_CALC 0x1UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_L4_CS_CALC 0x2UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_T_IP_CS_CALC 0x4UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_T_L4_CS_CALC 0x8UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_MASK 0xf0UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_SFT 4
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_NONE \
+ (0x0UL << 4)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_VLAN \
+ (0x1UL << 4)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_LAST\
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_VLAN
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_IP_TYPE 0x100UL
+ __le32 raweth_qp1_metadata;
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_VID_MASK 0xfffUL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_VID_SFT 0
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_DE 0x1000UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_PRI_MASK 0xe000UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_PRI_SFT 13
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_TPID_MASK 0xffff0000UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_TPID_SFT 16
+ u8 cqe_type_toggle;
+ #define CQ_RES_RAWETH_QP1_TOGGLE 0x1UL
+ #define CQ_RES_RAWETH_QP1_CQE_TYPE_MASK 0x1eUL
+ #define CQ_RES_RAWETH_QP1_CQE_TYPE_SFT 1
+ #define CQ_RES_RAWETH_QP1_CQE_TYPE_RES_RAWETH_QP1 (0x3UL << 1)
+ #define CQ_RES_RAWETH_QP1_RESERVED3_MASK 0xe0UL
+ #define CQ_RES_RAWETH_QP1_RESERVED3_SFT 5
+ u8 status;
+ #define CQ_RES_RAWETH_QP1_STATUS_OK 0x0UL
+ #define CQ_RES_RAWETH_QP1_STATUS_LOCAL_ACCESS_ERROR 0x1UL
+ #define CQ_RES_RAWETH_QP1_STATUS_HW_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_RES_RAWETH_QP1_STATUS_LOCAL_PROTECTION_ERR 0x3UL
+ #define CQ_RES_RAWETH_QP1_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
+ #define CQ_RES_RAWETH_QP1_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
+ #define CQ_RES_RAWETH_QP1_STATUS_WORK_REQUEST_FLUSHED_ERR 0x7UL
+ #define CQ_RES_RAWETH_QP1_STATUS_HW_FLUSH_ERR 0x8UL
+ __le16 flags;
+ #define CQ_RES_RAWETH_QP1_FLAGS_SRQ 0x1UL
+ #define CQ_RES_RAWETH_QP1_FLAGS_SRQ_RQ 0x0UL
+ #define CQ_RES_RAWETH_QP1_FLAGS_SRQ_SRQ 0x1UL
+ #define CQ_RES_RAWETH_QP1_FLAGS_SRQ_LAST \
+ CQ_RES_RAWETH_QP1_FLAGS_SRQ_SRQ
+ __le32 raweth_qp1_payload_offset_srq_or_rq_wr_id;
+ #define CQ_RES_RAWETH_QP1_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL
+ #define CQ_RES_RAWETH_QP1_SRQ_OR_RQ_WR_ID_SFT 0
+ #define CQ_RES_RAWETH_QP1_RESERVED4_MASK 0xf00000UL
+ #define CQ_RES_RAWETH_QP1_RESERVED4_SFT 20
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_PAYLOAD_OFFSET_MASK 0xff000000UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_PAYLOAD_OFFSET_SFT 24
+};
+
+/* Terminal CQE (32 bytes) */
+struct cq_terminal {
+ __le64 qp_handle;
+ __le16 sq_cons_idx;
+ __le16 rq_cons_idx;
+ __le32 reserved32_1;
+ __le64 reserved64_3;
+ u8 cqe_type_toggle;
+ #define CQ_TERMINAL_TOGGLE 0x1UL
+ #define CQ_TERMINAL_CQE_TYPE_MASK 0x1eUL
+ #define CQ_TERMINAL_CQE_TYPE_SFT 1
+ #define CQ_TERMINAL_CQE_TYPE_TERMINAL (0xeUL << 1)
+ #define CQ_TERMINAL_RESERVED3_MASK 0xe0UL
+ #define CQ_TERMINAL_RESERVED3_SFT 5
+ u8 status;
+ #define CQ_TERMINAL_STATUS_OK 0x0UL
+ __le16 reserved16;
+ __le32 reserved32_2;
+};
+
+/* Cutoff CQE (32 bytes) */
+struct cq_cutoff {
+ __le64 reserved64_1;
+ __le64 reserved64_2;
+ __le64 reserved64_3;
+ u8 cqe_type_toggle;
+ #define CQ_CUTOFF_TOGGLE 0x1UL
+ #define CQ_CUTOFF_CQE_TYPE_MASK 0x1eUL
+ #define CQ_CUTOFF_CQE_TYPE_SFT 1
+ #define CQ_CUTOFF_CQE_TYPE_CUT_OFF (0xfUL << 1)
+ #define CQ_CUTOFF_RESERVED3_MASK 0xe0UL
+ #define CQ_CUTOFF_RESERVED3_SFT 5
+ u8 status;
+ #define CQ_CUTOFF_STATUS_OK 0x0UL
+ __le16 reserved16;
+ __le32 reserved32;
+};
+
+/* Notification Queue (NQ) Structures */
+/* Base NQ Record (16 bytes) */
+struct nq_base {
+ __le16 info10_type;
+ #define NQ_BASE_TYPE_MASK 0x3fUL
+ #define NQ_BASE_TYPE_SFT 0
+ #define NQ_BASE_TYPE_CQ_NOTIFICATION 0x30UL
+ #define NQ_BASE_TYPE_SRQ_EVENT 0x32UL
+ #define NQ_BASE_TYPE_DBQ_EVENT 0x34UL
+ #define NQ_BASE_TYPE_QP_EVENT 0x38UL
+ #define NQ_BASE_TYPE_FUNC_EVENT 0x3aUL
+ #define NQ_BASE_INFO10_MASK 0xffc0UL
+ #define NQ_BASE_INFO10_SFT 6
+ __le16 info16;
+ __le32 info32;
+ __le32 info63_v[2];
+ #define NQ_BASE_V 0x1UL
+ #define NQ_BASE_INFO63_MASK 0xfffffffeUL
+ #define NQ_BASE_INFO63_SFT 1
+};
+
+/* Completion Queue Notification (16 bytes) */
+struct nq_cn {
+ __le16 type;
+ #define NQ_CN_TYPE_MASK 0x3fUL
+ #define NQ_CN_TYPE_SFT 0
+ #define NQ_CN_TYPE_CQ_NOTIFICATION 0x30UL
+ #define NQ_CN_RESERVED9_MASK 0xffc0UL
+ #define NQ_CN_RESERVED9_SFT 6
+ __le16 reserved16;
+ __le32 cq_handle_low;
+ __le32 v;
+ #define NQ_CN_V 0x1UL
+ #define NQ_CN_RESERVED31_MASK 0xfffffffeUL
+ #define NQ_CN_RESERVED31_SFT 1
+ __le32 cq_handle_high;
+};
+
+/* SRQ Event Notification (16 bytes) */
+struct nq_srq_event {
+ u8 type;
+ #define NQ_SRQ_EVENT_TYPE_MASK 0x3fUL
+ #define NQ_SRQ_EVENT_TYPE_SFT 0
+ #define NQ_SRQ_EVENT_TYPE_SRQ_EVENT 0x32UL
+ #define NQ_SRQ_EVENT_RESERVED1_MASK 0xc0UL
+ #define NQ_SRQ_EVENT_RESERVED1_SFT 6
+ u8 event;
+ #define NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT 0x1UL
+ __le16 reserved16;
+ __le32 srq_handle_low;
+ __le32 v;
+ #define NQ_SRQ_EVENT_V 0x1UL
+ #define NQ_SRQ_EVENT_RESERVED31_MASK 0xfffffffeUL
+ #define NQ_SRQ_EVENT_RESERVED31_SFT 1
+ __le32 srq_handle_high;
+};
+
+/* DBQ Async Event Notification (16 bytes) */
+struct nq_dbq_event {
+ u8 type;
+ #define NQ_DBQ_EVENT_TYPE_MASK 0x3fUL
+ #define NQ_DBQ_EVENT_TYPE_SFT 0
+ #define NQ_DBQ_EVENT_TYPE_DBQ_EVENT 0x34UL
+ #define NQ_DBQ_EVENT_RESERVED1_MASK 0xc0UL
+ #define NQ_DBQ_EVENT_RESERVED1_SFT 6
+ u8 event;
+ #define NQ_DBQ_EVENT_EVENT_DBQ_THRESHOLD_EVENT 0x1UL
+ __le16 db_pfid;
+ #define NQ_DBQ_EVENT_DB_PFID_MASK 0xfUL
+ #define NQ_DBQ_EVENT_DB_PFID_SFT 0
+ #define NQ_DBQ_EVENT_RESERVED12_MASK 0xfff0UL
+ #define NQ_DBQ_EVENT_RESERVED12_SFT 4
+ __le32 db_dpi;
+ #define NQ_DBQ_EVENT_DB_DPI_MASK 0xfffffUL
+ #define NQ_DBQ_EVENT_DB_DPI_SFT 0
+ #define NQ_DBQ_EVENT_RESERVED12_2_MASK 0xfff00000UL
+ #define NQ_DBQ_EVENT_RESERVED12_2_SFT 20
+ __le32 v;
+ #define NQ_DBQ_EVENT_V 0x1UL
+ #define NQ_DBQ_EVENT_RESERVED32_MASK 0xfffffffeUL
+ #define NQ_DBQ_EVENT_RESERVED32_SFT 1
+ __le32 db_type_db_xid;
+ #define NQ_DBQ_EVENT_DB_XID_MASK 0xfffffUL
+ #define NQ_DBQ_EVENT_DB_XID_SFT 0
+ #define NQ_DBQ_EVENT_RESERVED8_MASK 0xff00000UL
+ #define NQ_DBQ_EVENT_RESERVED8_SFT 20
+ #define NQ_DBQ_EVENT_DB_TYPE_MASK 0xf0000000UL
+ #define NQ_DBQ_EVENT_DB_TYPE_SFT 28
+};
+
+/* Read Request/Response Queue Structures */
+/* Input Read Request Queue (IRRQ) Message (32 bytes) */
+struct xrrq_irrq {
+ __le16 credits_type;
+ #define XRRQ_IRRQ_TYPE 0x1UL
+ #define XRRQ_IRRQ_TYPE_READ_REQ 0x0UL
+ #define XRRQ_IRRQ_TYPE_ATOMIC_REQ 0x1UL
+ #define XRRQ_IRRQ_RESERVED10_MASK 0x7feUL
+ #define XRRQ_IRRQ_RESERVED10_SFT 1
+ #define XRRQ_IRRQ_CREDITS_MASK 0xf800UL
+ #define XRRQ_IRRQ_CREDITS_SFT 11
+ __le16 reserved16;
+ __le32 reserved32;
+ __le32 psn;
+ #define XRRQ_IRRQ_PSN_MASK 0xffffffUL
+ #define XRRQ_IRRQ_PSN_SFT 0
+ #define XRRQ_IRRQ_RESERVED8_1_MASK 0xff000000UL
+ #define XRRQ_IRRQ_RESERVED8_1_SFT 24
+ __le32 msn;
+ #define XRRQ_IRRQ_MSN_MASK 0xffffffUL
+ #define XRRQ_IRRQ_MSN_SFT 0
+ #define XRRQ_IRRQ_RESERVED8_2_MASK 0xff000000UL
+ #define XRRQ_IRRQ_RESERVED8_2_SFT 24
+ __le64 va_or_atomic_result;
+ __le32 rdma_r_key;
+ __le32 length;
+};
+
+/* Output Read Request Queue (ORRQ) Message (32 bytes) */
+struct xrrq_orrq {
+ __le16 num_sges_type;
+ #define XRRQ_ORRQ_TYPE 0x1UL
+ #define XRRQ_ORRQ_TYPE_READ_REQ 0x0UL
+ #define XRRQ_ORRQ_TYPE_ATOMIC_REQ 0x1UL
+ #define XRRQ_ORRQ_RESERVED10_MASK 0x7feUL
+ #define XRRQ_ORRQ_RESERVED10_SFT 1
+ #define XRRQ_ORRQ_NUM_SGES_MASK 0xf800UL
+ #define XRRQ_ORRQ_NUM_SGES_SFT 11
+ __le16 reserved16;
+ __le32 length;
+ __le32 psn;
+ #define XRRQ_ORRQ_PSN_MASK 0xffffffUL
+ #define XRRQ_ORRQ_PSN_SFT 0
+ #define XRRQ_ORRQ_RESERVED8_1_MASK 0xff000000UL
+ #define XRRQ_ORRQ_RESERVED8_1_SFT 24
+ __le32 end_psn;
+ #define XRRQ_ORRQ_END_PSN_MASK 0xffffffUL
+ #define XRRQ_ORRQ_END_PSN_SFT 0
+ #define XRRQ_ORRQ_RESERVED8_2_MASK 0xff000000UL
+ #define XRRQ_ORRQ_RESERVED8_2_SFT 24
+ __le64 first_sge_phy_or_sing_sge_va;
+ __le32 single_sge_l_key;
+ __le32 single_sge_size;
+};
+
+/* Page Buffer List Memory Structures (PBL) */
+/* Page Table Entry (PTE) (8 bytes) */
+struct ptu_pte {
+ __le32 page_next_to_last_last_valid[2];
+ #define PTU_PTE_VALID 0x1UL
+ #define PTU_PTE_LAST 0x2UL
+ #define PTU_PTE_NEXT_TO_LAST 0x4UL
+ #define PTU_PTE_PAGE_MASK 0xfffff000UL
+ #define PTU_PTE_PAGE_SFT 12
+};
+
+/* Page Directory Entry (PDE) (8 bytes) */
+struct ptu_pde {
+ __le32 page_valid[2];
+ #define PTU_PDE_VALID 0x1UL
+ #define PTU_PDE_PAGE_MASK 0xfffff000UL
+ #define PTU_PDE_PAGE_SFT 12
+};
+
+/* RoCE Fastpath Host Structures */
+/* Command Queue (CMDQ) Interface */
+/* Init CMDQ (16 bytes) */
+struct cmdq_init {
+ __le64 cmdq_pbl;
+ __le16 cmdq_size_cmdq_lvl;
+ #define CMDQ_INIT_CMDQ_LVL_MASK 0x3UL
+ #define CMDQ_INIT_CMDQ_LVL_SFT 0
+ #define CMDQ_INIT_CMDQ_SIZE_MASK 0xfffcUL
+ #define CMDQ_INIT_CMDQ_SIZE_SFT 2
+ __le16 creq_ring_id;
+ __le32 prod_idx;
+};
+
+/* Update CMDQ producer index (16 bytes) */
+struct cmdq_update {
+ __le64 reserved64;
+ __le32 reserved32;
+ __le32 prod_idx;
+};
+
+/* CMDQ common header structure (16 bytes) */
+struct cmdq_base {
+ u8 opcode;
+ #define CMDQ_BASE_OPCODE_CREATE_QP 0x1UL
+ #define CMDQ_BASE_OPCODE_DESTROY_QP 0x2UL
+ #define CMDQ_BASE_OPCODE_MODIFY_QP 0x3UL
+ #define CMDQ_BASE_OPCODE_QUERY_QP 0x4UL
+ #define CMDQ_BASE_OPCODE_CREATE_SRQ 0x5UL
+ #define CMDQ_BASE_OPCODE_DESTROY_SRQ 0x6UL
+ #define CMDQ_BASE_OPCODE_QUERY_SRQ 0x8UL
+ #define CMDQ_BASE_OPCODE_CREATE_CQ 0x9UL
+ #define CMDQ_BASE_OPCODE_DESTROY_CQ 0xaUL
+ #define CMDQ_BASE_OPCODE_RESIZE_CQ 0xcUL
+ #define CMDQ_BASE_OPCODE_ALLOCATE_MRW 0xdUL
+ #define CMDQ_BASE_OPCODE_DEALLOCATE_KEY 0xeUL
+ #define CMDQ_BASE_OPCODE_REGISTER_MR 0xfUL
+ #define CMDQ_BASE_OPCODE_DEREGISTER_MR 0x10UL
+ #define CMDQ_BASE_OPCODE_ADD_GID 0x11UL
+ #define CMDQ_BASE_OPCODE_DELETE_GID 0x12UL
+ #define CMDQ_BASE_OPCODE_MODIFY_GID 0x17UL
+ #define CMDQ_BASE_OPCODE_QUERY_GID 0x18UL
+ #define CMDQ_BASE_OPCODE_CREATE_QP1 0x13UL
+ #define CMDQ_BASE_OPCODE_DESTROY_QP1 0x14UL
+ #define CMDQ_BASE_OPCODE_CREATE_AH 0x15UL
+ #define CMDQ_BASE_OPCODE_DESTROY_AH 0x16UL
+ #define CMDQ_BASE_OPCODE_INITIALIZE_FW 0x80UL
+ #define CMDQ_BASE_OPCODE_DEINITIALIZE_FW 0x81UL
+ #define CMDQ_BASE_OPCODE_STOP_FUNC 0x82UL
+ #define CMDQ_BASE_OPCODE_QUERY_FUNC 0x83UL
+ #define CMDQ_BASE_OPCODE_SET_FUNC_RESOURCES 0x84UL
+ #define CMDQ_BASE_OPCODE_READ_CONTEXT 0x85UL
+ #define CMDQ_BASE_OPCODE_VF_BACKCHANNEL_REQUEST 0x86UL
+ #define CMDQ_BASE_OPCODE_READ_VF_MEMORY 0x87UL
+ #define CMDQ_BASE_OPCODE_COMPLETE_VF_REQUEST 0x88UL
+ #define CMDQ_BASE_OPCODE_EXTEND_CONTEXT_ARRRAY 0x89UL
+ #define CMDQ_BASE_OPCODE_MAP_TC_TO_COS 0x8aUL
+ #define CMDQ_BASE_OPCODE_QUERY_VERSION 0x8bUL
+ #define CMDQ_BASE_OPCODE_MODIFY_CC 0x8cUL
+ #define CMDQ_BASE_OPCODE_QUERY_CC 0x8dUL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+};
+
+/* Create QP command (96 bytes) */
+struct cmdq_create_qp {
+ u8 opcode;
+ #define CMDQ_CREATE_QP_OPCODE_CREATE_QP 0x1UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le64 qp_handle;
+ __le32 qp_flags;
+ #define CMDQ_CREATE_QP_QP_FLAGS_SRQ_USED 0x1UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION 0x2UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE 0x4UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED 0x8UL
+ u8 type;
+ #define CMDQ_CREATE_QP_TYPE_RC 0x2UL
+ #define CMDQ_CREATE_QP_TYPE_UD 0x4UL
+ #define CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE 0x6UL
+ u8 sq_pg_size_sq_lvl;
+ #define CMDQ_CREATE_QP_SQ_LVL_MASK 0xfUL
+ #define CMDQ_CREATE_QP_SQ_LVL_SFT 0
+ #define CMDQ_CREATE_QP_SQ_LVL_LVL_0 0x0UL
+ #define CMDQ_CREATE_QP_SQ_LVL_LVL_1 0x1UL
+ #define CMDQ_CREATE_QP_SQ_LVL_LVL_2 0x2UL
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_SFT 4
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_1G (0x5UL << 4)
+ u8 rq_pg_size_rq_lvl;
+ #define CMDQ_CREATE_QP_RQ_LVL_MASK 0xfUL
+ #define CMDQ_CREATE_QP_RQ_LVL_SFT 0
+ #define CMDQ_CREATE_QP_RQ_LVL_LVL_0 0x0UL
+ #define CMDQ_CREATE_QP_RQ_LVL_LVL_1 0x1UL
+ #define CMDQ_CREATE_QP_RQ_LVL_LVL_2 0x2UL
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_SFT 4
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_1G (0x5UL << 4)
+ u8 unused_0;
+ __le32 dpi;
+ __le32 sq_size;
+ __le32 rq_size;
+ __le16 sq_fwo_sq_sge;
+ #define CMDQ_CREATE_QP_SQ_SGE_MASK 0xfUL
+ #define CMDQ_CREATE_QP_SQ_SGE_SFT 0
+ #define CMDQ_CREATE_QP_SQ_FWO_MASK 0xfff0UL
+ #define CMDQ_CREATE_QP_SQ_FWO_SFT 4
+ __le16 rq_fwo_rq_sge;
+ #define CMDQ_CREATE_QP_RQ_SGE_MASK 0xfUL
+ #define CMDQ_CREATE_QP_RQ_SGE_SFT 0
+ #define CMDQ_CREATE_QP_RQ_FWO_MASK 0xfff0UL
+ #define CMDQ_CREATE_QP_RQ_FWO_SFT 4
+ __le32 scq_cid;
+ __le32 rcq_cid;
+ __le32 srq_cid;
+ __le32 pd_id;
+ __le64 sq_pbl;
+ __le64 rq_pbl;
+ __le64 irrq_addr;
+ __le64 orrq_addr;
+};
+
+/* Destroy QP command (24 bytes) */
+struct cmdq_destroy_qp {
+ u8 opcode;
+ #define CMDQ_DESTROY_QP_OPCODE_DESTROY_QP 0x2UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 qp_cid;
+ __le32 unused_0;
+};
+
+/* Modify QP command (112 bytes) */
+struct cmdq_modify_qp {
+ u8 opcode;
+ #define CMDQ_MODIFY_QP_OPCODE_MODIFY_QP 0x3UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 modify_mask;
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_STATE 0x1UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_EN_SQD_ASYNC_NOTIFY 0x2UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS 0x4UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_PKEY 0x8UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_QKEY 0x10UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_DGID 0x20UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL 0x40UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX 0x80UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_HOP_LIMIT 0x100UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_TRAFFIC_CLASS 0x200UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_DEST_MAC 0x400UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU 0x1000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_TIMEOUT 0x2000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_RETRY_CNT 0x4000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_RNR_RETRY 0x8000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_RQ_PSN 0x10000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_MAX_RD_ATOMIC 0x20000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_MIN_RNR_TIMER 0x40000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_SQ_PSN 0x80000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_MAX_DEST_RD_ATOMIC 0x100000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_SQ_SIZE 0x200000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_RQ_SIZE 0x400000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_SQ_SGE 0x800000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_RQ_SGE 0x1000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_MAX_INLINE_DATA 0x2000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_DEST_QP_ID 0x4000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_SRC_MAC 0x8000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID 0x10000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_ENABLE_CC 0x20000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_TOS_ECN 0x40000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_TOS_DSCP 0x80000000UL
+ __le32 qp_cid;
+ u8 network_type_en_sqd_async_notify_new_state;
+ #define CMDQ_MODIFY_QP_NEW_STATE_MASK 0xfUL
+ #define CMDQ_MODIFY_QP_NEW_STATE_SFT 0
+ #define CMDQ_MODIFY_QP_NEW_STATE_RESET 0x0UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_INIT 0x1UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_RTR 0x2UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_RTS 0x3UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_SQD 0x4UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_SQE 0x5UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_ERR 0x6UL
+ #define CMDQ_MODIFY_QP_EN_SQD_ASYNC_NOTIFY 0x10UL
+ #define CMDQ_MODIFY_QP_NETWORK_TYPE_MASK 0xc0UL
+ #define CMDQ_MODIFY_QP_NETWORK_TYPE_SFT 6
+ #define CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1 (0x0UL << 6)
+ #define CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4 (0x2UL << 6)
+ #define CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6 (0x3UL << 6)
+ u8 access;
+ #define CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE 0x1UL
+ #define CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE 0x2UL
+ #define CMDQ_MODIFY_QP_ACCESS_REMOTE_READ 0x4UL
+ #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC 0x8UL
+ __le16 pkey;
+ __le32 qkey;
+ __le32 dgid[4];
+ __le32 flow_label;
+ __le16 sgid_index;
+ u8 hop_limit;
+ u8 traffic_class;
+ __le16 dest_mac[3];
+ u8 tos_dscp_tos_ecn;
+ #define CMDQ_MODIFY_QP_TOS_ECN_MASK 0x3UL
+ #define CMDQ_MODIFY_QP_TOS_ECN_SFT 0
+ #define CMDQ_MODIFY_QP_TOS_DSCP_MASK 0xfcUL
+ #define CMDQ_MODIFY_QP_TOS_DSCP_SFT 2
+ u8 path_mtu;
+ #define CMDQ_MODIFY_QP_PATH_MTU_MASK 0xf0UL
+ #define CMDQ_MODIFY_QP_PATH_MTU_SFT 4
+ #define CMDQ_MODIFY_QP_PATH_MTU_MTU_256 (0x0UL << 4)
+ #define CMDQ_MODIFY_QP_PATH_MTU_MTU_512 (0x1UL << 4)
+ #define CMDQ_MODIFY_QP_PATH_MTU_MTU_1024 (0x2UL << 4)
+ #define CMDQ_MODIFY_QP_PATH_MTU_MTU_2048 (0x3UL << 4)
+ #define CMDQ_MODIFY_QP_PATH_MTU_MTU_4096 (0x4UL << 4)
+ #define CMDQ_MODIFY_QP_PATH_MTU_MTU_8192 (0x5UL << 4)
+ u8 timeout;
+ u8 retry_cnt;
+ u8 rnr_retry;
+ u8 min_rnr_timer;
+ __le32 rq_psn;
+ __le32 sq_psn;
+ u8 max_rd_atomic;
+ u8 max_dest_rd_atomic;
+ __le16 enable_cc;
+ #define CMDQ_MODIFY_QP_ENABLE_CC 0x1UL
+ __le32 sq_size;
+ __le32 rq_size;
+ __le16 sq_sge;
+ __le16 rq_sge;
+ __le32 max_inline_data;
+ __le32 dest_qp_id;
+ __le32 unused_3;
+ __le16 src_mac[3];
+ __le16 vlan_pcp_vlan_dei_vlan_id;
+ #define CMDQ_MODIFY_QP_VLAN_ID_MASK 0xfffUL
+ #define CMDQ_MODIFY_QP_VLAN_ID_SFT 0
+ #define CMDQ_MODIFY_QP_VLAN_DEI 0x1000UL
+ #define CMDQ_MODIFY_QP_VLAN_PCP_MASK 0xe000UL
+ #define CMDQ_MODIFY_QP_VLAN_PCP_SFT 13
+};
+
+/* Query QP command (24 bytes) */
+struct cmdq_query_qp {
+ u8 opcode;
+ #define CMDQ_QUERY_QP_OPCODE_QUERY_QP 0x4UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 qp_cid;
+ __le32 unused_0;
+};
+
+/* Create SRQ command (48 bytes) */
+struct cmdq_create_srq {
+ u8 opcode;
+ #define CMDQ_CREATE_SRQ_OPCODE_CREATE_SRQ 0x5UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le64 srq_handle;
+ __le16 pg_size_lvl;
+ #define CMDQ_CREATE_SRQ_LVL_MASK 0x3UL
+ #define CMDQ_CREATE_SRQ_LVL_SFT 0
+ #define CMDQ_CREATE_SRQ_LVL_LVL_0 0x0UL
+ #define CMDQ_CREATE_SRQ_LVL_LVL_1 0x1UL
+ #define CMDQ_CREATE_SRQ_LVL_LVL_2 0x2UL
+ #define CMDQ_CREATE_SRQ_PG_SIZE_MASK 0x1cUL
+ #define CMDQ_CREATE_SRQ_PG_SIZE_SFT 2
+ #define CMDQ_CREATE_SRQ_PG_SIZE_PG_4K (0x0UL << 2)
+ #define CMDQ_CREATE_SRQ_PG_SIZE_PG_8K (0x1UL << 2)
+ #define CMDQ_CREATE_SRQ_PG_SIZE_PG_64K (0x2UL << 2)
+ #define CMDQ_CREATE_SRQ_PG_SIZE_PG_2M (0x3UL << 2)
+ #define CMDQ_CREATE_SRQ_PG_SIZE_PG_8M (0x4UL << 2)
+ #define CMDQ_CREATE_SRQ_PG_SIZE_PG_1G (0x5UL << 2)
+ __le16 eventq_id;
+ #define CMDQ_CREATE_SRQ_EVENTQ_ID_MASK 0xfffUL
+ #define CMDQ_CREATE_SRQ_EVENTQ_ID_SFT 0
+ __le16 srq_size;
+ __le16 srq_fwo;
+ __le32 dpi;
+ __le32 pd_id;
+ __le64 pbl;
+};
+
+/* Destroy SRQ command (24 bytes) */
+struct cmdq_destroy_srq {
+ u8 opcode;
+ #define CMDQ_DESTROY_SRQ_OPCODE_DESTROY_SRQ 0x6UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 srq_cid;
+ __le32 unused_0;
+};
+
+/* Query SRQ command (24 bytes) */
+struct cmdq_query_srq {
+ u8 opcode;
+ #define CMDQ_QUERY_SRQ_OPCODE_QUERY_SRQ 0x8UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 srq_cid;
+ __le32 unused_0;
+};
+
+/* Create CQ command (48 bytes) */
+struct cmdq_create_cq {
+ u8 opcode;
+ #define CMDQ_CREATE_CQ_OPCODE_CREATE_CQ 0x9UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le64 cq_handle;
+ __le32 pg_size_lvl;
+ #define CMDQ_CREATE_CQ_LVL_MASK 0x3UL
+ #define CMDQ_CREATE_CQ_LVL_SFT 0
+ #define CMDQ_CREATE_CQ_LVL_LVL_0 0x0UL
+ #define CMDQ_CREATE_CQ_LVL_LVL_1 0x1UL
+ #define CMDQ_CREATE_CQ_LVL_LVL_2 0x2UL
+ #define CMDQ_CREATE_CQ_PG_SIZE_MASK 0x1cUL
+ #define CMDQ_CREATE_CQ_PG_SIZE_SFT 2
+ #define CMDQ_CREATE_CQ_PG_SIZE_PG_4K (0x0UL << 2)
+ #define CMDQ_CREATE_CQ_PG_SIZE_PG_8K (0x1UL << 2)
+ #define CMDQ_CREATE_CQ_PG_SIZE_PG_64K (0x2UL << 2)
+ #define CMDQ_CREATE_CQ_PG_SIZE_PG_2M (0x3UL << 2)
+ #define CMDQ_CREATE_CQ_PG_SIZE_PG_8M (0x4UL << 2)
+ #define CMDQ_CREATE_CQ_PG_SIZE_PG_1G (0x5UL << 2)
+ __le32 cq_fco_cnq_id;
+ #define CMDQ_CREATE_CQ_CNQ_ID_MASK 0xfffUL
+ #define CMDQ_CREATE_CQ_CNQ_ID_SFT 0
+ #define CMDQ_CREATE_CQ_CQ_FCO_MASK 0xfffff000UL
+ #define CMDQ_CREATE_CQ_CQ_FCO_SFT 12
+ __le32 dpi;
+ __le32 cq_size;
+ __le64 pbl;
+};
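
pg_size_lvl and cq_fco_cnq_id are both packed words: the first carries the PBL
indirection level and backing page size, the second the completion notification
queue binding and the CQ first-word offset. A hedged sketch of filling the
create-CQ request, assuming a zeroed structure and caller-supplied cnq_id,
cq_depth and pbl_dma values (the helper name is an assumption):

    /* Illustrative only: level-1 PBL of 4K pages bound to the given CNQ. */
    static void fill_create_cq(struct cmdq_create_cq *req, u32 cnq_id,
                               u32 cq_depth, u64 pbl_dma)
    {
            req->pg_size_lvl = cpu_to_le32(CMDQ_CREATE_CQ_LVL_LVL_1 |
                                           CMDQ_CREATE_CQ_PG_SIZE_PG_4K);
            req->cq_fco_cnq_id = cpu_to_le32(cnq_id &
                                             CMDQ_CREATE_CQ_CNQ_ID_MASK);
            req->cq_size = cpu_to_le32(cq_depth);
            req->pbl = cpu_to_le64(pbl_dma);
    }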
+
+/* Destroy CQ command (24 bytes) */
+struct cmdq_destroy_cq {
+ u8 opcode;
+ #define CMDQ_DESTROY_CQ_OPCODE_DESTROY_CQ 0xaUL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 cq_cid;
+ __le32 unused_0;
+};
+
+/* Resize CQ command (40 bytes) */
+struct cmdq_resize_cq {
+ u8 opcode;
+ #define CMDQ_RESIZE_CQ_OPCODE_RESIZE_CQ 0xcUL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 cq_cid;
+ __le32 new_cq_size_pg_size_lvl;
+ #define CMDQ_RESIZE_CQ_LVL_MASK 0x3UL
+ #define CMDQ_RESIZE_CQ_LVL_SFT 0
+ #define CMDQ_RESIZE_CQ_LVL_LVL_0 0x0UL
+ #define CMDQ_RESIZE_CQ_LVL_LVL_1 0x1UL
+ #define CMDQ_RESIZE_CQ_LVL_LVL_2 0x2UL
+ #define CMDQ_RESIZE_CQ_PG_SIZE_MASK 0x1cUL
+ #define CMDQ_RESIZE_CQ_PG_SIZE_SFT 2
+ #define CMDQ_RESIZE_CQ_PG_SIZE_PG_4K (0x0UL << 2)
+ #define CMDQ_RESIZE_CQ_PG_SIZE_PG_8K (0x1UL << 2)
+ #define CMDQ_RESIZE_CQ_PG_SIZE_PG_64K (0x2UL << 2)
+ #define CMDQ_RESIZE_CQ_PG_SIZE_PG_2M (0x3UL << 2)
+ #define CMDQ_RESIZE_CQ_PG_SIZE_PG_8M (0x4UL << 2)
+ #define CMDQ_RESIZE_CQ_PG_SIZE_PG_1G (0x5UL << 2)
+ #define CMDQ_RESIZE_CQ_NEW_CQ_SIZE_MASK 0x1fffe0UL
+ #define CMDQ_RESIZE_CQ_NEW_CQ_SIZE_SFT 5
+ __le64 new_pbl;
+ __le32 new_cq_fco;
+ __le32 unused_2;
+};
+
+/* Allocate MRW command (32 bytes) */
+struct cmdq_allocate_mrw {
+ u8 opcode;
+ #define CMDQ_ALLOCATE_MRW_OPCODE_ALLOCATE_MRW 0xdUL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le64 mrw_handle;
+ u8 mrw_flags;
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MASK 0xfUL
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_SFT 0
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MR 0x0UL
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR 0x1UL
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1 0x2UL
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A 0x3UL
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B 0x4UL
+ u8 access;
+ #define CMDQ_ALLOCATE_MRW_ACCESS_RESERVED_MASK 0x1fUL
+ #define CMDQ_ALLOCATE_MRW_ACCESS_RESERVED_SFT 0
+ #define CMDQ_ALLOCATE_MRW_ACCESS_CONSUMER_OWNED_KEY 0x20UL
+ __le16 unused_1;
+ __le32 pd_id;
+};
+
+/* De-allocate key command (24 bytes) */
+struct cmdq_deallocate_key {
+ u8 opcode;
+ #define CMDQ_DEALLOCATE_KEY_OPCODE_DEALLOCATE_KEY 0xeUL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ u8 mrw_flags;
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MASK 0xfUL
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_SFT 0
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MR 0x0UL
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_PMR 0x1UL
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MW_TYPE1 0x2UL
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MW_TYPE2A 0x3UL
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MW_TYPE2B 0x4UL
+ u8 unused_1[3];
+ __le32 key;
+};
+
+/* Register MR command (48 bytes) */
+struct cmdq_register_mr {
+ u8 opcode;
+ #define CMDQ_REGISTER_MR_OPCODE_REGISTER_MR 0xfUL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ u8 log2_pg_size_lvl;
+ #define CMDQ_REGISTER_MR_LVL_MASK 0x3UL
+ #define CMDQ_REGISTER_MR_LVL_SFT 0
+ #define CMDQ_REGISTER_MR_LVL_LVL_0 0x0UL
+ #define CMDQ_REGISTER_MR_LVL_LVL_1 0x1UL
+ #define CMDQ_REGISTER_MR_LVL_LVL_2 0x2UL
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_MASK 0x7cUL
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_SFT 2
+ u8 access;
+ #define CMDQ_REGISTER_MR_ACCESS_LOCAL_WRITE 0x1UL
+ #define CMDQ_REGISTER_MR_ACCESS_REMOTE_READ 0x2UL
+ #define CMDQ_REGISTER_MR_ACCESS_REMOTE_WRITE 0x4UL
+ #define CMDQ_REGISTER_MR_ACCESS_REMOTE_ATOMIC 0x8UL
+ #define CMDQ_REGISTER_MR_ACCESS_MW_BIND 0x10UL
+ #define CMDQ_REGISTER_MR_ACCESS_ZERO_BASED 0x20UL
+ __le16 unused_1;
+ __le32 key;
+ __le64 pbl;
+ __le64 va;
+ __le64 mr_size;
+};
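
The register-MR request describes the region geometry with a packed
log2_pg_size_lvl byte (PBL level in bits 0-1, log2 of the page size in bits
2-6) plus a bitmap of access rights. A minimal sketch with an assumed helper
name and 4K pages (log2 = 12):

    /* Illustrative only: level-0 PBL, 4K pages, local-write + remote-read. */
    static void fill_register_mr(struct cmdq_register_mr *req, u32 lkey,
                                 u64 va, u64 len, u64 pbl_dma)
    {
            req->log2_pg_size_lvl = CMDQ_REGISTER_MR_LVL_LVL_0 |
                    ((12 << CMDQ_REGISTER_MR_LOG2_PG_SIZE_SFT) &
                     CMDQ_REGISTER_MR_LOG2_PG_SIZE_MASK);
            req->access = CMDQ_REGISTER_MR_ACCESS_LOCAL_WRITE |
                          CMDQ_REGISTER_MR_ACCESS_REMOTE_READ;
            req->key = cpu_to_le32(lkey);
            req->pbl = cpu_to_le64(pbl_dma);
            req->va = cpu_to_le64(va);
            req->mr_size = cpu_to_le64(len);
    }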
+
+/* Deregister MR command (24 bytes) */
+struct cmdq_deregister_mr {
+ u8 opcode;
+ #define CMDQ_DEREGISTER_MR_OPCODE_DEREGISTER_MR 0x10UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 lkey;
+ __le32 unused_0;
+};
+
+/* Add GID command (48 bytes) */
+struct cmdq_add_gid {
+ u8 opcode;
+ #define CMDQ_ADD_GID_OPCODE_ADD_GID 0x11UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __be32 gid[4];
+ __be16 src_mac[3];
+ __le16 vlan;
+ #define CMDQ_ADD_GID_VLAN_VLAN_ID_MASK 0xfffUL
+ #define CMDQ_ADD_GID_VLAN_VLAN_ID_SFT 0
+ #define CMDQ_ADD_GID_VLAN_TPID_MASK 0x7000UL
+ #define CMDQ_ADD_GID_VLAN_TPID_SFT 12
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_88A8 (0x0UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_8100 (0x1UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_9100 (0x2UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_9200 (0x3UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_9300 (0x4UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_CFG1 (0x5UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_CFG2 (0x6UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_CFG3 (0x7UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_LAST CMDQ_ADD_GID_VLAN_TPID_TPID_CFG3
+ #define CMDQ_ADD_GID_VLAN_VLAN_EN 0x8000UL
+ __le16 ipid;
+ __le16 stats_ctx;
+ #define CMDQ_ADD_GID_STATS_CTX_STATS_CTX_ID_MASK 0x7fffUL
+ #define CMDQ_ADD_GID_STATS_CTX_STATS_CTX_ID_SFT 0
+ #define CMDQ_ADD_GID_STATS_CTX_STATS_CTX_VALID 0x8000UL
+ __le32 unused_0;
+};
+
+/* Delete GID command (24 bytes) */
+struct cmdq_delete_gid {
+ u8 opcode;
+ #define CMDQ_DELETE_GID_OPCODE_DELETE_GID 0x12UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le16 gid_index;
+ __le16 unused_0;
+ __le32 unused_1;
+};
+
+/* Modify GID command (48 bytes) */
+struct cmdq_modify_gid {
+ u8 opcode;
+ #define CMDQ_MODIFY_GID_OPCODE_MODIFY_GID 0x17UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 gid[4];
+ __le16 src_mac[3];
+ __le16 vlan;
+ #define CMDQ_MODIFY_GID_VLAN_VLAN_ID_MASK 0xfffUL
+ #define CMDQ_MODIFY_GID_VLAN_VLAN_ID_SFT 0
+ #define CMDQ_MODIFY_GID_VLAN_TPID_MASK 0x7000UL
+ #define CMDQ_MODIFY_GID_VLAN_TPID_SFT 12
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_88A8 (0x0UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_8100 (0x1UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_9100 (0x2UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_9200 (0x3UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_9300 (0x4UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_CFG1 (0x5UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_CFG2 (0x6UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_CFG3 (0x7UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_LAST \
+ CMDQ_MODIFY_GID_VLAN_TPID_TPID_CFG3
+ #define CMDQ_MODIFY_GID_VLAN_VLAN_EN 0x8000UL
+ __le16 ipid;
+ __le16 gid_index;
+ __le16 stats_ctx;
+ #define CMDQ_MODIFY_GID_STATS_CTX_STATS_CTX_ID_MASK 0x7fffUL
+ #define CMDQ_MODIFY_GID_STATS_CTX_STATS_CTX_ID_SFT 0
+ #define CMDQ_MODIFY_GID_STATS_CTX_STATS_CTX_VALID 0x8000UL
+ __le16 unused_0;
+};
+
+/* Query GID command (24 bytes) */
+struct cmdq_query_gid {
+ u8 opcode;
+ #define CMDQ_QUERY_GID_OPCODE_QUERY_GID 0x18UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le16 gid_index;
+ __le16 unused_0;
+ __le32 unused_1;
+};
+
+/* Create QP1 command (80 bytes) */
+struct cmdq_create_qp1 {
+ u8 opcode;
+ #define CMDQ_CREATE_QP1_OPCODE_CREATE_QP1 0x13UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le64 qp_handle;
+ __le32 qp_flags;
+ #define CMDQ_CREATE_QP1_QP_FLAGS_SRQ_USED 0x1UL
+ #define CMDQ_CREATE_QP1_QP_FLAGS_FORCE_COMPLETION 0x2UL
+ #define CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE 0x4UL
+ u8 type;
+ #define CMDQ_CREATE_QP1_TYPE_GSI 0x1UL
+ u8 sq_pg_size_sq_lvl;
+ #define CMDQ_CREATE_QP1_SQ_LVL_MASK 0xfUL
+ #define CMDQ_CREATE_QP1_SQ_LVL_SFT 0
+ #define CMDQ_CREATE_QP1_SQ_LVL_LVL_0 0x0UL
+ #define CMDQ_CREATE_QP1_SQ_LVL_LVL_1 0x1UL
+ #define CMDQ_CREATE_QP1_SQ_LVL_LVL_2 0x2UL
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_SFT 4
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_1G (0x5UL << 4)
+ u8 rq_pg_size_rq_lvl;
+ #define CMDQ_CREATE_QP1_RQ_LVL_MASK 0xfUL
+ #define CMDQ_CREATE_QP1_RQ_LVL_SFT 0
+ #define CMDQ_CREATE_QP1_RQ_LVL_LVL_0 0x0UL
+ #define CMDQ_CREATE_QP1_RQ_LVL_LVL_1 0x1UL
+ #define CMDQ_CREATE_QP1_RQ_LVL_LVL_2 0x2UL
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_SFT 4
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_1G (0x5UL << 4)
+ u8 unused_0;
+ __le32 dpi;
+ __le32 sq_size;
+ __le32 rq_size;
+ __le16 sq_fwo_sq_sge;
+ #define CMDQ_CREATE_QP1_SQ_SGE_MASK 0xfUL
+ #define CMDQ_CREATE_QP1_SQ_SGE_SFT 0
+ #define CMDQ_CREATE_QP1_SQ_FWO_MASK 0xfff0UL
+ #define CMDQ_CREATE_QP1_SQ_FWO_SFT 4
+ __le16 rq_fwo_rq_sge;
+ #define CMDQ_CREATE_QP1_RQ_SGE_MASK 0xfUL
+ #define CMDQ_CREATE_QP1_RQ_SGE_SFT 0
+ #define CMDQ_CREATE_QP1_RQ_FWO_MASK 0xfff0UL
+ #define CMDQ_CREATE_QP1_RQ_FWO_SFT 4
+ __le32 scq_cid;
+ __le32 rcq_cid;
+ __le32 srq_cid;
+ __le32 pd_id;
+ __le64 sq_pbl;
+ __le64 rq_pbl;
+};
+
+/* Destroy QP1 command (24 bytes) */
+struct cmdq_destroy_qp1 {
+ u8 opcode;
+ #define CMDQ_DESTROY_QP1_OPCODE_DESTROY_QP1 0x14UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 qp1_cid;
+ __le32 unused_0;
+};
+
+/* Create AH command (64 bytes) */
+struct cmdq_create_ah {
+ u8 opcode;
+ #define CMDQ_CREATE_AH_OPCODE_CREATE_AH 0x15UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le64 ah_handle;
+ __le32 dgid[4];
+ u8 type;
+ #define CMDQ_CREATE_AH_TYPE_V1 0x0UL
+ #define CMDQ_CREATE_AH_TYPE_V2IPV4 0x2UL
+ #define CMDQ_CREATE_AH_TYPE_V2IPV6 0x3UL
+ u8 hop_limit;
+ __le16 sgid_index;
+ __le32 dest_vlan_id_flow_label;
+ #define CMDQ_CREATE_AH_FLOW_LABEL_MASK 0xfffffUL
+ #define CMDQ_CREATE_AH_FLOW_LABEL_SFT 0
+ #define CMDQ_CREATE_AH_DEST_VLAN_ID_MASK 0xfff00000UL
+ #define CMDQ_CREATE_AH_DEST_VLAN_ID_SFT 20
+ __le32 pd_id;
+ __le32 unused_0;
+ __le16 dest_mac[3];
+ u8 traffic_class;
+ u8 unused_1;
+};
+
+/* Destroy AH command (24 bytes) */
+struct cmdq_destroy_ah {
+ u8 opcode;
+ #define CMDQ_DESTROY_AH_OPCODE_DESTROY_AH 0x16UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 ah_cid;
+ __le32 unused_0;
+};
+
+/* Initialize Firmware command (112 bytes) */
+struct cmdq_initialize_fw {
+ u8 opcode;
+ #define CMDQ_INITIALIZE_FW_OPCODE_INITIALIZE_FW 0x80UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ u8 qpc_pg_size_qpc_lvl;
+ #define CMDQ_INITIALIZE_FW_QPC_LVL_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_QPC_LVL_SFT 0
+ #define CMDQ_INITIALIZE_FW_QPC_LVL_LVL_0 0x0UL
+ #define CMDQ_INITIALIZE_FW_QPC_LVL_LVL_1 0x1UL
+ #define CMDQ_INITIALIZE_FW_QPC_LVL_LVL_2 0x2UL
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT 4
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_1G (0x5UL << 4)
+ u8 mrw_pg_size_mrw_lvl;
+ #define CMDQ_INITIALIZE_FW_MRW_LVL_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_MRW_LVL_SFT 0
+ #define CMDQ_INITIALIZE_FW_MRW_LVL_LVL_0 0x0UL
+ #define CMDQ_INITIALIZE_FW_MRW_LVL_LVL_1 0x1UL
+ #define CMDQ_INITIALIZE_FW_MRW_LVL_LVL_2 0x2UL
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_SFT 4
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_1G (0x5UL << 4)
+ u8 srq_pg_size_srq_lvl;
+ #define CMDQ_INITIALIZE_FW_SRQ_LVL_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_SRQ_LVL_SFT 0
+ #define CMDQ_INITIALIZE_FW_SRQ_LVL_LVL_0 0x0UL
+ #define CMDQ_INITIALIZE_FW_SRQ_LVL_LVL_1 0x1UL
+ #define CMDQ_INITIALIZE_FW_SRQ_LVL_LVL_2 0x2UL
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_SFT 4
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_1G (0x5UL << 4)
+ u8 cq_pg_size_cq_lvl;
+ #define CMDQ_INITIALIZE_FW_CQ_LVL_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_CQ_LVL_SFT 0
+ #define CMDQ_INITIALIZE_FW_CQ_LVL_LVL_0 0x0UL
+ #define CMDQ_INITIALIZE_FW_CQ_LVL_LVL_1 0x1UL
+ #define CMDQ_INITIALIZE_FW_CQ_LVL_LVL_2 0x2UL
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_SFT 4
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_1G (0x5UL << 4)
+ u8 tqm_pg_size_tqm_lvl;
+ #define CMDQ_INITIALIZE_FW_TQM_LVL_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_TQM_LVL_SFT 0
+ #define CMDQ_INITIALIZE_FW_TQM_LVL_LVL_0 0x0UL
+ #define CMDQ_INITIALIZE_FW_TQM_LVL_LVL_1 0x1UL
+ #define CMDQ_INITIALIZE_FW_TQM_LVL_LVL_2 0x2UL
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_SFT 4
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_1G (0x5UL << 4)
+ u8 tim_pg_size_tim_lvl;
+ #define CMDQ_INITIALIZE_FW_TIM_LVL_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_TIM_LVL_SFT 0
+ #define CMDQ_INITIALIZE_FW_TIM_LVL_LVL_0 0x0UL
+ #define CMDQ_INITIALIZE_FW_TIM_LVL_LVL_1 0x1UL
+ #define CMDQ_INITIALIZE_FW_TIM_LVL_LVL_2 0x2UL
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_SFT 4
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_1G (0x5UL << 4)
+ __le16 reserved16;
+ __le64 qpc_page_dir;
+ __le64 mrw_page_dir;
+ __le64 srq_page_dir;
+ __le64 cq_page_dir;
+ __le64 tqm_page_dir;
+ __le64 tim_page_dir;
+ __le32 number_of_qp;
+ __le32 number_of_mrw;
+ __le32 number_of_srq;
+ __le32 number_of_cq;
+ __le32 max_qp_per_vf;
+ __le32 max_mrw_per_vf;
+ __le32 max_srq_per_vf;
+ __le32 max_cq_per_vf;
+ __le32 max_gid_per_vf;
+ __le32 stat_ctx_id;
+};
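
Each *_pg_size_*_lvl byte in the initialize-firmware command describes one
backing-store table (QPC, MRW, SRQ, CQ, TQM, TIM) the same way: indirection
level in the low nibble, page size in the high nibble, with the matching
*_page_dir field pointing at that table's page directory. A hedged sketch for
the QPC table only, with assumed caller values:

    /* Illustrative only: advertise a level-1, 4K-page QPC context table. */
    static void fill_init_fw_qpc(struct cmdq_initialize_fw *req,
                                 u64 qpc_pde_dma, u32 max_qp)
    {
            req->qpc_pg_size_qpc_lvl = CMDQ_INITIALIZE_FW_QPC_LVL_LVL_1 |
                                       CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_4K;
            req->qpc_page_dir = cpu_to_le64(qpc_pde_dma);
            req->number_of_qp = cpu_to_le32(max_qp);
    }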
+
+/* De-initialize Firmware command (16 bytes) */
+struct cmdq_deinitialize_fw {
+ u8 opcode;
+ #define CMDQ_DEINITIALIZE_FW_OPCODE_DEINITIALIZE_FW 0x81UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+};
+
+/* Stop function command (16 bytes) */
+struct cmdq_stop_func {
+ u8 opcode;
+ #define CMDQ_STOP_FUNC_OPCODE_STOP_FUNC 0x82UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+};
+
+/* Query function command (16 bytes) */
+struct cmdq_query_func {
+ u8 opcode;
+ #define CMDQ_QUERY_FUNC_OPCODE_QUERY_FUNC 0x83UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+};
+
+/* Set function resources command (16 bytes) */
+struct cmdq_set_func_resources {
+ u8 opcode;
+ #define CMDQ_SET_FUNC_RESOURCES_OPCODE_SET_FUNC_RESOURCES 0x84UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+};
+
+/* Read hardware resource context command (24 bytes) */
+struct cmdq_read_context {
+ u8 opcode;
+ #define CMDQ_READ_CONTEXT_OPCODE_READ_CONTEXT 0x85UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 type_xid;
+ #define CMDQ_READ_CONTEXT_XID_MASK 0xffffffUL
+ #define CMDQ_READ_CONTEXT_XID_SFT 0
+ #define CMDQ_READ_CONTEXT_TYPE_MASK 0xff000000UL
+ #define CMDQ_READ_CONTEXT_TYPE_SFT 24
+ #define CMDQ_READ_CONTEXT_TYPE_QPC (0x0UL << 24)
+ #define CMDQ_READ_CONTEXT_TYPE_CQ (0x1UL << 24)
+ #define CMDQ_READ_CONTEXT_TYPE_MRW (0x2UL << 24)
+ #define CMDQ_READ_CONTEXT_TYPE_SRQ (0x3UL << 24)
+ __le32 unused_0;
+};
+
+/* Map TC to COS. Can only be issued from a PF (24 bytes) */
+struct cmdq_map_tc_to_cos {
+ u8 opcode;
+ #define CMDQ_MAP_TC_TO_COS_OPCODE_MAP_TC_TO_COS 0x8aUL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le16 cos0;
+ #define CMDQ_MAP_TC_TO_COS_COS0_NO_CHANGE 0xffffUL
+ __le16 cos1;
+ #define CMDQ_MAP_TC_TO_COS_COS1_DISABLE 0x8000UL
+ #define CMDQ_MAP_TC_TO_COS_COS1_NO_CHANGE 0xffffUL
+ __le32 unused_0;
+};
+
+/* Query version command (16 bytes) */
+struct cmdq_query_version {
+ u8 opcode;
+ #define CMDQ_QUERY_VERSION_OPCODE_QUERY_VERSION 0x8bUL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+};
+
+/* Command-Response Event Queue (CREQ) Structures */
+/* Base CREQ Record (16 bytes) */
+struct creq_base {
+ u8 type;
+ #define CREQ_BASE_TYPE_MASK 0x3fUL
+ #define CREQ_BASE_TYPE_SFT 0
+ #define CREQ_BASE_TYPE_QP_EVENT 0x38UL
+ #define CREQ_BASE_TYPE_FUNC_EVENT 0x3aUL
+ #define CREQ_BASE_RESERVED2_MASK 0xc0UL
+ #define CREQ_BASE_RESERVED2_SFT 6
+ u8 reserved56[7];
+ u8 v;
+ #define CREQ_BASE_V 0x1UL
+ #define CREQ_BASE_RESERVED7_MASK 0xfeUL
+ #define CREQ_BASE_RESERVED7_SFT 1
+ u8 event;
+ __le16 reserved48[3];
+};
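
Every CREQ record is 16 bytes and begins with this layout: a consumer polls the
ring, checks the valid bit in v against the phase it expects for the current
pass, then dispatches on the low six bits of type. A minimal decode sketch (the
function name and single-bit phase handling are assumptions; a real consumer
would also order the reads, e.g. with dma_rmb()):

    /* Illustrative only: return the record type, or -1 if not yet valid. */
    static int creq_decode_type(const struct creq_base *ev, u8 exp_phase)
    {
            if ((ev->v & CREQ_BASE_V) != exp_phase)
                    return -1;      /* producer has not written this slot yet */
            return ev->type & CREQ_BASE_TYPE_MASK;
    }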
+
+/* RoCE Function Async Event Notification (16 bytes) */
+struct creq_func_event {
+ u8 type;
+ #define CREQ_FUNC_EVENT_TYPE_MASK 0x3fUL
+ #define CREQ_FUNC_EVENT_TYPE_SFT 0
+ #define CREQ_FUNC_EVENT_TYPE_FUNC_EVENT 0x3aUL
+ #define CREQ_FUNC_EVENT_RESERVED2_MASK 0xc0UL
+ #define CREQ_FUNC_EVENT_RESERVED2_SFT 6
+ u8 reserved56[7];
+ u8 v;
+ #define CREQ_FUNC_EVENT_V 0x1UL
+ #define CREQ_FUNC_EVENT_RESERVED7_MASK 0xfeUL
+ #define CREQ_FUNC_EVENT_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR 0x1UL
+ #define CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR 0x2UL
+ #define CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR 0x3UL
+ #define CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR 0x4UL
+ #define CREQ_FUNC_EVENT_EVENT_CQ_ERROR 0x5UL
+ #define CREQ_FUNC_EVENT_EVENT_TQM_ERROR 0x6UL
+ #define CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR 0x7UL
+ #define CREQ_FUNC_EVENT_EVENT_CFCS_ERROR 0x8UL
+ #define CREQ_FUNC_EVENT_EVENT_CFCC_ERROR 0x9UL
+ #define CREQ_FUNC_EVENT_EVENT_CFCM_ERROR 0xaUL
+ #define CREQ_FUNC_EVENT_EVENT_TIM_ERROR 0xbUL
+ #define CREQ_FUNC_EVENT_EVENT_VF_COMM_REQUEST 0x80UL
+ #define CREQ_FUNC_EVENT_EVENT_RESOURCE_EXHAUSTED 0x81UL
+ __le16 reserved48[3];
+};
+
+/* RoCE Slowpath Command Completion (16 bytes) */
+struct creq_qp_event {
+ u8 type;
+ #define CREQ_QP_EVENT_TYPE_MASK 0x3fUL
+ #define CREQ_QP_EVENT_TYPE_SFT 0
+ #define CREQ_QP_EVENT_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QP_EVENT_RESERVED2_MASK 0xc0UL
+ #define CREQ_QP_EVENT_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_QP_EVENT_V 0x1UL
+ #define CREQ_QP_EVENT_RESERVED7_MASK 0xfeUL
+ #define CREQ_QP_EVENT_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_QP_EVENT_EVENT_CREATE_QP 0x1UL
+ #define CREQ_QP_EVENT_EVENT_DESTROY_QP 0x2UL
+ #define CREQ_QP_EVENT_EVENT_MODIFY_QP 0x3UL
+ #define CREQ_QP_EVENT_EVENT_QUERY_QP 0x4UL
+ #define CREQ_QP_EVENT_EVENT_CREATE_SRQ 0x5UL
+ #define CREQ_QP_EVENT_EVENT_DESTROY_SRQ 0x6UL
+ #define CREQ_QP_EVENT_EVENT_QUERY_SRQ 0x8UL
+ #define CREQ_QP_EVENT_EVENT_CREATE_CQ 0x9UL
+ #define CREQ_QP_EVENT_EVENT_DESTROY_CQ 0xaUL
+ #define CREQ_QP_EVENT_EVENT_RESIZE_CQ 0xcUL
+ #define CREQ_QP_EVENT_EVENT_ALLOCATE_MRW 0xdUL
+ #define CREQ_QP_EVENT_EVENT_DEALLOCATE_KEY 0xeUL
+ #define CREQ_QP_EVENT_EVENT_REGISTER_MR 0xfUL
+ #define CREQ_QP_EVENT_EVENT_DEREGISTER_MR 0x10UL
+ #define CREQ_QP_EVENT_EVENT_ADD_GID 0x11UL
+ #define CREQ_QP_EVENT_EVENT_DELETE_GID 0x12UL
+ #define CREQ_QP_EVENT_EVENT_MODIFY_GID 0x17UL
+ #define CREQ_QP_EVENT_EVENT_QUERY_GID 0x18UL
+ #define CREQ_QP_EVENT_EVENT_CREATE_QP1 0x13UL
+ #define CREQ_QP_EVENT_EVENT_DESTROY_QP1 0x14UL
+ #define CREQ_QP_EVENT_EVENT_CREATE_AH 0x15UL
+ #define CREQ_QP_EVENT_EVENT_DESTROY_AH 0x16UL
+ #define CREQ_QP_EVENT_EVENT_INITIALIZE_FW 0x80UL
+ #define CREQ_QP_EVENT_EVENT_DEINITIALIZE_FW 0x81UL
+ #define CREQ_QP_EVENT_EVENT_STOP_FUNC 0x82UL
+ #define CREQ_QP_EVENT_EVENT_QUERY_FUNC 0x83UL
+ #define CREQ_QP_EVENT_EVENT_SET_FUNC_RESOURCES 0x84UL
+ #define CREQ_QP_EVENT_EVENT_MAP_TC_TO_COS 0x8aUL
+ #define CREQ_QP_EVENT_EVENT_QUERY_VERSION 0x8bUL
+ #define CREQ_QP_EVENT_EVENT_MODIFY_CC 0x8cUL
+ #define CREQ_QP_EVENT_EVENT_QUERY_CC 0x8dUL
+ #define CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION 0xc0UL
+ __le16 reserved48[3];
+};
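
Slow-path commands complete through this record: the firmware echoes the 16-bit
cookie from the command header and repeats the originating opcode in event, so
the driver can pair each completion with the waiter that issued it. A hedged
sketch of that check (names are assumptions, not part of the patch):

    /* Illustrative only: does this completion answer the command we sent? */
    static bool creq_matches_cmd(const struct creq_qp_event *ev,
                                 u16 sent_cookie, u8 sent_opcode)
    {
            return le16_to_cpu(ev->cookie) == sent_cookie &&
                   ev->event == sent_opcode;
    }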
+
+/* Create QP command response (16 bytes) */
+struct creq_create_qp_resp {
+ u8 type;
+ #define CREQ_CREATE_QP_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_CREATE_QP_RESP_TYPE_SFT 0
+ #define CREQ_CREATE_QP_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_CREATE_QP_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_CREATE_QP_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_CREATE_QP_RESP_V 0x1UL
+ #define CREQ_CREATE_QP_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_CREATE_QP_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_CREATE_QP_RESP_EVENT_CREATE_QP 0x1UL
+ __le16 reserved48[3];
+};
+
+/* Destroy QP command response (16 bytes) */
+struct creq_destroy_qp_resp {
+ u8 type;
+ #define CREQ_DESTROY_QP_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DESTROY_QP_RESP_TYPE_SFT 0
+ #define CREQ_DESTROY_QP_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DESTROY_QP_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_DESTROY_QP_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DESTROY_QP_RESP_V 0x1UL
+ #define CREQ_DESTROY_QP_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_DESTROY_QP_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_DESTROY_QP_RESP_EVENT_DESTROY_QP 0x2UL
+ __le16 reserved48[3];
+};
+
+/* Modify QP command response (16 bytes) */
+struct creq_modify_qp_resp {
+ u8 type;
+ #define CREQ_MODIFY_QP_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_MODIFY_QP_RESP_TYPE_SFT 0
+ #define CREQ_MODIFY_QP_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_MODIFY_QP_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_MODIFY_QP_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_MODIFY_QP_RESP_V 0x1UL
+ #define CREQ_MODIFY_QP_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_MODIFY_QP_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_MODIFY_QP_RESP_EVENT_MODIFY_QP 0x3UL
+ __le16 reserved48[3];
+};
+
+/* Query QP command response (16 bytes) */
+struct creq_query_qp_resp {
+ u8 type;
+ #define CREQ_QUERY_QP_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_QP_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_QP_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_QP_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_QUERY_QP_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_QP_RESP_V 0x1UL
+ #define CREQ_QUERY_QP_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_QUERY_QP_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_QUERY_QP_RESP_EVENT_QUERY_QP 0x4UL
+ __le16 reserved48[3];
+};
+
+/* Query QP command response side buffer structure (104 bytes) */
+struct creq_query_qp_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_QP_RESP_SB_OPCODE_QUERY_QP 0x4UL
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ __le32 xid;
+ u8 en_sqd_async_notify_state;
+ #define CREQ_QUERY_QP_RESP_SB_STATE_MASK 0xfUL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_SFT 0
+ #define CREQ_QUERY_QP_RESP_SB_STATE_RESET 0x0UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_INIT 0x1UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_RTR 0x2UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_RTS 0x3UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_SQD 0x4UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_SQE 0x5UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_ERR 0x6UL
+ #define CREQ_QUERY_QP_RESP_SB_EN_SQD_ASYNC_NOTIFY 0x10UL
+ u8 access;
+ #define CREQ_QUERY_QP_RESP_SB_ACCESS_LOCAL_WRITE 0x1UL
+ #define CREQ_QUERY_QP_RESP_SB_ACCESS_REMOTE_WRITE 0x2UL
+ #define CREQ_QUERY_QP_RESP_SB_ACCESS_REMOTE_READ 0x4UL
+ #define CREQ_QUERY_QP_RESP_SB_ACCESS_REMOTE_ATOMIC 0x8UL
+ __le16 pkey;
+ __le32 qkey;
+ __le32 reserved32;
+ __le32 dgid[4];
+ __le32 flow_label;
+ __le16 sgid_index;
+ u8 hop_limit;
+ u8 traffic_class;
+ __le16 dest_mac[3];
+ __le16 path_mtu_dest_vlan_id;
+ #define CREQ_QUERY_QP_RESP_SB_DEST_VLAN_ID_MASK 0xfffUL
+ #define CREQ_QUERY_QP_RESP_SB_DEST_VLAN_ID_SFT 0
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MASK 0xf000UL
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_SFT 12
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_256 (0x0UL << 12)
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_512 (0x1UL << 12)
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_1024 (0x2UL << 12)
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_2048 (0x3UL << 12)
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_4096 (0x4UL << 12)
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_8192 (0x5UL << 12)
+ u8 timeout;
+ u8 retry_cnt;
+ u8 rnr_retry;
+ u8 min_rnr_timer;
+ __le32 rq_psn;
+ __le32 sq_psn;
+ u8 max_rd_atomic;
+ u8 max_dest_rd_atomic;
+ u8 tos_dscp_tos_ecn;
+ #define CREQ_QUERY_QP_RESP_SB_TOS_ECN_MASK 0x3UL
+ #define CREQ_QUERY_QP_RESP_SB_TOS_ECN_SFT 0
+ #define CREQ_QUERY_QP_RESP_SB_TOS_DSCP_MASK 0xfcUL
+ #define CREQ_QUERY_QP_RESP_SB_TOS_DSCP_SFT 2
+ u8 enable_cc;
+ #define CREQ_QUERY_QP_RESP_SB_ENABLE_CC 0x1UL
+ #define CREQ_QUERY_QP_RESP_SB_RESERVED7_MASK 0xfeUL
+ #define CREQ_QUERY_QP_RESP_SB_RESERVED7_SFT 1
+ __le32 sq_size;
+ __le32 rq_size;
+ __le16 sq_sge;
+ __le16 rq_sge;
+ __le32 max_inline_data;
+ __le32 dest_qp_id;
+ __le32 unused_1;
+ __le16 src_mac[3];
+ __le16 vlan_pcp_vlan_dei_vlan_id;
+ #define CREQ_QUERY_QP_RESP_SB_VLAN_ID_MASK 0xfffUL
+ #define CREQ_QUERY_QP_RESP_SB_VLAN_ID_SFT 0
+ #define CREQ_QUERY_QP_RESP_SB_VLAN_DEI 0x1000UL
+ #define CREQ_QUERY_QP_RESP_SB_VLAN_PCP_MASK 0xe000UL
+ #define CREQ_QUERY_QP_RESP_SB_VLAN_PCP_SFT 13
+};
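
Several of the side-buffer fields are packed: the QP state sits in the low
nibble of en_sqd_async_notify_state and the path-MTU code in bits 12-15 of
path_mtu_dest_vlan_id. A minimal unpacking sketch, assuming the definitions
above are in scope:

    /* Illustrative only: extract QP state and path-MTU code from the SB. */
    static void unpack_query_qp_sb(const struct creq_query_qp_resp_sb *sb,
                                   u8 *state, u16 *mtu_code)
    {
            *state = sb->en_sqd_async_notify_state &
                     CREQ_QUERY_QP_RESP_SB_STATE_MASK;
            *mtu_code = (le16_to_cpu(sb->path_mtu_dest_vlan_id) &
                         CREQ_QUERY_QP_RESP_SB_PATH_MTU_MASK) >>
                        CREQ_QUERY_QP_RESP_SB_PATH_MTU_SFT;
    }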
+
+/* Create SRQ command response (16 bytes) */
+struct creq_create_srq_resp {
+ u8 type;
+ #define CREQ_CREATE_SRQ_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_CREATE_SRQ_RESP_TYPE_SFT 0
+ #define CREQ_CREATE_SRQ_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_CREATE_SRQ_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_CREATE_SRQ_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_CREATE_SRQ_RESP_V 0x1UL
+ #define CREQ_CREATE_SRQ_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_CREATE_SRQ_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_CREATE_SRQ_RESP_EVENT_CREATE_SRQ 0x5UL
+ __le16 reserved48[3];
+};
+
+/* Destroy SRQ command response (16 bytes) */
+struct creq_destroy_srq_resp {
+ u8 type;
+ #define CREQ_DESTROY_SRQ_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DESTROY_SRQ_RESP_TYPE_SFT 0
+ #define CREQ_DESTROY_SRQ_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DESTROY_SRQ_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_DESTROY_SRQ_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DESTROY_SRQ_RESP_V 0x1UL
+ #define CREQ_DESTROY_SRQ_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_DESTROY_SRQ_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_DESTROY_SRQ_RESP_EVENT_DESTROY_SRQ 0x6UL
+ __le16 enable_for_arm[3];
+ #define CREQ_DESTROY_SRQ_RESP_ENABLE_FOR_ARM_MASK 0x30000UL
+ #define CREQ_DESTROY_SRQ_RESP_ENABLE_FOR_ARM_SFT 16
+ #define CREQ_DESTROY_SRQ_RESP_RESERVED46_MASK 0xfffc0000UL
+ #define CREQ_DESTROY_SRQ_RESP_RESERVED46_SFT 18
+};
+
+/* Query SRQ command response (16 bytes) */
+struct creq_query_srq_resp {
+ u8 type;
+ #define CREQ_QUERY_SRQ_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_SRQ_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_SRQ_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_SRQ_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_QUERY_SRQ_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_SRQ_RESP_V 0x1UL
+ #define CREQ_QUERY_SRQ_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_QUERY_SRQ_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_QUERY_SRQ_RESP_EVENT_QUERY_SRQ 0x8UL
+ __le16 reserved48[3];
+};
+
+/* Query SRQ command response side buffer structure (24 bytes) */
+struct creq_query_srq_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_SRQ_RESP_SB_OPCODE_QUERY_SRQ 0x8UL
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ __le32 xid;
+ __le16 srq_limit;
+ __le16 reserved16;
+ __le32 data[4];
+};
+
+/* Create CQ command Response (16 bytes) */
+struct creq_create_cq_resp {
+ u8 type;
+ #define CREQ_CREATE_CQ_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_CREATE_CQ_RESP_TYPE_SFT 0
+ #define CREQ_CREATE_CQ_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_CREATE_CQ_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_CREATE_CQ_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_CREATE_CQ_RESP_V 0x1UL
+ #define CREQ_CREATE_CQ_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_CREATE_CQ_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_CREATE_CQ_RESP_EVENT_CREATE_CQ 0x9UL
+ __le16 reserved48[3];
+};
+
+/* Destroy CQ command response (16 bytes) */
+struct creq_destroy_cq_resp {
+ u8 type;
+ #define CREQ_DESTROY_CQ_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DESTROY_CQ_RESP_TYPE_SFT 0
+ #define CREQ_DESTROY_CQ_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DESTROY_CQ_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_DESTROY_CQ_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DESTROY_CQ_RESP_V 0x1UL
+ #define CREQ_DESTROY_CQ_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_DESTROY_CQ_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_DESTROY_CQ_RESP_EVENT_DESTROY_CQ 0xaUL
+ __le16 cq_arm_lvl;
+ #define CREQ_DESTROY_CQ_RESP_CQ_ARM_LVL_MASK 0x3UL
+ #define CREQ_DESTROY_CQ_RESP_CQ_ARM_LVL_SFT 0
+ #define CREQ_DESTROY_CQ_RESP_RESERVED14_MASK 0xfffcUL
+ #define CREQ_DESTROY_CQ_RESP_RESERVED14_SFT 2
+ __le16 total_cnq_events;
+ __le16 reserved16;
+};
+
+/* Resize CQ command response (16 bytes) */
+struct creq_resize_cq_resp {
+ u8 type;
+ #define CREQ_RESIZE_CQ_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_RESIZE_CQ_RESP_TYPE_SFT 0
+ #define CREQ_RESIZE_CQ_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_RESIZE_CQ_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_RESIZE_CQ_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_RESIZE_CQ_RESP_V 0x1UL
+ #define CREQ_RESIZE_CQ_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_RESIZE_CQ_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_RESIZE_CQ_RESP_EVENT_RESIZE_CQ 0xcUL
+ __le16 reserved48[3];
+};
+
+/* Allocate MRW command response (16 bytes) */
+struct creq_allocate_mrw_resp {
+ u8 type;
+ #define CREQ_ALLOCATE_MRW_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_ALLOCATE_MRW_RESP_TYPE_SFT 0
+ #define CREQ_ALLOCATE_MRW_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_ALLOCATE_MRW_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_ALLOCATE_MRW_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_ALLOCATE_MRW_RESP_V 0x1UL
+ #define CREQ_ALLOCATE_MRW_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_ALLOCATE_MRW_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_ALLOCATE_MRW_RESP_EVENT_ALLOCATE_MRW 0xdUL
+ __le16 reserved48[3];
+};
+
+/* De-allocate key command response (16 bytes) */
+struct creq_deallocate_key_resp {
+ u8 type;
+ #define CREQ_DEALLOCATE_KEY_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DEALLOCATE_KEY_RESP_TYPE_SFT 0
+ #define CREQ_DEALLOCATE_KEY_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DEALLOCATE_KEY_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_DEALLOCATE_KEY_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DEALLOCATE_KEY_RESP_V 0x1UL
+ #define CREQ_DEALLOCATE_KEY_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_DEALLOCATE_KEY_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_DEALLOCATE_KEY_RESP_EVENT_DEALLOCATE_KEY 0xeUL
+ __le16 reserved16;
+ __le32 bound_window_info;
+};
+
+/* Register MR command response (16 bytes) */
+struct creq_register_mr_resp {
+ u8 type;
+ #define CREQ_REGISTER_MR_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_REGISTER_MR_RESP_TYPE_SFT 0
+ #define CREQ_REGISTER_MR_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_REGISTER_MR_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_REGISTER_MR_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_REGISTER_MR_RESP_V 0x1UL
+ #define CREQ_REGISTER_MR_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_REGISTER_MR_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_REGISTER_MR_RESP_EVENT_REGISTER_MR 0xfUL
+ __le16 reserved48[3];
+};
+
+/* Deregister MR command response (16 bytes) */
+struct creq_deregister_mr_resp {
+ u8 type;
+ #define CREQ_DEREGISTER_MR_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DEREGISTER_MR_RESP_TYPE_SFT 0
+ #define CREQ_DEREGISTER_MR_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DEREGISTER_MR_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_DEREGISTER_MR_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DEREGISTER_MR_RESP_V 0x1UL
+ #define CREQ_DEREGISTER_MR_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_DEREGISTER_MR_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_DEREGISTER_MR_RESP_EVENT_DEREGISTER_MR 0x10UL
+ __le16 reserved16;
+ __le32 bound_windows;
+};
+
+/* Add GID command response (16 bytes) */
+struct creq_add_gid_resp {
+ u8 type;
+ #define CREQ_ADD_GID_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_ADD_GID_RESP_TYPE_SFT 0
+ #define CREQ_ADD_GID_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_ADD_GID_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_ADD_GID_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_ADD_GID_RESP_V 0x1UL
+ #define CREQ_ADD_GID_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_ADD_GID_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_ADD_GID_RESP_EVENT_ADD_GID 0x11UL
+ __le16 reserved48[3];
+};
+
+/* Delete GID command response (16 bytes) */
+struct creq_delete_gid_resp {
+ u8 type;
+ #define CREQ_DELETE_GID_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DELETE_GID_RESP_TYPE_SFT 0
+ #define CREQ_DELETE_GID_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DELETE_GID_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_DELETE_GID_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DELETE_GID_RESP_V 0x1UL
+ #define CREQ_DELETE_GID_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_DELETE_GID_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_DELETE_GID_RESP_EVENT_DELETE_GID 0x12UL
+ __le16 reserved48[3];
+};
+
+/* Modify GID command response (16 bytes) */
+struct creq_modify_gid_resp {
+ u8 type;
+ #define CREQ_MODIFY_GID_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_MODIFY_GID_RESP_TYPE_SFT 0
+ #define CREQ_MODIFY_GID_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_MODIFY_GID_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_MODIFY_GID_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_MODIFY_GID_RESP_V 0x1UL
+ #define CREQ_MODIFY_GID_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_MODIFY_GID_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_MODIFY_GID_RESP_EVENT_ADD_GID 0x11UL
+ __le16 reserved48[3];
+};
+
+/* Query GID command response (16 bytes) */
+struct creq_query_gid_resp {
+ u8 type;
+ #define CREQ_QUERY_GID_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_GID_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_GID_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_GID_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_QUERY_GID_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_GID_RESP_V 0x1UL
+ #define CREQ_QUERY_GID_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_QUERY_GID_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_QUERY_GID_RESP_EVENT_QUERY_GID 0x18UL
+ __le16 reserved48[3];
+};
+
+/* Query GID command response side buffer structure (40 bytes) */
+struct creq_query_gid_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_GID_RESP_SB_OPCODE_QUERY_GID 0x18UL
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ __le32 gid[4];
+ __le16 src_mac[3];
+ __le16 vlan;
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_VLAN_ID_MASK 0xfffUL
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_VLAN_ID_SFT 0
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_MASK 0x7000UL
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_SFT 12
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_88A8 (0x0UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_8100 (0x1UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_9100 (0x2UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_9200 (0x3UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_9300 (0x4UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_CFG1 (0x5UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_CFG2 (0x6UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_CFG3 (0x7UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_LAST \
+ CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_CFG3
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_VLAN_EN 0x8000UL
+ __le16 ipid;
+ __le16 gid_index;
+ __le32 unused_0;
+};
+
+/* Create QP1 command response (16 bytes) */
+struct creq_create_qp1_resp {
+ u8 type;
+ #define CREQ_CREATE_QP1_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_CREATE_QP1_RESP_TYPE_SFT 0
+ #define CREQ_CREATE_QP1_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_CREATE_QP1_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_CREATE_QP1_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_CREATE_QP1_RESP_V 0x1UL
+ #define CREQ_CREATE_QP1_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_CREATE_QP1_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_CREATE_QP1_RESP_EVENT_CREATE_QP1 0x13UL
+ __le16 reserved48[3];
+};
+
+/* Destroy QP1 command response (16 bytes) */
+struct creq_destroy_qp1_resp {
+ u8 type;
+ #define CREQ_DESTROY_QP1_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DESTROY_QP1_RESP_TYPE_SFT 0
+ #define CREQ_DESTROY_QP1_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DESTROY_QP1_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_DESTROY_QP1_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DESTROY_QP1_RESP_V 0x1UL
+ #define CREQ_DESTROY_QP1_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_DESTROY_QP1_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_DESTROY_QP1_RESP_EVENT_DESTROY_QP1 0x14UL
+ __le16 reserved48[3];
+};
+
+/* Create AH command response (16 bytes) */
+struct creq_create_ah_resp {
+ u8 type;
+ #define CREQ_CREATE_AH_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_CREATE_AH_RESP_TYPE_SFT 0
+ #define CREQ_CREATE_AH_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_CREATE_AH_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_CREATE_AH_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_CREATE_AH_RESP_V 0x1UL
+ #define CREQ_CREATE_AH_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_CREATE_AH_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_CREATE_AH_RESP_EVENT_CREATE_AH 0x15UL
+ __le16 reserved48[3];
+};
+
+/* Destroy AH command response (16 bytes) */
+struct creq_destroy_ah_resp {
+ u8 type;
+ #define CREQ_DESTROY_AH_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DESTROY_AH_RESP_TYPE_SFT 0
+ #define CREQ_DESTROY_AH_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DESTROY_AH_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_DESTROY_AH_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DESTROY_AH_RESP_V 0x1UL
+ #define CREQ_DESTROY_AH_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_DESTROY_AH_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_DESTROY_AH_RESP_EVENT_DESTROY_AH 0x16UL
+ __le16 reserved48[3];
+};
+
+/* Initialize Firmware command response (16 bytes) */
+struct creq_initialize_fw_resp {
+ u8 type;
+ #define CREQ_INITIALIZE_FW_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_INITIALIZE_FW_RESP_TYPE_SFT 0
+ #define CREQ_INITIALIZE_FW_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_INITIALIZE_FW_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_INITIALIZE_FW_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_INITIALIZE_FW_RESP_V 0x1UL
+ #define CREQ_INITIALIZE_FW_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_INITIALIZE_FW_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_INITIALIZE_FW_RESP_EVENT_INITIALIZE_FW 0x80UL
+ __le16 reserved48[3];
+};
+
+/* De-initialize Firmware command response (16 bytes) */
+struct creq_deinitialize_fw_resp {
+ u8 type;
+ #define CREQ_DEINITIALIZE_FW_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DEINITIALIZE_FW_RESP_TYPE_SFT 0
+ #define CREQ_DEINITIALIZE_FW_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DEINITIALIZE_FW_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_DEINITIALIZE_FW_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_DEINITIALIZE_FW_RESP_V 0x1UL
+ #define CREQ_DEINITIALIZE_FW_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_DEINITIALIZE_FW_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_DEINITIALIZE_FW_RESP_EVENT_DEINITIALIZE_FW 0x81UL
+ __le16 reserved48[3];
+};
+
+/* Stop function command response (16 bytes) */
+struct creq_stop_func_resp {
+ u8 type;
+ #define CREQ_STOP_FUNC_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_STOP_FUNC_RESP_TYPE_SFT 0
+ #define CREQ_STOP_FUNC_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_STOP_FUNC_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_STOP_FUNC_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_STOP_FUNC_RESP_V 0x1UL
+ #define CREQ_STOP_FUNC_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_STOP_FUNC_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_STOP_FUNC_RESP_EVENT_STOP_FUNC 0x82UL
+ __le16 reserved48[3];
+};
+
+/* Query function command response (16 bytes) */
+struct creq_query_func_resp {
+ u8 type;
+ #define CREQ_QUERY_FUNC_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_FUNC_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_FUNC_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_FUNC_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_QUERY_FUNC_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_FUNC_RESP_V 0x1UL
+ #define CREQ_QUERY_FUNC_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_QUERY_FUNC_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_QUERY_FUNC_RESP_EVENT_QUERY_FUNC 0x83UL
+ __le16 reserved48[3];
+};
+
+/* Query function command response side buffer structure (88 bytes) */
+struct creq_query_func_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_FUNC_RESP_SB_OPCODE_QUERY_FUNC 0x83UL
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 max_mr_size;
+ __le32 max_qp;
+ __le16 max_qp_wr;
+ __le16 dev_cap_flags;
+ #define CREQ_QUERY_FUNC_RESP_SB_DEV_CAP_FLAGS_RESIZE_QP 0x1UL
+ __le32 max_cq;
+ __le32 max_cqe;
+ __le32 max_pd;
+ u8 max_sge;
+ u8 max_srq_sge;
+ u8 max_qp_rd_atom;
+ u8 max_qp_init_rd_atom;
+ __le32 max_mr;
+ __le32 max_mw;
+ __le32 max_raw_eth_qp;
+ __le32 max_ah;
+ __le32 max_fmr;
+ __le32 max_srq_wr;
+ __le32 max_pkeys;
+ __le32 max_inline_data;
+ u8 max_map_per_fmr;
+ u8 l2_db_space_size;
+ __le16 max_srq;
+ __le32 max_gid;
+ __le32 tqm_alloc_reqs[8];
+};
+
+/* Set resources command response (16 bytes) */
+struct creq_set_func_resources_resp {
+ u8 type;
+ #define CREQ_SET_FUNC_RESOURCES_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_SET_FUNC_RESOURCES_RESP_TYPE_SFT 0
+ #define CREQ_SET_FUNC_RESOURCES_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_SET_FUNC_RESOURCES_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_SET_FUNC_RESOURCES_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_SET_FUNC_RESOURCES_RESP_V 0x1UL
+ #define CREQ_SET_FUNC_RESOURCES_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_SET_FUNC_RESOURCES_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_SET_FUNC_RESOURCES_RESP_EVENT_SET_FUNC_RESOURCES 0x84UL
+ __le16 reserved48[3];
+};
+
+/* Map TC to COS response (16 bytes) */
+struct creq_map_tc_to_cos_resp {
+ u8 type;
+ #define CREQ_MAP_TC_TO_COS_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_MAP_TC_TO_COS_RESP_TYPE_SFT 0
+ #define CREQ_MAP_TC_TO_COS_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_MAP_TC_TO_COS_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_MAP_TC_TO_COS_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_MAP_TC_TO_COS_RESP_V 0x1UL
+ #define CREQ_MAP_TC_TO_COS_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_MAP_TC_TO_COS_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_MAP_TC_TO_COS_RESP_EVENT_MAP_TC_TO_COS 0x8aUL
+ __le16 reserved48[3];
+};
+
+/* Query version response (16 bytes) */
+struct creq_query_version_resp {
+ u8 type;
+ #define CREQ_QUERY_VERSION_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_VERSION_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_VERSION_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_VERSION_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_QUERY_VERSION_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ u8 fw_maj;
+ u8 fw_minor;
+ u8 fw_bld;
+ u8 fw_rsvd;
+ u8 v;
+ #define CREQ_QUERY_VERSION_RESP_V 0x1UL
+ #define CREQ_QUERY_VERSION_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_QUERY_VERSION_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_QUERY_VERSION_RESP_EVENT_QUERY_VERSION 0x8bUL
+ __le16 reserved16;
+ u8 intf_maj;
+ u8 intf_minor;
+ u8 intf_bld;
+ u8 intf_rsvd;
+};
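
The version response reports the firmware and HSI interface revisions as plain
bytes, so no endian conversion is needed when reading them. An illustrative use
(the message text and helper name are assumptions):

    /* Illustrative only: log the revisions carried in the response above. */
    static void log_fw_version(const struct creq_query_version_resp *resp)
    {
            pr_info("bnxt_re: fw %u.%u.%u, hsi %u.%u.%u\n",
                    resp->fw_maj, resp->fw_minor, resp->fw_bld,
                    resp->intf_maj, resp->intf_minor, resp->intf_bld);
    }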
+
+/* Modify congestion control command response (16 bytes) */
+struct creq_modify_cc_resp {
+ u8 type;
+ #define CREQ_MODIFY_CC_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_MODIFY_CC_RESP_TYPE_SFT 0
+ #define CREQ_MODIFY_CC_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_MODIFY_CC_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_MODIFY_CC_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_MODIFY_CC_RESP_V 0x1UL
+ #define CREQ_MODIFY_CC_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_MODIFY_CC_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_MODIFY_CC_RESP_EVENT_MODIFY_CC 0x8cUL
+ __le16 reserved48[3];
+};
+
+/* Query congestion control command response (16 bytes) */
+struct creq_query_cc_resp {
+ u8 type;
+ #define CREQ_QUERY_CC_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_CC_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_CC_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_CC_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_QUERY_CC_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_CC_RESP_V 0x1UL
+ #define CREQ_QUERY_CC_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_QUERY_CC_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_QUERY_CC_RESP_EVENT_QUERY_CC 0x8dUL
+ __le16 reserved48[3];
+};
+
+/* Query congestion control command response side buffer structure (32 bytes) */
+struct creq_query_cc_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_CC_RESP_SB_OPCODE_QUERY_CC 0x8dUL
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ u8 enable_cc;
+ #define CREQ_QUERY_CC_RESP_SB_ENABLE_CC 0x1UL
+ u8 g;
+ #define CREQ_QUERY_CC_RESP_SB_G_MASK 0x7UL
+ #define CREQ_QUERY_CC_RESP_SB_G_SFT 0
+ u8 num_phases_per_state;
+ __le16 init_cr;
+ u8 unused_2;
+ __le16 unused_3;
+ u8 unused_4;
+ __le16 init_tr;
+ u8 tos_dscp_tos_ecn;
+ #define CREQ_QUERY_CC_RESP_SB_TOS_ECN_MASK 0x3UL
+ #define CREQ_QUERY_CC_RESP_SB_TOS_ECN_SFT 0
+ #define CREQ_QUERY_CC_RESP_SB_TOS_DSCP_MASK 0xfcUL
+ #define CREQ_QUERY_CC_RESP_SB_TOS_DSCP_SFT 2
+ __le64 reserved64;
+ __le64 reserved64_1;
+};
+
+/* QP error notification event (16 bytes) */
+struct creq_qp_error_notification {
+ u8 type;
+ #define CREQ_QP_ERROR_NOTIFICATION_TYPE_MASK 0x3fUL
+ #define CREQ_QP_ERROR_NOTIFICATION_TYPE_SFT 0
+ #define CREQ_QP_ERROR_NOTIFICATION_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RESERVED2_MASK 0xc0UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RESERVED2_SFT 6
+ u8 status;
+ u8 req_slow_path_state;
+ u8 req_err_state_reason;
+ __le32 xid;
+ u8 v;
+ #define CREQ_QP_ERROR_NOTIFICATION_V 0x1UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RESERVED7_MASK 0xfeUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_QP_ERROR_NOTIFICATION_EVENT_QP_ERROR_NOTIFICATION 0xc0UL
+ u8 res_slow_path_state;
+ u8 res_err_state_reason;
+ __le16 sq_cons_idx;
+ __le16 rq_cons_idx;
+};
+
+/* RoCE Slowpath HSI Specification 1.6.0 */
+#define ROCE_SP_HSI_VERSION_MAJOR 1
+#define ROCE_SP_HSI_VERSION_MINOR 6
+#define ROCE_SP_HSI_VERSION_UPDATE 0
+
+#define ROCE_SP_HSI_VERSION_STR "1.6.0"
+/*
+ * The following is the signature for a ROCE_SP_HSI message field that indicates
+ * not applicable (all F's). Cast it to the size of the field if needed.
+ */
+#define ROCE_SP_HSI_NA_SIGNATURE ((__le32)(-1))
+#endif /* __BNXT_RE_HSI_H__ */
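
As the NA-signature comment above notes, the all-F's value must be truncated to
the width of the field it is compared against. A hedged example for a 16-bit
field (the helper name is illustrative):

    /* Illustrative only: a 16-bit HSI field is "not applicable" when all
     * of its bits are set.
     */
    static bool roce_sp_hsi_le16_is_na(__le16 field)
    {
            return field == (__le16)ROCE_SP_HSI_NA_SIGNATURE;
    }
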
diff --git a/drivers/infiniband/hw/cxgb3/cxio_dbg.c b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
index 445e89e5e7cf..97dbe728520a 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_dbg.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
@@ -51,17 +51,18 @@ void cxio_dump_tpt(struct cxio_rdev *rdev, u32 stag)
m->mem_id = MEM_PMRX;
m->addr = (stag>>8) * 32 + rdev->rnic_info.tpt_base;
m->len = size;
- PDBG("%s TPT addr 0x%x len %d\n", __func__, m->addr, m->len);
+ pr_debug("%s TPT addr 0x%x len %d\n", __func__, m->addr, m->len);
rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
if (rc) {
- PDBG("%s toectl returned error %d\n", __func__, rc);
+ pr_debug("%s toectl returned error %d\n", __func__, rc);
kfree(m);
return;
}
data = (u64 *)m->buf;
while (size > 0) {
- PDBG("TPT %08x: %016llx\n", m->addr, (unsigned long long) *data);
+ pr_debug("TPT %08x: %016llx\n",
+ m->addr, (unsigned long long)*data);
size -= 8;
data++;
m->addr += 8;
@@ -87,18 +88,19 @@ void cxio_dump_pbl(struct cxio_rdev *rdev, u32 pbl_addr, uint len, u8 shift)
m->mem_id = MEM_PMRX;
m->addr = pbl_addr;
m->len = size;
- PDBG("%s PBL addr 0x%x len %d depth %d\n",
- __func__, m->addr, m->len, npages);
+ pr_debug("%s PBL addr 0x%x len %d depth %d\n",
+ __func__, m->addr, m->len, npages);
rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
if (rc) {
- PDBG("%s toectl returned error %d\n", __func__, rc);
+ pr_debug("%s toectl returned error %d\n", __func__, rc);
kfree(m);
return;
}
data = (u64 *)m->buf;
while (size > 0) {
- PDBG("PBL %08x: %016llx\n", m->addr, (unsigned long long) *data);
+ pr_debug("PBL %08x: %016llx\n",
+ m->addr, (unsigned long long)*data);
size -= 8;
data++;
m->addr += 8;
@@ -114,8 +116,8 @@ void cxio_dump_wqe(union t3_wr *wqe)
if (size == 0)
size = 8;
while (size > 0) {
- PDBG("WQE %p: %016llx\n", data,
- (unsigned long long) be64_to_cpu(*data));
+ pr_debug("WQE %p: %016llx\n",
+ data, (unsigned long long)be64_to_cpu(*data));
size--;
data++;
}
@@ -127,8 +129,8 @@ void cxio_dump_wce(struct t3_cqe *wce)
int size = sizeof(*wce);
while (size > 0) {
- PDBG("WCE %p: %016llx\n", data,
- (unsigned long long) be64_to_cpu(*data));
+ pr_debug("WCE %p: %016llx\n",
+ data, (unsigned long long)be64_to_cpu(*data));
size -= 8;
data++;
}
@@ -148,17 +150,18 @@ void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents)
m->mem_id = MEM_PMRX;
m->addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base;
m->len = size;
- PDBG("%s RQT addr 0x%x len %d\n", __func__, m->addr, m->len);
+ pr_debug("%s RQT addr 0x%x len %d\n", __func__, m->addr, m->len);
rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
if (rc) {
- PDBG("%s toectl returned error %d\n", __func__, rc);
+ pr_debug("%s toectl returned error %d\n", __func__, rc);
kfree(m);
return;
}
data = (u64 *)m->buf;
while (size > 0) {
- PDBG("RQT %08x: %016llx\n", m->addr, (unsigned long long) *data);
+ pr_debug("RQT %08x: %016llx\n",
+ m->addr, (unsigned long long)*data);
size -= 8;
data++;
m->addr += 8;
@@ -180,10 +183,10 @@ void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid)
m->mem_id = MEM_CM;
m->addr = hwtid * size;
m->len = size;
- PDBG("%s TCB %d len %d\n", __func__, m->addr, m->len);
+ pr_debug("%s TCB %d len %d\n", __func__, m->addr, m->len);
rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
if (rc) {
- PDBG("%s toectl returned error %d\n", __func__, rc);
+ pr_debug("%s toectl returned error %d\n", __func__, rc);
kfree(m);
return;
}
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index ada2e5009c86..3eff6541bd6f 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -110,8 +110,7 @@ int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) {
udelay(1);
if (i++ > 1000000) {
- printk(KERN_ERR "%s: stalled rnic\n",
- rdev_p->dev_name);
+ pr_err("%s: stalled rnic\n", rdev_p->dev_name);
return -EIO;
}
}
@@ -140,11 +139,10 @@ static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
struct t3_modify_qp_wr *wqe;
struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
if (!skb) {
- PDBG("%s alloc_skb failed\n", __func__);
+ pr_debug("%s alloc_skb failed\n", __func__);
return -ENOMEM;
}
- wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
- memset(wqe, 0, sizeof(*wqe));
+ wqe = skb_put_zero(skb, sizeof(*wqe));
build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD,
T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 0, qpid, 7,
T3_SOPEOP);
@@ -230,7 +228,7 @@ static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
}
out:
mutex_unlock(&uctx->lock);
- PDBG("%s qpid 0x%x\n", __func__, qpid);
+ pr_debug("%s qpid 0x%x\n", __func__, qpid);
return qpid;
}
@@ -242,7 +240,7 @@ static void put_qpid(struct cxio_rdev *rdev_p, u32 qpid,
entry = kmalloc(sizeof *entry, GFP_KERNEL);
if (!entry)
return;
- PDBG("%s qpid 0x%x\n", __func__, qpid);
+ pr_debug("%s qpid 0x%x\n", __func__, qpid);
entry->qpid = qpid;
mutex_lock(&uctx->lock);
list_add_tail(&entry->entry, &uctx->qpids);
@@ -306,8 +304,8 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
wq->udb = (u64)rdev_p->rnic_info.udbell_physbase +
(wq->qpid << rdev_p->qpshift);
wq->rdev = rdev_p;
- PDBG("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", __func__,
- wq->qpid, wq->doorbell, (unsigned long long) wq->udb);
+ pr_debug("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n",
+ __func__, wq->qpid, wq->doorbell, (unsigned long long)wq->udb);
return 0;
err4:
kfree(wq->sq);
@@ -351,8 +349,8 @@ static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq)
{
struct t3_cqe cqe;
- PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__,
- wq, cq, cq->sw_rptr, cq->sw_wptr);
+ pr_debug("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__,
+ wq, cq, cq->sw_rptr, cq->sw_wptr);
memset(&cqe, 0, sizeof(cqe));
cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
V_CQE_OPCODE(T3_SEND) |
@@ -370,11 +368,11 @@ int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
u32 ptr;
int flushed = 0;
- PDBG("%s wq %p cq %p\n", __func__, wq, cq);
+ pr_debug("%s wq %p cq %p\n", __func__, wq, cq);
/* flush RQ */
- PDBG("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__,
- wq->rq_rptr, wq->rq_wptr, count);
+ pr_debug("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__,
+ wq->rq_rptr, wq->rq_wptr, count);
ptr = wq->rq_rptr + count;
while (ptr++ != wq->rq_wptr) {
insert_recv_cqe(wq, cq);
@@ -388,8 +386,8 @@ static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq,
{
struct t3_cqe cqe;
- PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__,
- wq, cq, cq->sw_rptr, cq->sw_wptr);
+ pr_debug("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__,
+ wq, cq, cq->sw_rptr, cq->sw_wptr);
memset(&cqe, 0, sizeof(cqe));
cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
V_CQE_OPCODE(sqp->opcode) |
@@ -429,11 +427,11 @@ void cxio_flush_hw_cq(struct t3_cq *cq)
{
struct t3_cqe *cqe, *swcqe;
- PDBG("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid);
+ pr_debug("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid);
cqe = cxio_next_hw_cqe(cq);
while (cqe) {
- PDBG("%s flushing hwcq rptr 0x%x to swcq wptr 0x%x\n",
- __func__, cq->rptr, cq->sw_wptr);
+ pr_debug("%s flushing hwcq rptr 0x%x to swcq wptr 0x%x\n",
+ __func__, cq->rptr, cq->sw_wptr);
swcqe = cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2);
*swcqe = *cqe;
swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
@@ -476,7 +474,7 @@ void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
(*count)++;
ptr++;
}
- PDBG("%s cq %p count %d\n", __func__, cq, *count);
+ pr_debug("%s cq %p count %d\n", __func__, cq, *count);
}

void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
@@ -485,7 +483,7 @@ void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
u32 ptr;

*count = 0;
- PDBG("%s count zero %d\n", __func__, *count);
+ pr_debug("%s count zero %d\n", __func__, *count);
ptr = cq->sw_rptr;
while (!Q_EMPTY(ptr, cq->sw_wptr)) {
cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
@@ -494,7 +492,7 @@ void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
(*count)++;
ptr++;
}
- PDBG("%s cq %p count %d\n", __func__, cq, *count);
+ pr_debug("%s cq %p count %d\n", __func__, cq, *count);
}
static int cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p)
@@ -521,12 +519,12 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
if (!skb) {
- PDBG("%s alloc_skb failed\n", __func__);
+ pr_debug("%s alloc_skb failed\n", __func__);
return -ENOMEM;
}
err = cxio_hal_init_ctrl_cq(rdev_p);
if (err) {
- PDBG("%s err %d initializing ctrl_cq\n", __func__, err);
+ pr_debug("%s err %d initializing ctrl_cq\n", __func__, err);
goto err;
}
rdev_p->ctrl_qp.workq = dma_alloc_coherent(
@@ -536,7 +534,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
&(rdev_p->ctrl_qp.dma_addr),
GFP_KERNEL);
if (!rdev_p->ctrl_qp.workq) {
- PDBG("%s dma_alloc_coherent failed\n", __func__);
+ pr_debug("%s dma_alloc_coherent failed\n", __func__);
err = -ENOMEM;
goto err;
}
@@ -562,8 +560,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
ctx1 |= ((u64) (V_EC_BASE_HI((u32) base_addr & 0xf) | V_EC_RESPQ(0) |
V_EC_TYPE(0) | V_EC_GEN(1) |
V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32;
- wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
- memset(wqe, 0, sizeof(*wqe));
+ wqe = skb_put_zero(skb, sizeof(*wqe));
build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0,
T3_CTL_QP_TID, 7, T3_SOPEOP);
wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
@@ -571,9 +568,9 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
wqe->sge_cmd = cpu_to_be64(sge_cmd);
wqe->ctx1 = cpu_to_be64(ctx1);
wqe->ctx0 = cpu_to_be64(ctx0);
- PDBG("CtrlQP dma_addr 0x%llx workq %p size %d\n",
- (unsigned long long) rdev_p->ctrl_qp.dma_addr,
- rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2);
+ pr_debug("CtrlQP dma_addr 0x%llx workq %p size %d\n",
+ (unsigned long long)rdev_p->ctrl_qp.dma_addr,
+ rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2);
skb->priority = CPL_PRIORITY_CONTROL;
return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
err:
@@ -605,26 +602,26 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
u64 utx_cmd;
addr &= 0x7FFFFFF;
nr_wqe = len % 96 ? len / 96 + 1 : len / 96; /* 96B max per WQE */
- PDBG("%s wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x\n",
- __func__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len,
- nr_wqe, data, addr);
+ pr_debug("%s wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x\n",
+ __func__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len,
+ nr_wqe, data, addr);
utx_len = 3; /* in 32B unit */
for (i = 0; i < nr_wqe; i++) {
if (Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr,
T3_CTRL_QP_SIZE_LOG2)) {
- PDBG("%s ctrl_qp full wtpr 0x%0x rptr 0x%0x, "
- "wait for more space i %d\n", __func__,
- rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i);
+ pr_debug("%s ctrl_qp full wtpr 0x%0x rptr 0x%0x, wait for more space i %d\n",
+ __func__,
+ rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i);
if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
!Q_FULL(rdev_p->ctrl_qp.rptr,
rdev_p->ctrl_qp.wptr,
T3_CTRL_QP_SIZE_LOG2))) {
- PDBG("%s ctrl_qp workq interrupted\n",
- __func__);
+ pr_debug("%s ctrl_qp workq interrupted\n",
+ __func__);
return -ERESTARTSYS;
}
- PDBG("%s ctrl_qp wakeup, continue posting work request "
- "i %d\n", __func__, i);
+ pr_debug("%s ctrl_qp wakeup, continue posting work request i %d\n",
+ __func__, i);
}
wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
(1 << T3_CTRL_QP_SIZE_LOG2)));
@@ -645,7 +642,7 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
if ((i != 0) &&
(i % (((1 << T3_CTRL_QP_SIZE_LOG2)) >> 1) == 0)) {
flag = T3_COMPLETION_FLAG;
- PDBG("%s force completion at i %d\n", __func__, i);
+ pr_debug("%s force completion at i %d\n", __func__, i);
}
/* build the utx mem command */
@@ -717,8 +714,8 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
return -ENOMEM;
*stag = (stag_idx << 8) | ((*stag) & 0xFF);
}
- PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
- __func__, stag_state, type, pdid, stag_idx);
+ pr_debug("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
+ __func__, stag_state, type, pdid, stag_idx);
mutex_lock(&rdev_p->ctrl_qp.lock);
@@ -767,9 +764,9 @@ int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
u32 wptr;
int err;
- PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
- __func__, pbl_addr, rdev_p->rnic_info.pbl_base,
- pbl_size);
+ pr_debug("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
+ __func__, pbl_addr, rdev_p->rnic_info.pbl_base,
+ pbl_size);
mutex_lock(&rdev_p->ctrl_qp.lock);
err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3,
@@ -837,8 +834,8 @@ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_ATOMIC);
if (!skb)
return -ENOMEM;
- PDBG("%s rdev_p %p\n", __func__, rdev_p);
- wqe = (struct t3_rdma_init_wr *) __skb_put(skb, sizeof(*wqe));
+ pr_debug("%s rdev_p %p\n", __func__, rdev_p);
+ wqe = __skb_put(skb, sizeof(*wqe));
wqe->wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_INIT));
wqe->wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(attr->tid) |
V_FW_RIWR_LEN(sizeof(*wqe) >> 3));
@@ -880,22 +877,20 @@ static int cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct sk_buff *skb)
static int cnt;
struct cxio_rdev *rdev_p = NULL;
struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data;
- PDBG("%d: %s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x"
- " se %0x notify %0x cqbranch %0x creditth %0x\n",
- cnt, __func__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg),
- RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg),
- RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg),
- RSPQ_CREDIT_THRESH(rsp_msg));
- PDBG("CQE: QPID 0x%0x genbit %0x type 0x%0x status 0x%0x opcode %d "
- "len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
- CQE_QPID(rsp_msg->cqe), CQE_GENBIT(rsp_msg->cqe),
- CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
- CQE_OPCODE(rsp_msg->cqe), CQE_LEN(rsp_msg->cqe),
- CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
+ pr_debug("%d: %s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x se %0x notify %0x cqbranch %0x creditth %0x\n",
+ cnt, __func__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg),
+ RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg),
+ RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg),
+ RSPQ_CREDIT_THRESH(rsp_msg));
+ pr_debug("CQE: QPID 0x%0x genbit %0x type 0x%0x status 0x%0x opcode %d len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
+ CQE_QPID(rsp_msg->cqe), CQE_GENBIT(rsp_msg->cqe),
+ CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
+ CQE_OPCODE(rsp_msg->cqe), CQE_LEN(rsp_msg->cqe),
+ CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
rdev_p = (struct cxio_rdev *)t3cdev_p->ulp;
if (!rdev_p) {
- PDBG("%s called by t3cdev %p with null ulp\n", __func__,
- t3cdev_p);
+ pr_debug("%s called by t3cdev %p with null ulp\n", __func__,
+ t3cdev_p);
return 0;
}
if (CQE_QPID(rsp_msg->cqe) == T3_CTRL_QP_ID) {
@@ -934,13 +929,13 @@ int cxio_rdev_open(struct cxio_rdev *rdev_p)
strncpy(rdev_p->dev_name, rdev_p->t3cdev_p->name,
T3_MAX_DEV_NAME_LEN);
} else {
- PDBG("%s t3cdev_p or dev_name must be set\n", __func__);
+ pr_debug("%s t3cdev_p or dev_name must be set\n", __func__);
return -EINVAL;
}
list_add_tail(&rdev_p->entry, &rdev_list);
- PDBG("%s opening rnic dev %s\n", __func__, rdev_p->dev_name);
+ pr_debug("%s opening rnic dev %s\n", __func__, rdev_p->dev_name);
memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp));
if (!rdev_p->t3cdev_p)
rdev_p->t3cdev_p = dev2t3cdev(netdev_p);
@@ -949,13 +944,12 @@ int cxio_rdev_open(struct cxio_rdev *rdev_p)
err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_EMBEDDED_INFO,
&(rdev_p->fw_info));
if (err) {
- printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n",
- __func__, rdev_p->t3cdev_p, err);
+ pr_err("%s t3cdev_p(%p)->ctl returned error %d\n",
+ __func__, rdev_p->t3cdev_p, err);
goto err1;
}
if (G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers) != CXIO_FW_MAJ) {
- printk(KERN_ERR MOD "fatal firmware version mismatch: "
- "need version %u but adapter has version %u\n",
+ pr_err("fatal firmware version mismatch: need version %u but adapter has version %u\n",
CXIO_FW_MAJ,
G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers));
err = -EINVAL;
@@ -965,15 +959,15 @@ int cxio_rdev_open(struct cxio_rdev *rdev_p)
err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS,
&(rdev_p->rnic_info));
if (err) {
- printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n",
- __func__, rdev_p->t3cdev_p, err);
+ pr_err("%s t3cdev_p(%p)->ctl returned error %d\n",
+ __func__, rdev_p->t3cdev_p, err);
goto err1;
}
err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_PORTS,
&(rdev_p->port_info));
if (err) {
- printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n",
- __func__, rdev_p->t3cdev_p, err);
+ pr_err("%s t3cdev_p(%p)->ctl returned error %d\n",
+ __func__, rdev_p->t3cdev_p, err);
goto err1;
}
@@ -988,42 +982,39 @@ int cxio_rdev_open(struct cxio_rdev *rdev_p)
PAGE_SHIFT));
rdev_p->qpnr = rdev_p->rnic_info.udbell_len >> PAGE_SHIFT;
rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1;
- PDBG("%s rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d "
- "pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x\n",
- __func__, rdev_p->dev_name, rdev_p->rnic_info.tpt_base,
- rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p),
- rdev_p->rnic_info.pbl_base,
- rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base,
- rdev_p->rnic_info.rqt_top);
- PDBG("udbell_len 0x%0x udbell_physbase 0x%lx kdb_addr %p qpshift %lu "
- "qpnr %d qpmask 0x%x\n",
- rdev_p->rnic_info.udbell_len,
- rdev_p->rnic_info.udbell_physbase, rdev_p->rnic_info.kdb_addr,
- rdev_p->qpshift, rdev_p->qpnr, rdev_p->qpmask);
+ pr_debug("%s rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x\n",
+ __func__, rdev_p->dev_name, rdev_p->rnic_info.tpt_base,
+ rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p),
+ rdev_p->rnic_info.pbl_base,
+ rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base,
+ rdev_p->rnic_info.rqt_top);
+ pr_debug("udbell_len 0x%0x udbell_physbase 0x%lx kdb_addr %p qpshift %lu qpnr %d qpmask 0x%x\n",
+ rdev_p->rnic_info.udbell_len,
+ rdev_p->rnic_info.udbell_physbase, rdev_p->rnic_info.kdb_addr,
+ rdev_p->qpshift, rdev_p->qpnr, rdev_p->qpmask);
err = cxio_hal_init_ctrl_qp(rdev_p);
if (err) {
- printk(KERN_ERR "%s error %d initializing ctrl_qp.\n",
- __func__, err);
+ pr_err("%s error %d initializing ctrl_qp\n", __func__, err);
goto err1;
}
err = cxio_hal_init_resource(rdev_p, cxio_num_stags(rdev_p), 0,
0, T3_MAX_NUM_QP, T3_MAX_NUM_CQ,
T3_MAX_NUM_PD);
if (err) {
- printk(KERN_ERR "%s error %d initializing hal resources.\n",
+ pr_err("%s error %d initializing hal resources\n",
__func__, err);
goto err2;
}
err = cxio_hal_pblpool_create(rdev_p);
if (err) {
- printk(KERN_ERR "%s error %d initializing pbl mem pool.\n",
+ pr_err("%s error %d initializing pbl mem pool\n",
__func__, err);
goto err3;
}
err = cxio_hal_rqtpool_create(rdev_p);
if (err) {
- printk(KERN_ERR "%s error %d initializing rqt mem pool.\n",
+ pr_err("%s error %d initializing rqt mem pool\n",
__func__, err);
goto err4;
}
@@ -1086,9 +1077,9 @@ static void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq)
/*
* Insert this completed cqe into the swcq.
*/
- PDBG("%s moving cqe into swcq sq idx %ld cq idx %ld\n",
- __func__, Q_PTR2IDX(ptr, wq->sq_size_log2),
- Q_PTR2IDX(cq->sw_wptr, cq->size_log2));
+ pr_debug("%s moving cqe into swcq sq idx %ld cq idx %ld\n",
+ __func__, Q_PTR2IDX(ptr, wq->sq_size_log2),
+ Q_PTR2IDX(cq->sw_wptr, cq->size_log2));
sqp->cqe.header |= htonl(V_CQE_SWCQE(1));
*(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2))
= sqp->cqe;
@@ -1154,12 +1145,11 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
*credit = 0;
hw_cqe = cxio_next_cqe(cq);
- PDBG("%s CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x"
- " opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
- __func__, CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe),
- CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe),
- CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe),
- CQE_WRID_LOW(*hw_cqe));
+ pr_debug("%s CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
+ __func__, CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe),
+ CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe),
+ CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe),
+ CQE_WRID_LOW(*hw_cqe));
/*
* skip cqe's not affiliated with a QP.
@@ -1278,9 +1268,10 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
if (!SW_CQE(*hw_cqe) && (CQE_WRID_SQ_WPTR(*hw_cqe) != wq->sq_rptr)) {
struct t3_swsq *sqp;
- PDBG("%s out of order completion going in swsq at idx %ld\n",
- __func__,
- Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2));
+ pr_debug("%s out of order completion going in swsq at idx %ld\n",
+ __func__,
+ Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe),
+ wq->sq_size_log2));
sqp = wq->sq +
Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2);
sqp->cqe = *hw_cqe;
@@ -1298,13 +1289,13 @@ proc_cqe:
*/
if (SQ_TYPE(*hw_cqe)) {
wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe);
- PDBG("%s completing sq idx %ld\n", __func__,
- Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2));
+ pr_debug("%s completing sq idx %ld\n", __func__,
+ Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2));
*cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id;
wq->sq_rptr++;
} else {
- PDBG("%s completing rq idx %ld\n", __func__,
- Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
+ pr_debug("%s completing rq idx %ld\n", __func__,
+ Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
*cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id;
if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr)
cxio_hal_pblpool_free(wq->rdev,
@@ -1322,12 +1313,12 @@ flush_wq:
skip_cqe:
if (SW_CQE(*hw_cqe)) {
- PDBG("%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x\n",
- __func__, cq, cq->cqid, cq->sw_rptr);
+ pr_debug("%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x\n",
+ __func__, cq, cq->cqid, cq->sw_rptr);
++cq->sw_rptr;
} else {
- PDBG("%s cq %p cqid 0x%x skip hw cqe rptr 0x%x\n",
- __func__, cq, cq->cqid, cq->rptr);
+ pr_debug("%s cq %p cqid 0x%x skip hw cqe rptr 0x%x\n",
+ __func__, cq, cq->cqid, cq->rptr);
++cq->rptr;
/*
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
index 78fbe9ffe7f0..7e70c5492262 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -196,8 +196,11 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
u8 *cqe_flushed, u64 *cookie, u32 *credit);
int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb);
-#define MOD "iw_cxgb3: "
-#define PDBG(fmt, args...) pr_debug(MOD fmt, ## args)
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#ifdef DEBUG
void cxio_dump_tpt(struct cxio_rdev *rev, u32 stag);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
index c40088ecf9f3..c6e7bc4420b6 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -209,13 +209,13 @@ u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp)
{
u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo,
&rscp->qpid_fifo_lock);
- PDBG("%s qpid 0x%x\n", __func__, qpid);
+ pr_debug("%s qpid 0x%x\n", __func__, qpid);
return qpid;
}
void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid)
{
- PDBG("%s qpid 0x%x\n", __func__, qpid);
+ pr_debug("%s qpid 0x%x\n", __func__, qpid);
cxio_hal_put_resource(&rscp->qpid_fifo, &rscp->qpid_fifo_lock, qpid);
}
@@ -257,13 +257,13 @@ void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp)
u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size)
{
unsigned long addr = gen_pool_alloc(rdev_p->pbl_pool, size);
- PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
+ pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
return (u32)addr;
}
void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
{
- PDBG("%s addr 0x%x size %d\n", __func__, addr, size);
+ pr_debug("%s addr 0x%x size %d\n", __func__, addr, size);
gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size);
}
@@ -282,17 +282,18 @@ int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p)
pbl_chunk = min(rdev_p->rnic_info.pbl_top - pbl_start + 1,
pbl_chunk);
if (gen_pool_add(rdev_p->pbl_pool, pbl_start, pbl_chunk, -1)) {
- PDBG("%s failed to add PBL chunk (%x/%x)\n",
- __func__, pbl_start, pbl_chunk);
+ pr_debug("%s failed to add PBL chunk (%x/%x)\n",
+ __func__, pbl_start, pbl_chunk);
if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) {
- printk(KERN_WARNING MOD "%s: Failed to add all PBL chunks (%x/%x)\n",
- __func__, pbl_start, rdev_p->rnic_info.pbl_top - pbl_start);
+ pr_warn("%s: Failed to add all PBL chunks (%x/%x)\n",
+ __func__, pbl_start,
+ rdev_p->rnic_info.pbl_top - pbl_start);
return 0;
}
pbl_chunk >>= 1;
} else {
- PDBG("%s added PBL chunk (%x/%x)\n",
- __func__, pbl_start, pbl_chunk);
+ pr_debug("%s added PBL chunk (%x/%x)\n",
+ __func__, pbl_start, pbl_chunk);
pbl_start += pbl_chunk;
}
}
@@ -315,13 +316,13 @@ void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p)
u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size)
{
unsigned long addr = gen_pool_alloc(rdev_p->rqt_pool, size << 6);
- PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6);
+ pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6);
return (u32)addr;
}
void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
{
- PDBG("%s addr 0x%x size %d\n", __func__, addr, size << 6);
+ pr_debug("%s addr 0x%x size %d\n", __func__, addr, size << 6);
gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6);
}
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
index b3e11329801d..47b2ce2ef203 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -105,7 +105,7 @@ static void iwch_db_drop_task(struct work_struct *work)
static void rnic_init(struct iwch_dev *rnicp)
{
- PDBG("%s iwch_dev %p\n", __func__, rnicp);
+ pr_debug("%s iwch_dev %p\n", __func__, rnicp);
idr_init(&rnicp->cqidr);
idr_init(&rnicp->qpidr);
idr_init(&rnicp->mmidr);
@@ -145,12 +145,11 @@ static void open_rnic_dev(struct t3cdev *tdev)
{
struct iwch_dev *rnicp;
- PDBG("%s t3cdev %p\n", __func__, tdev);
- printk_once(KERN_INFO MOD "Chelsio T3 RDMA Driver - version %s\n",
- DRV_VERSION);
+ pr_debug("%s t3cdev %p\n", __func__, tdev);
+ pr_info_once("Chelsio T3 RDMA Driver - version %s\n", DRV_VERSION);
rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp));
if (!rnicp) {
- printk(KERN_ERR MOD "Cannot allocate ib device\n");
+ pr_err("Cannot allocate ib device\n");
return;
}
rnicp->rdev.ulp = rnicp;
@@ -160,7 +159,7 @@ static void open_rnic_dev(struct t3cdev *tdev)
if (cxio_rdev_open(&rnicp->rdev)) {
mutex_unlock(&dev_mutex);
- printk(KERN_ERR MOD "Unable to open CXIO rdev\n");
+ pr_err("Unable to open CXIO rdev\n");
ib_dealloc_device(&rnicp->ibdev);
return;
}
@@ -171,18 +170,18 @@ static void open_rnic_dev(struct t3cdev *tdev)
mutex_unlock(&dev_mutex);
if (iwch_register_device(rnicp)) {
- printk(KERN_ERR MOD "Unable to register device\n");
+ pr_err("Unable to register device\n");
close_rnic_dev(tdev);
}
- printk(KERN_INFO MOD "Initialized device %s\n",
- pci_name(rnicp->rdev.rnic_info.pdev));
+ pr_info("Initialized device %s\n",
+ pci_name(rnicp->rdev.rnic_info.pdev));
return;
}
static void close_rnic_dev(struct t3cdev *tdev)
{
struct iwch_dev *dev, *tmp;
- PDBG("%s t3cdev %p\n", __func__, tdev);
+ pr_debug("%s t3cdev %p\n", __func__, tdev);
mutex_lock(&dev_mutex);
list_for_each_entry_safe(dev, tmp, &dev_list, entry) {
if (dev->rdev.t3cdev_p == tdev) {
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 65ee64400deb..86975370a4c0 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -112,9 +112,9 @@ static void connect_reply_upcall(struct iwch_ep *ep, int status);
static void start_ep_timer(struct iwch_ep *ep)
{
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
if (timer_pending(&ep->timer)) {
- PDBG("%s stopped / restarted timer ep %p\n", __func__, ep);
+ pr_debug("%s stopped / restarted timer ep %p\n", __func__, ep);
del_timer_sync(&ep->timer);
} else
get_ep(&ep->com);
@@ -126,7 +126,7 @@ static void start_ep_timer(struct iwch_ep *ep)
static void stop_ep_timer(struct iwch_ep *ep)
{
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
if (!timer_pending(&ep->timer)) {
WARN(1, "%s timer stopped when its not running! ep %p state %u\n",
__func__, ep, ep->com.state);
@@ -175,7 +175,7 @@ static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
skb = get_skb(skb, sizeof *req, GFP_KERNEL);
if (!skb)
return;
- req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req));
+ req = skb_put(skb, sizeof(*req));
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
skb->priority = CPL_PRIORITY_SETUP;
@@ -190,7 +190,7 @@ int iwch_quiesce_tid(struct iwch_ep *ep)
if (!skb)
return -ENOMEM;
- req = (struct cpl_set_tcb_field *) skb_put(skb, sizeof(*req));
+ req = skb_put(skb, sizeof(*req));
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
@@ -211,7 +211,7 @@ int iwch_resume_tid(struct iwch_ep *ep)
if (!skb)
return -ENOMEM;
- req = (struct cpl_set_tcb_field *) skb_put(skb, sizeof(*req));
+ req = skb_put(skb, sizeof(*req));
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
@@ -227,13 +227,13 @@ int iwch_resume_tid(struct iwch_ep *ep)
static void set_emss(struct iwch_ep *ep, u16 opt)
{
- PDBG("%s ep %p opt %u\n", __func__, ep, opt);
+ pr_debug("%s ep %p opt %u\n", __func__, ep, opt);
ep->emss = T3C_DATA(ep->com.tdev)->mtus[G_TCPOPT_MSS(opt)] - 40;
if (G_TCPOPT_TSTAMP(opt))
ep->emss -= 12;
if (ep->emss < 128)
ep->emss = 128;
- PDBG("emss=%d\n", ep->emss);
+ pr_debug("emss=%d\n", ep->emss);
}
static enum iwch_ep_state state_read(struct iwch_ep_common *epc)
@@ -257,7 +257,7 @@ static void state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
unsigned long flags;
spin_lock_irqsave(&epc->lock, flags);
- PDBG("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
+ pr_debug("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
__state_set(epc, new);
spin_unlock_irqrestore(&epc->lock, flags);
return;
@@ -273,7 +273,7 @@ static void *alloc_ep(int size, gfp_t gfp)
spin_lock_init(&epc->lock);
init_waitqueue_head(&epc->waitq);
}
- PDBG("%s alloc ep %p\n", __func__, epc);
+ pr_debug("%s alloc ep %p\n", __func__, epc);
return epc;
}
@@ -282,7 +282,8 @@ void __free_ep(struct kref *kref)
struct iwch_ep *ep;
ep = container_of(container_of(kref, struct iwch_ep_common, kref),
struct iwch_ep, com);
- PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]);
+ pr_debug("%s ep %p state %s\n",
+ __func__, ep, states[state_read(&ep->com)]);
if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
dst_release(ep->dst);
@@ -293,7 +294,7 @@ void __free_ep(struct kref *kref)
static void release_ep_resources(struct iwch_ep *ep)
{
- PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
set_bit(RELEASE_RESOURCES, &ep->com.flags);
put_ep(&ep->com);
}
@@ -358,7 +359,7 @@ static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu)
static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb)
{
- PDBG("%s t3cdev %p\n", __func__, dev);
+ pr_debug("%s t3cdev %p\n", __func__, dev);
kfree_skb(skb);
}
@@ -367,7 +368,7 @@ static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb)
*/
static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
{
- printk(KERN_ERR MOD "ARP failure during connect\n");
+ pr_err("ARP failure during connect\n");
kfree_skb(skb);
}
@@ -379,7 +380,7 @@ static void abort_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
{
struct cpl_abort_req *req = cplhdr(skb);
- PDBG("%s t3cdev %p\n", __func__, dev);
+ pr_debug("%s t3cdev %p\n", __func__, dev);
req->cmd = CPL_ABORT_NO_RST;
iwch_cxgb3_ofld_send(dev, skb);
}
@@ -389,15 +390,15 @@ static int send_halfclose(struct iwch_ep *ep, gfp_t gfp)
struct cpl_close_con_req *req;
struct sk_buff *skb;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
skb = get_skb(NULL, sizeof(*req), gfp);
if (!skb) {
- printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
+ pr_err("%s - failed to alloc skb\n", __func__);
return -ENOMEM;
}
skb->priority = CPL_PRIORITY_DATA;
set_arp_failure_handler(skb, arp_failure_discard);
- req = (struct cpl_close_con_req *) skb_put(skb, sizeof(*req));
+ req = skb_put(skb, sizeof(*req));
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, ep->hwtid));
@@ -408,17 +409,15 @@ static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
{
struct cpl_abort_req *req;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
skb = get_skb(skb, sizeof(*req), gfp);
if (!skb) {
- printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
- __func__);
+ pr_err("%s - failed to alloc skb\n", __func__);
return -ENOMEM;
}
skb->priority = CPL_PRIORITY_DATA;
set_arp_failure_handler(skb, abort_arp_failure);
- req = (struct cpl_abort_req *) skb_put(skb, sizeof(*req));
- memset(req, 0, sizeof(*req));
+ req = skb_put_zero(skb, sizeof(*req));
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
@@ -434,12 +433,11 @@ static int send_connect(struct iwch_ep *ep)
unsigned int mtu_idx;
int wscale;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
- __func__);
+ pr_err("%s - failed to alloc skb\n", __func__);
return -ENOMEM;
}
mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
@@ -457,7 +455,7 @@ static int send_connect(struct iwch_ep *ep)
skb->priority = CPL_PRIORITY_SETUP;
set_arp_failure_handler(skb, act_open_req_arp_failure);
- req = (struct cpl_act_open_req *) skb_put(skb, sizeof(*req));
+ req = skb_put(skb, sizeof(*req));
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ep->atid));
req->local_port = ep->com.local_addr.sin_port;
@@ -478,7 +476,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
struct mpa_message *mpa;
int len;
- PDBG("%s ep %p pd_len %d\n", __func__, ep, ep->plen);
+ pr_debug("%s ep %p pd_len %d\n", __func__, ep, ep->plen);
BUG_ON(skb_cloned(skb));
@@ -515,7 +513,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
set_arp_failure_handler(skb, arp_failure_discard);
skb_reset_transport_header(skb);
len = skb->len;
- req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
+ req = skb_push(skb, sizeof(*req));
req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
req->wr_lo = htonl(V_WR_TID(ep->hwtid));
req->len = htonl(len);
@@ -538,17 +536,17 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
struct mpa_message *mpa;
struct sk_buff *skb;
- PDBG("%s ep %p plen %d\n", __func__, ep, plen);
+ pr_debug("%s ep %p plen %d\n", __func__, ep, plen);
mpalen = sizeof(*mpa) + plen;
skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
+ pr_err("%s - cannot alloc skb!\n", __func__);
return -ENOMEM;
}
skb_reserve(skb, sizeof(*req));
- mpa = (struct mpa_message *) skb_put(skb, mpalen);
+ mpa = skb_put(skb, mpalen);
memset(mpa, 0, sizeof(*mpa));
memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
mpa->flags = MPA_REJECT;
@@ -566,7 +564,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
skb->priority = CPL_PRIORITY_DATA;
set_arp_failure_handler(skb, arp_failure_discard);
skb_reset_transport_header(skb);
- req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
+ req = skb_push(skb, sizeof(*req));
req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
req->wr_lo = htonl(V_WR_TID(ep->hwtid));
req->len = htonl(mpalen);
@@ -587,18 +585,18 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
int len;
struct sk_buff *skb;
- PDBG("%s ep %p plen %d\n", __func__, ep, plen);
+ pr_debug("%s ep %p plen %d\n", __func__, ep, plen);
mpalen = sizeof(*mpa) + plen;
skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
+ pr_err("%s - cannot alloc skb!\n", __func__);
return -ENOMEM;
}
skb->priority = CPL_PRIORITY_DATA;
skb_reserve(skb, sizeof(*req));
- mpa = (struct mpa_message *) skb_put(skb, mpalen);
+ mpa = skb_put(skb, mpalen);
memset(mpa, 0, sizeof(*mpa));
memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
@@ -617,7 +615,7 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
set_arp_failure_handler(skb, arp_failure_discard);
skb_reset_transport_header(skb);
len = skb->len;
- req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
+ req = skb_push(skb, sizeof(*req));
req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
req->wr_lo = htonl(V_WR_TID(ep->hwtid));
req->len = htonl(len);
@@ -636,7 +634,7 @@ static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct cpl_act_establish *req = cplhdr(skb);
unsigned int tid = GET_TID(req);
- PDBG("%s ep %p tid %d\n", __func__, ep, tid);
+ pr_debug("%s ep %p tid %d\n", __func__, ep, tid);
dst_confirm(ep->dst);
@@ -660,7 +658,7 @@ static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
static void abort_connection(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
{
- PDBG("%s ep %p\n", __FILE__, ep);
+ pr_debug("%s ep %p\n", __FILE__, ep);
state_set(&ep->com, ABORTING);
send_abort(ep, skb, gfp);
}
@@ -669,12 +667,12 @@ static void close_complete_upcall(struct iwch_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CLOSE;
if (ep->com.cm_id) {
- PDBG("close complete delivered ep %p cm_id %p tid %d\n",
- ep, ep->com.cm_id, ep->hwtid);
+ pr_debug("close complete delivered ep %p cm_id %p tid %d\n",
+ ep, ep->com.cm_id, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
ep->com.cm_id->rem_ref(ep->com.cm_id);
ep->com.cm_id = NULL;
@@ -686,12 +684,12 @@ static void peer_close_upcall(struct iwch_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_DISCONNECT;
if (ep->com.cm_id) {
- PDBG("peer close delivered ep %p cm_id %p tid %d\n",
- ep, ep->com.cm_id, ep->hwtid);
+ pr_debug("peer close delivered ep %p cm_id %p tid %d\n",
+ ep, ep->com.cm_id, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
}
}
@@ -700,13 +698,13 @@ static void peer_abort_upcall(struct iwch_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CLOSE;
event.status = -ECONNRESET;
if (ep->com.cm_id) {
- PDBG("abort delivered ep %p cm_id %p tid %d\n", ep,
- ep->com.cm_id, ep->hwtid);
+ pr_debug("abort delivered ep %p cm_id %p tid %d\n", ep,
+ ep->com.cm_id, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
ep->com.cm_id->rem_ref(ep->com.cm_id);
ep->com.cm_id = NULL;
@@ -718,7 +716,7 @@ static void connect_reply_upcall(struct iwch_ep *ep, int status)
{
struct iw_cm_event event;
- PDBG("%s ep %p status %d\n", __func__, ep, status);
+ pr_debug("%s ep %p status %d\n", __func__, ep, status);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CONNECT_REPLY;
event.status = status;
@@ -732,8 +730,8 @@ static void connect_reply_upcall(struct iwch_ep *ep, int status)
event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
}
if (ep->com.cm_id) {
- PDBG("%s ep %p tid %d status %d\n", __func__, ep,
- ep->hwtid, status);
+ pr_debug("%s ep %p tid %d status %d\n", __func__, ep,
+ ep->hwtid, status);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
}
if (status < 0) {
@@ -747,7 +745,7 @@ static void connect_request_upcall(struct iwch_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CONNECT_REQUEST;
memcpy(&event.local_addr, &ep->com.local_addr,
@@ -776,7 +774,7 @@ static void established_upcall(struct iwch_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_ESTABLISHED;
/*
@@ -785,7 +783,7 @@ static void established_upcall(struct iwch_ep *ep)
*/
event.ird = event.ord = 8;
if (ep->com.cm_id) {
- PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
}
}
@@ -795,14 +793,14 @@ static int update_rx_credits(struct iwch_ep *ep, u32 credits)
struct cpl_rx_data_ack *req;
struct sk_buff *skb;
- PDBG("%s ep %p credits %u\n", __func__, ep, credits);
+ pr_debug("%s ep %p credits %u\n", __func__, ep, credits);
skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n");
+ pr_err("update_rx_credits - cannot alloc skb!\n");
return 0;
}
- req = (struct cpl_rx_data_ack *) skb_put(skb, sizeof(*req));
+ req = skb_put(skb, sizeof(*req));
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid));
req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1));
@@ -819,7 +817,7 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
enum iwch_qp_attr_mask mask;
int err;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
/*
* Stop mpa timer. If it expired, then the state has
@@ -906,10 +904,10 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
ep->mpa_attr.recv_marker_enabled = markers_enabled;
ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
ep->mpa_attr.version = mpa_rev;
- PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
- "xmit_marker_enabled=%d, version=%d\n", __func__,
- ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
- ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
+ pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n",
+ __func__,
+ ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
+ ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
attrs.mpa_attr = ep->mpa_attr;
attrs.max_ird = ep->ird;
@@ -944,7 +942,7 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
struct mpa_message *mpa;
u16 plen;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
/*
* Stop mpa timer. If it expired, then the state has
@@ -964,7 +962,7 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
return;
}
- PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
+ pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
/*
* Copy the new data into our accumulation buffer.
@@ -979,7 +977,7 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
*/
if (ep->mpa_pkt_len < sizeof(*mpa))
return;
- PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
+ pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
mpa = (struct mpa_message *) ep->mpa_pkt;
/*
@@ -1029,10 +1027,10 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
ep->mpa_attr.recv_marker_enabled = markers_enabled;
ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
ep->mpa_attr.version = mpa_rev;
- PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
- "xmit_marker_enabled=%d, version=%d\n", __func__,
- ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
- ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
+ pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n",
+ __func__,
+ ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
+ ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
state_set(&ep->com, MPA_REQ_RCVD);
@@ -1047,7 +1045,7 @@ static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct cpl_rx_data *hdr = cplhdr(skb);
unsigned int dlen = ntohs(hdr->len);
- PDBG("%s ep %p dlen %u\n", __func__, ep, dlen);
+ pr_debug("%s ep %p dlen %u\n", __func__, ep, dlen);
skb_pull(skb, sizeof(*hdr));
skb_trim(skb, dlen);
@@ -1065,8 +1063,7 @@ static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
case MPA_REP_SENT:
break;
default:
- printk(KERN_ERR MOD "%s Unexpected streaming data."
- " ep %p state %d tid %d\n",
+ pr_err("%s Unexpected streaming data. ep %p state %d tid %d\n",
__func__, ep, state_read(&ep->com), ep->hwtid);
/*
@@ -1095,11 +1092,11 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
unsigned long flags;
int post_zb = 0;
- PDBG("%s ep %p credits %u\n", __func__, ep, credits);
+ pr_debug("%s ep %p credits %u\n", __func__, ep, credits);
if (credits == 0) {
- PDBG("%s 0 credit ack ep %p state %u\n",
- __func__, ep, state_read(&ep->com));
+ pr_debug("%s 0 credit ack ep %p state %u\n",
+ __func__, ep, state_read(&ep->com));
return CPL_RET_BUF_DONE;
}
@@ -1107,24 +1104,24 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
BUG_ON(credits != 1);
dst_confirm(ep->dst);
if (!ep->mpa_skb) {
- PDBG("%s rdma_init wr_ack ep %p state %u\n",
- __func__, ep, ep->com.state);
+ pr_debug("%s rdma_init wr_ack ep %p state %u\n",
+ __func__, ep, ep->com.state);
if (ep->mpa_attr.initiator) {
- PDBG("%s initiator ep %p state %u\n",
- __func__, ep, ep->com.state);
+ pr_debug("%s initiator ep %p state %u\n",
+ __func__, ep, ep->com.state);
if (peer2peer && ep->com.state == FPDU_MODE)
post_zb = 1;
} else {
- PDBG("%s responder ep %p state %u\n",
- __func__, ep, ep->com.state);
+ pr_debug("%s responder ep %p state %u\n",
+ __func__, ep, ep->com.state);
if (ep->com.state == MPA_REQ_RCVD) {
ep->com.rpl_done = 1;
wake_up(&ep->com.waitq);
}
}
} else {
- PDBG("%s lsm ack ep %p state %u freeing skb\n",
- __func__, ep, ep->com.state);
+ pr_debug("%s lsm ack ep %p state %u freeing skb\n",
+ __func__, ep, ep->com.state);
kfree_skb(ep->mpa_skb);
ep->mpa_skb = NULL;
}
@@ -1140,7 +1137,7 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
unsigned long flags;
int release = 0;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
BUG_ON(!ep);
/*
@@ -1159,8 +1156,7 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
release = 1;
break;
default:
- printk(KERN_ERR "%s ep %p state %d\n",
- __func__, ep, ep->com.state);
+ pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state);
break;
}
spin_unlock_irqrestore(&ep->com.lock, flags);
@@ -1184,8 +1180,8 @@ static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct iwch_ep *ep = ctx;
struct cpl_act_open_rpl *rpl = cplhdr(skb);
- PDBG("%s ep %p status %u errno %d\n", __func__, ep, rpl->status,
- status2errno(rpl->status));
+ pr_debug("%s ep %p status %u errno %d\n", __func__, ep, rpl->status,
+ status2errno(rpl->status));
connect_reply_upcall(ep, status2errno(rpl->status));
state_set(&ep->com, DEAD);
if (ep->com.tdev->type != T3A && act_open_has_tid(rpl->status))
@@ -1202,14 +1198,14 @@ static int listen_start(struct iwch_listen_ep *ep)
struct sk_buff *skb;
struct cpl_pass_open_req *req;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "t3c_listen_start failed to alloc skb!\n");
+ pr_err("t3c_listen_start failed to alloc skb!\n");
return -ENOMEM;
}
- req = (struct cpl_pass_open_req *) skb_put(skb, sizeof(*req));
+ req = skb_put(skb, sizeof(*req));
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, ep->stid));
req->local_port = ep->com.local_addr.sin_port;
@@ -1230,8 +1226,8 @@ static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct iwch_listen_ep *ep = ctx;
struct cpl_pass_open_rpl *rpl = cplhdr(skb);
- PDBG("%s ep %p status %d error %d\n", __func__, ep,
- rpl->status, status2errno(rpl->status));
+ pr_debug("%s ep %p status %d error %d\n", __func__, ep,
+ rpl->status, status2errno(rpl->status));
ep->com.rpl_err = status2errno(rpl->status);
ep->com.rpl_done = 1;
wake_up(&ep->com.waitq);
@@ -1244,13 +1240,13 @@ static int listen_stop(struct iwch_listen_ep *ep)
struct sk_buff *skb;
struct cpl_close_listserv_req *req;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
+ pr_err("%s - failed to alloc skb\n", __func__);
return -ENOMEM;
}
- req = (struct cpl_close_listserv_req *) skb_put(skb, sizeof(*req));
+ req = skb_put(skb, sizeof(*req));
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
req->cpu_idx = 0;
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid));
@@ -1264,7 +1260,7 @@ static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb,
struct iwch_listen_ep *ep = ctx;
struct cpl_close_listserv_rpl *rpl = cplhdr(skb);
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
ep->com.rpl_err = status2errno(rpl->status);
ep->com.rpl_done = 1;
wake_up(&ep->com.waitq);
@@ -1278,7 +1274,7 @@ static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb)
u32 opt0h, opt0l, opt2;
int wscale;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
BUG_ON(skb_cloned(skb));
skb_trim(skb, sizeof(*rpl));
skb_get(skb);
@@ -1312,8 +1308,8 @@ static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb)
static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip,
struct sk_buff *skb)
{
- PDBG("%s t3cdev %p tid %u peer_ip %x\n", __func__, tdev, hwtid,
- peer_ip);
+ pr_debug("%s t3cdev %p tid %u peer_ip %x\n", __func__, tdev, hwtid,
+ peer_ip);
BUG_ON(skb_cloned(skb));
skb_trim(skb, sizeof(struct cpl_tid_release));
skb_get(skb);
@@ -1347,11 +1343,10 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct rtable *rt;
struct iff_mac tim;
- PDBG("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid);
+ pr_debug("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid);
if (state_read(&parent_ep->com) != LISTEN) {
- printk(KERN_ERR "%s - listening ep not in LISTEN\n",
- __func__);
+ pr_err("%s - listening ep not in LISTEN\n", __func__);
goto reject;
}
@@ -1361,8 +1356,7 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
tim.mac_addr = req->dst_mac;
tim.vlan_tag = ntohs(req->vlan_tag);
if (tdev->ctl(tdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
- printk(KERN_ERR "%s bad dst mac %pM\n",
- __func__, req->dst_mac);
+ pr_err("%s bad dst mac %pM\n", __func__, req->dst_mac);
goto reject;
}
@@ -1373,22 +1367,19 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
req->local_port,
req->peer_port, G_PASS_OPEN_TOS(ntohl(req->tos_tid)));
if (!rt) {
- printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
- __func__);
+ pr_err("%s - failed to find dst entry!\n", __func__);
goto reject;
}
dst = &rt->dst;
l2t = t3_l2t_get(tdev, dst, NULL, &req->peer_ip);
if (!l2t) {
- printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
- __func__);
+ pr_err("%s - failed to allocate l2t entry!\n", __func__);
dst_release(dst);
goto reject;
}
child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
if (!child_ep) {
- printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n",
- __func__);
+ pr_err("%s - failed to allocate ep entry!\n", __func__);
l2t_release(tdev, l2t);
dst_release(dst);
goto reject;
@@ -1423,7 +1414,7 @@ static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct iwch_ep *ep = ctx;
struct cpl_pass_establish *req = cplhdr(skb);
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
ep->snd_seq = ntohl(req->snd_isn);
ep->rcv_seq = ntohl(req->rcv_isn);
@@ -1444,7 +1435,7 @@ static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
int disconnect = 1;
int release = 0;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
dst_confirm(ep->dst);
spin_lock_irqsave(&ep->com.lock, flags);
@@ -1467,14 +1458,14 @@ static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
__state_set(&ep->com, CLOSING);
ep->com.rpl_done = 1;
ep->com.rpl_err = -ECONNRESET;
- PDBG("waking up ep %p\n", ep);
+ pr_debug("waking up ep %p\n", ep);
wake_up(&ep->com.waitq);
break;
case MPA_REP_SENT:
__state_set(&ep->com, CLOSING);
ep->com.rpl_done = 1;
ep->com.rpl_err = -ECONNRESET;
- PDBG("waking up ep %p\n", ep);
+ pr_debug("waking up ep %p\n", ep);
wake_up(&ep->com.waitq);
break;
case FPDU_MODE:
@@ -1539,8 +1530,8 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
unsigned long flags;
if (is_neg_adv_abort(req->status)) {
- PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep,
- ep->hwtid);
+ pr_debug("%s neg_adv_abort ep %p tid %d\n", __func__, ep,
+ ep->hwtid);
t3_l2t_send_event(ep->com.tdev, ep->l2t);
return CPL_RET_BUF_DONE;
}
@@ -1554,7 +1545,7 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
}
spin_lock_irqsave(&ep->com.lock, flags);
- PDBG("%s ep %p state %u\n", __func__, ep, ep->com.state);
+ pr_debug("%s ep %p state %u\n", __func__, ep, ep->com.state);
switch (ep->com.state) {
case CONNECTING:
break;
@@ -1568,7 +1559,7 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
case MPA_REP_SENT:
ep->com.rpl_done = 1;
ep->com.rpl_err = -ECONNRESET;
- PDBG("waking up ep %p\n", ep);
+ pr_debug("waking up ep %p\n", ep);
wake_up(&ep->com.waitq);
break;
case MPA_REQ_RCVD:
@@ -1581,7 +1572,7 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
*/
ep->com.rpl_done = 1;
ep->com.rpl_err = -ECONNRESET;
- PDBG("waking up ep %p\n", ep);
+ pr_debug("waking up ep %p\n", ep);
wake_up(&ep->com.waitq);
break;
case MORIBUND:
@@ -1595,16 +1586,14 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
&attrs, 1);
if (ret)
- printk(KERN_ERR MOD
- "%s - qp <- error failed!\n",
- __func__);
+ pr_err("%s - qp <- error failed!\n", __func__);
}
peer_abort_upcall(ep);
break;
case ABORTING:
break;
case DEAD:
- PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
+ pr_debug("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
spin_unlock_irqrestore(&ep->com.lock, flags);
return CPL_RET_BUF_DONE;
default:
@@ -1620,13 +1609,12 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
if (!rpl_skb) {
- printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
- __func__);
+ pr_err("%s - cannot allocate skb!\n", __func__);
release = 1;
goto out;
}
rpl_skb->priority = CPL_PRIORITY_DATA;
- rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
+ rpl = skb_put(rpl_skb, sizeof(*rpl));
rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
@@ -1645,7 +1633,7 @@ static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
unsigned long flags;
int release = 0;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
BUG_ON(!ep);
/* The cm_id may be null if we failed to connect */
@@ -1699,9 +1687,9 @@ static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
if (state_read(&ep->com) != FPDU_MODE)
return CPL_RET_BUF_DONE;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
skb_pull(skb, sizeof(struct cpl_rdma_terminate));
- PDBG("%s saving %d bytes of term msg\n", __func__, skb->len);
+ pr_debug("%s saving %d bytes of term msg\n", __func__, skb->len);
skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer,
skb->len);
ep->com.qp->attr.terminate_msg_len = skb->len;
@@ -1714,12 +1702,12 @@ static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct cpl_rdma_ec_status *rep = cplhdr(skb);
struct iwch_ep *ep = ctx;
- PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid,
- rep->status);
+ pr_debug("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid,
+ rep->status);
if (rep->status) {
struct iwch_qp_attributes attrs;
- printk(KERN_ERR MOD "%s BAD CLOSE - Aborting tid %u\n",
+ pr_err("%s BAD CLOSE - Aborting tid %u\n",
__func__, ep->hwtid);
stop_ep_timer(ep);
attrs.next_state = IWCH_QP_STATE_ERROR;
@@ -1739,8 +1727,8 @@ static void ep_timeout(unsigned long arg)
int abort = 1;
spin_lock_irqsave(&ep->com.lock, flags);
- PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
- ep->com.state);
+ pr_debug("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
+ ep->com.state);
switch (ep->com.state) {
case MPA_REQ_SENT:
__state_set(&ep->com, ABORTING);
@@ -1774,7 +1762,7 @@ int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
{
int err;
struct iwch_ep *ep = to_ep(cm_id);
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
if (state_read(&ep->com) == DEAD) {
put_ep(&ep->com);
@@ -1800,7 +1788,7 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct iwch_dev *h = to_iwch_dev(cm_id->device);
struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
if (state_read(&ep->com) == DEAD) {
err = -ECONNRESET;
goto err;
@@ -1826,7 +1814,7 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (peer2peer && ep->ird == 0)
ep->ird = 1;
- PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
+ pr_debug("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
/* bind QP to EP and move to RTS */
attrs.mpa_attr = ep->mpa_attr;
@@ -1907,7 +1895,7 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
if (!ep) {
- printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
+ pr_err("%s - cannot alloc ep\n", __func__);
err = -ENOMEM;
goto out;
}
@@ -1928,15 +1916,15 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep->com.cm_id = cm_id;
ep->com.qp = get_qhp(h, conn_param->qpn);
BUG_ON(!ep->com.qp);
- PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
- ep->com.qp, cm_id);
+ pr_debug("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
+ ep->com.qp, cm_id);
/*
* Allocate an active TID to initiate a TCP connection.
*/
ep->atid = cxgb3_alloc_atid(h->rdev.t3cdev_p, &t3c_client, ep);
if (ep->atid == -1) {
- printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
+ pr_err("%s - cannot alloc atid\n", __func__);
err = -ENOMEM;
goto fail2;
}
@@ -1946,7 +1934,7 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
raddr->sin_addr.s_addr, laddr->sin_port,
raddr->sin_port, IPTOS_LOWDELAY);
if (!rt) {
- printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
+ pr_err("%s - cannot find route\n", __func__);
err = -EHOSTUNREACH;
goto fail3;
}
@@ -1954,7 +1942,7 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL,
&raddr->sin_addr.s_addr);
if (!ep->l2t) {
- printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
+ pr_err("%s - cannot alloc l2e\n", __func__);
err = -ENOMEM;
goto fail4;
}
@@ -1999,11 +1987,11 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
if (!ep) {
- printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
+ pr_err("%s - cannot alloc ep\n", __func__);
err = -ENOMEM;
goto fail1;
}
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
ep->com.tdev = h->rdev.t3cdev_p;
cm_id->add_ref(cm_id);
ep->com.cm_id = cm_id;
@@ -2016,7 +2004,7 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
*/
ep->stid = cxgb3_alloc_stid(h->rdev.t3cdev_p, &t3c_client, ep);
if (ep->stid == -1) {
- printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
+ pr_err("%s - cannot alloc atid\n", __func__);
err = -ENOMEM;
goto fail2;
}
@@ -2048,7 +2036,7 @@ int iwch_destroy_listen(struct iw_cm_id *cm_id)
int err;
struct iwch_listen_ep *ep = to_listen_ep(cm_id);
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
might_sleep();
state_set(&ep->com, DEAD);
@@ -2077,8 +2065,8 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
spin_lock_irqsave(&ep->com.lock, flags);
- PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
- states[ep->com.state], abrupt);
+ pr_debug("%s ep %p state %s, abrupt %d\n", __func__, ep,
+ states[ep->com.state], abrupt);
tdev = (struct t3cdev *)ep->com.tdev;
rdev = (struct cxio_rdev *)tdev->ulp;
@@ -2115,8 +2103,8 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
case MORIBUND:
case ABORTING:
case DEAD:
- PDBG("%s ignoring disconnect ep %p state %u\n",
- __func__, ep, ep->com.state);
+ pr_debug("%s ignoring disconnect ep %p state %u\n",
+ __func__, ep, ep->com.state);
break;
default:
BUG();
@@ -2145,8 +2133,8 @@ int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
if (ep->dst != old)
return 0;
- PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
- l2t);
+ pr_debug("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
+ l2t);
dst_hold(new);
l2t_release(ep->com.tdev, ep->l2t);
ep->l2t = l2t;
@@ -2225,8 +2213,8 @@ static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
if (rpl->status != CPL_ERR_NONE) {
- printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u "
- "for tid %u\n", rpl->status, GET_TID(rpl));
+ pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n",
+ rpl->status, GET_TID(rpl));
}
return CPL_RET_BUF_DONE;
}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
index b9efadfffb4f..cc7fe644d260 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
@@ -53,17 +53,17 @@
#define MPA_MARKERS 0x80
#define MPA_FLAGS_MASK 0xE0
-#define put_ep(ep) { \
- PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \
- ep, atomic_read(&((ep)->kref.refcount))); \
- WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \
- kref_put(&((ep)->kref), __free_ep); \
+#define put_ep(ep) { \
+ pr_debug("put_ep (via %s:%u) ep %p refcnt %d\n", \
+ __func__, __LINE__, ep, kref_read(&((ep)->kref))); \
+ WARN_ON(kref_read(&((ep)->kref)) < 1); \
+ kref_put(&((ep)->kref), __free_ep); \
}
-#define get_ep(ep) { \
- PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __func__, __LINE__, \
- ep, atomic_read(&((ep)->kref.refcount))); \
- kref_get(&((ep)->kref)); \
+#define get_ep(ep) { \
+ pr_debug("get_ep (via %s:%u) ep %p, refcnt %d\n", \
+ __func__, __LINE__, ep, kref_read(&((ep)->kref))); \
+ kref_get(&((ep)->kref)); \
}
struct mpa_message {
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
index 97fbfd2c298e..dd5348e48806 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -67,8 +67,8 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie,
&credit);
if (t3a_device(chp->rhp) && credit) {
- PDBG("%s updating %d cq credits on id %d\n", __func__,
- credit, chp->cq.cqid);
+ pr_debug("%s updating %d cq credits on id %d\n", __func__,
+ credit, chp->cq.cqid);
cxio_hal_cq_op(&rhp->rdev, &chp->cq, CQ_CREDIT_UPDATE, credit);
}
@@ -83,11 +83,11 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
wc->vendor_err = CQE_STATUS(cqe);
wc->wc_flags = 0;
- PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x "
- "lo 0x%x cookie 0x%llx\n", __func__,
- CQE_QPID(cqe), CQE_TYPE(cqe),
- CQE_OPCODE(cqe), CQE_STATUS(cqe), CQE_WRID_HI(cqe),
- CQE_WRID_LOW(cqe), (unsigned long long) cookie);
+ pr_debug("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x lo 0x%x cookie 0x%llx\n",
+ __func__,
+ CQE_QPID(cqe), CQE_TYPE(cqe),
+ CQE_OPCODE(cqe), CQE_STATUS(cqe), CQE_WRID_HI(cqe),
+ CQE_WRID_LOW(cqe), (unsigned long long)cookie);
if (CQE_TYPE(cqe) == 0) {
if (!CQE_STATUS(cqe))
@@ -122,8 +122,7 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
wc->opcode = IB_WC_REG_MR;
break;
default:
- printk(KERN_ERR MOD "Unexpected opcode %d "
- "in the CQE received for QPID=0x%0x\n",
+ pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
CQE_OPCODE(cqe), CQE_QPID(cqe));
ret = -EINVAL;
goto out;
@@ -177,8 +176,8 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
wc->status = IB_WC_WR_FLUSH_ERR;
break;
default:
- printk(KERN_ERR MOD "Unexpected cqe_status 0x%x for "
- "QPID=0x%0x\n", CQE_STATUS(cqe), CQE_QPID(cqe));
+ pr_err("Unexpected cqe_status 0x%x for QPID=0x%0x\n",
+ CQE_STATUS(cqe), CQE_QPID(cqe));
ret = -EINVAL;
}
}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c
index abcc9e76962b..4a0c82a8fb60 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_ev.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_ev.c
@@ -52,7 +52,7 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp,
qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
if (!qhp) {
- printk(KERN_ERR "%s unaffiliated error 0x%x qpid 0x%x\n",
+ pr_err("%s unaffiliated error 0x%x qpid 0x%x\n",
__func__, CQE_STATUS(rsp_msg->cqe),
CQE_QPID(rsp_msg->cqe));
spin_unlock(&rnicp->lock);
@@ -61,15 +61,16 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp,
if ((qhp->attr.state == IWCH_QP_STATE_ERROR) ||
(qhp->attr.state == IWCH_QP_STATE_TERMINATE)) {
- PDBG("%s AE received after RTS - "
- "qp state %d qpid 0x%x status 0x%x\n", __func__,
- qhp->attr.state, qhp->wq.qpid, CQE_STATUS(rsp_msg->cqe));
+ pr_debug("%s AE received after RTS - qp state %d qpid 0x%x status 0x%x\n",
+ __func__,
+ qhp->attr.state, qhp->wq.qpid,
+ CQE_STATUS(rsp_msg->cqe));
spin_unlock(&rnicp->lock);
return;
}
- printk(KERN_ERR "%s - AE qpid 0x%x opcode %d status 0x%x "
- "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __func__,
+ pr_err("%s - AE qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
+ __func__,
CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
@@ -117,8 +118,7 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb)
chp = get_chp(rnicp, cqid);
qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
if (!chp || !qhp) {
- printk(KERN_ERR MOD "BAD AE cqid 0x%x qpid 0x%x opcode %d "
- "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x \n",
+ pr_err("BAD AE cqid 0x%x qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
cqid, CQE_QPID(rsp_msg->cqe),
CQE_OPCODE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe),
@@ -137,12 +137,12 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb)
if ((CQE_OPCODE(rsp_msg->cqe) == T3_TERMINATE) &&
(CQE_STATUS(rsp_msg->cqe) == 0)) {
if (SQ_TYPE(rsp_msg->cqe)) {
- PDBG("%s QPID 0x%x ep %p disconnecting\n",
- __func__, qhp->wq.qpid, qhp->ep);
+ pr_debug("%s QPID 0x%x ep %p disconnecting\n",
+ __func__, qhp->wq.qpid, qhp->ep);
iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC);
} else {
- PDBG("%s post REQ_ERR AE QPID 0x%x\n", __func__,
- qhp->wq.qpid);
+ pr_debug("%s post REQ_ERR AE QPID 0x%x\n", __func__,
+ qhp->wq.qpid);
post_qp_event(rnicp, chp, rsp_msg,
IB_EVENT_QP_REQ_ERR, 0);
iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC);
@@ -218,7 +218,7 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb)
break;
default:
- printk(KERN_ERR MOD "Unknown T3 status 0x%x QPID 0x%x\n",
+ pr_err("Unknown T3 status 0x%x QPID 0x%x\n",
CQE_STATUS(rsp_msg->cqe), qhp->wq.qpid);
post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1);
break;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
index 1d04c872c9d5..12886b1b4b10 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_mem.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c
@@ -48,7 +48,7 @@ static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
mhp->attr.stag = stag;
mmid = stag >> 8;
mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
- PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
+ pr_debug("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 6262dc035f3c..0cd0c1fa27d4 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -37,7 +37,7 @@
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/list.h>
-#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/spinlock.h>
#include <linux/ethtool.h>
#include <linux/rtnetlink.h>
@@ -62,7 +62,7 @@
#include "common.h"
static struct ib_ah *iwch_ah_create(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr,
+ struct rdma_ah_attr *ah_attr,
struct ib_udata *udata)
{
return ERR_PTR(-ENOSYS);
@@ -103,7 +103,7 @@ static int iwch_dealloc_ucontext(struct ib_ucontext *context)
struct iwch_ucontext *ucontext = to_iwch_ucontext(context);
struct iwch_mm_entry *mm, *tmp;
- PDBG("%s context %p\n", __func__, context);
+ pr_debug("%s context %p\n", __func__, context);
list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
kfree(mm);
cxio_release_ucontext(&rhp->rdev, &ucontext->uctx);
@@ -117,7 +117,7 @@ static struct ib_ucontext *iwch_alloc_ucontext(struct ib_device *ibdev,
struct iwch_ucontext *context;
struct iwch_dev *rhp = to_iwch_dev(ibdev);
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
return ERR_PTR(-ENOMEM);
@@ -131,7 +131,7 @@ static int iwch_destroy_cq(struct ib_cq *ib_cq)
{
struct iwch_cq *chp;
- PDBG("%s ib_cq %p\n", __func__, ib_cq);
+ pr_debug("%s ib_cq %p\n", __func__, ib_cq);
chp = to_iwch_cq(ib_cq);
remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
@@ -157,7 +157,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev,
static int warned;
size_t resplen;
- PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
+ pr_debug("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
if (attr->flags)
return ERR_PTR(-EINVAL);
@@ -227,8 +227,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev,
mm->addr = virt_to_phys(chp->cq.queue);
if (udata->outlen < sizeof uresp) {
if (!warned++)
- printk(KERN_WARNING MOD "Warning - "
- "downlevel libcxgb3 (non-fatal).\n");
+ pr_warn("Warning - downlevel libcxgb3 (non-fatal)\n");
mm->len = PAGE_ALIGN((1UL << uresp.size_log2) *
sizeof(struct t3_cqe));
resplen = sizeof(struct iwch_create_cq_resp_v0);
@@ -246,9 +245,9 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev,
}
insert_mmap(ucontext, mm);
}
- PDBG("created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx\n",
- chp->cq.cqid, chp, (1 << chp->cq.size_log2),
- (unsigned long long) chp->cq.dma_addr);
+ pr_debug("created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx\n",
+ chp->cq.cqid, chp, (1 << chp->cq.size_log2),
+ (unsigned long long)chp->cq.dma_addr);
return &chp->ibcq;
}
@@ -259,7 +258,7 @@ static int iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
struct t3_cq oldcq, newcq;
int ret;
- PDBG("%s ib_cq %p cqe %d\n", __func__, cq, cqe);
+ pr_debug("%s ib_cq %p cqe %d\n", __func__, cq, cqe);
/* We don't downsize... */
if (cqe <= cq->cqe)
@@ -306,8 +305,7 @@ static int iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
oldcq.cqid = newcq.cqid;
ret = cxio_destroy_cq(&chp->rhp->rdev, &oldcq);
if (ret) {
- printk(KERN_ERR MOD "%s - cxio_destroy_cq failed %d\n",
- __func__, ret);
+ pr_err("%s - cxio_destroy_cq failed %d\n", __func__, ret);
}
/* add user hooks here */
@@ -342,12 +340,11 @@ static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
chp->cq.rptr = rptr;
} else
spin_lock_irqsave(&chp->lock, flag);
- PDBG("%s rptr 0x%x\n", __func__, chp->cq.rptr);
+ pr_debug("%s rptr 0x%x\n", __func__, chp->cq.rptr);
err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0);
spin_unlock_irqrestore(&chp->lock, flag);
if (err < 0)
- printk(KERN_ERR MOD "Error %d rearming CQID 0x%x\n", err,
- chp->cq.cqid);
+ pr_err("Error %d rearming CQID 0x%x\n", err, chp->cq.cqid);
if (err > 0 && !(flags & IB_CQ_REPORT_MISSED_EVENTS))
err = 0;
return err;
@@ -363,8 +360,8 @@ static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
struct iwch_ucontext *ucontext;
u64 addr;
- PDBG("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff,
- key, len);
+ pr_debug("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff,
+ key, len);
if (vma->vm_start & (PAGE_SIZE-1)) {
return -EINVAL;
@@ -416,7 +413,7 @@ static int iwch_deallocate_pd(struct ib_pd *pd)
php = to_iwch_pd(pd);
rhp = php->rhp;
- PDBG("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
+ pr_debug("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid);
kfree(php);
return 0;
@@ -430,7 +427,7 @@ static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev,
u32 pdid;
struct iwch_dev *rhp;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
rhp = (struct iwch_dev *) ibdev;
pdid = cxio_hal_get_pdid(rhp->rdev.rscp);
if (!pdid)
@@ -448,7 +445,7 @@ static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev,
return ERR_PTR(-EFAULT);
}
}
- PDBG("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
+ pr_debug("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
return &php->ibpd;
}
@@ -458,7 +455,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
struct iwch_mr *mhp;
u32 mmid;
- PDBG("%s ib_mr %p\n", __func__, ib_mr);
+ pr_debug("%s ib_mr %p\n", __func__, ib_mr);
mhp = to_iwch_mr(ib_mr);
kfree(mhp->pages);
@@ -472,7 +469,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
kfree((void *) (unsigned long) mhp->kva);
if (mhp->umem)
ib_umem_release(mhp->umem);
- PDBG("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp);
+ pr_debug("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp);
kfree(mhp);
return 0;
}
@@ -487,13 +484,13 @@ static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
__be64 *page_list;
int shift = 26, npages, ret, i;
- PDBG("%s ib_pd %p\n", __func__, pd);
+ pr_debug("%s ib_pd %p\n", __func__, pd);
/*
* T3 only supports 32 bits of size.
*/
if (sizeof(phys_addr_t) > 4) {
- pr_warn_once(MOD "Cannot support dma_mrs on this platform.\n");
+ pr_warn_once("Cannot support dma_mrs on this platform\n");
return ERR_PTR(-ENOTSUPP);
}
@@ -518,8 +515,8 @@ static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
for (i = 0; i < npages; i++)
page_list[i] = cpu_to_be64((u64)i << shift);
- PDBG("%s mask 0x%llx shift %d len %lld pbl_size %d\n",
- __func__, mask, shift, total_size, npages);
+ pr_debug("%s mask 0x%llx shift %d len %lld pbl_size %d\n",
+ __func__, mask, shift, total_size, npages);
ret = iwch_alloc_pbl(mhp, npages);
if (ret) {
@@ -567,7 +564,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct iwch_mr *mhp;
struct iwch_reg_user_mr_resp uresp;
struct scatterlist *sg;
- PDBG("%s ib_pd %p\n", __func__, pd);
+ pr_debug("%s ib_pd %p\n", __func__, pd);
php = to_iwch_pd(pd);
rhp = php->rhp;
@@ -584,7 +581,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return ERR_PTR(err);
}
- shift = ffs(mhp->umem->page_size) - 1;
+ shift = mhp->umem->page_shift;
n = mhp->umem->nmap;
@@ -604,7 +601,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = cpu_to_be64(sg_dma_address(sg) +
- mhp->umem->page_size * k);
+ (k << shift));
if (i == PAGE_SIZE / sizeof *pages) {
err = iwch_write_pbl(mhp, pages, i, n);
if (err)
@@ -637,8 +634,8 @@ pbl_done:
if (udata && !t3a_device(rhp)) {
uresp.pbl_addr = (mhp->attr.pbl_addr -
rhp->rdev.rnic_info.pbl_base) >> 3;
- PDBG("%s user resp pbl_addr 0x%x\n", __func__,
- uresp.pbl_addr);
+ pr_debug("%s user resp pbl_addr 0x%x\n", __func__,
+ uresp.pbl_addr);
if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
iwch_dereg_mr(&mhp->ibmr);
@@ -692,7 +689,7 @@ static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
kfree(mhp);
return ERR_PTR(-ENOMEM);
}
- PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
+ pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
return &(mhp->ibmw);
}
@@ -707,7 +704,7 @@ static int iwch_dealloc_mw(struct ib_mw *mw)
mmid = (mw->rkey) >> 8;
cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
remove_handle(rhp, &rhp->mmidr, mmid);
- PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
+ pr_debug("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
kfree(mhp);
return 0;
}
@@ -721,7 +718,7 @@ static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd,
struct iwch_mr *mhp;
u32 mmid;
u32 stag = 0;
- int ret = 0;
+ int ret = -ENOMEM;
if (mr_type != IB_MR_TYPE_MEM_REG ||
max_num_sg > T3_MAX_FASTREG_DEPTH)
@@ -734,10 +731,8 @@ static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd,
goto err;
mhp->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
- if (!mhp->pages) {
- ret = -ENOMEM;
+ if (!mhp->pages)
goto pl_err;
- }
mhp->rhp = rhp;
ret = iwch_alloc_pbl(mhp, max_num_sg);
@@ -754,10 +749,11 @@ static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd,
mhp->attr.state = 1;
mmid = (stag) >> 8;
mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
- if (insert_handle(rhp, &rhp->mmidr, mhp, mmid))
+ ret = insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+ if (ret)
goto err3;
- PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
+ pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
return &(mhp->ibmr);
err3:
cxio_dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
@@ -818,8 +814,8 @@ static int iwch_destroy_qp(struct ib_qp *ib_qp)
cxio_destroy_qp(&rhp->rdev, &qhp->wq,
ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
- PDBG("%s ib_qp %p qpid 0x%0x qhp %p\n", __func__,
- ib_qp, qhp->wq.qpid, qhp);
+ pr_debug("%s ib_qp %p qpid 0x%0x qhp %p\n", __func__,
+ ib_qp, qhp->wq.qpid, qhp);
kfree(qhp);
return 0;
}
@@ -837,7 +833,7 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
int wqsize, sqsize, rqsize;
struct iwch_ucontext *ucontext;
- PDBG("%s ib_pd %p\n", __func__, pd);
+ pr_debug("%s ib_pd %p\n", __func__, pd);
if (attrs->qp_type != IB_QPT_RC)
return ERR_PTR(-EINVAL);
php = to_iwch_pd(pd);
@@ -878,8 +874,8 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
if (!ucontext && wqsize < (rqsize + (2 * sqsize)))
wqsize = roundup_pow_of_two(rqsize +
roundup_pow_of_two(attrs->cap.max_send_wr * 2));
- PDBG("%s wqsize %d sqsize %d rqsize %d\n", __func__,
- wqsize, sqsize, rqsize);
+ pr_debug("%s wqsize %d sqsize %d rqsize %d\n", __func__,
+ wqsize, sqsize, rqsize);
qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
if (!qhp)
return ERR_PTR(-ENOMEM);
@@ -974,11 +970,10 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
}
qhp->ibqp.qp_num = qhp->wq.qpid;
init_timer(&(qhp->timer));
- PDBG("%s sq_num_entries %d, rq_num_entries %d "
- "qpid 0x%0x qhp %p dma_addr 0x%llx size %d rq_addr 0x%x\n",
- __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
- qhp->wq.qpid, qhp, (unsigned long long) qhp->wq.dma_addr,
- 1 << qhp->wq.size_log2, qhp->wq.rq_addr);
+ pr_debug("%s sq_num_entries %d, rq_num_entries %d qpid 0x%0x qhp %p dma_addr 0x%llx size %d rq_addr 0x%x\n",
+ __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
+ qhp->wq.qpid, qhp, (unsigned long long)qhp->wq.dma_addr,
+ 1 << qhp->wq.size_log2, qhp->wq.rq_addr);
return &qhp->ibqp;
}
@@ -990,7 +985,7 @@ static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
enum iwch_qp_attr_mask mask = 0;
struct iwch_qp_attributes attrs;
- PDBG("%s ib_qp %p\n", __func__, ibqp);
+ pr_debug("%s ib_qp %p\n", __func__, ibqp);
/* iwarp does not support the RTR state */
if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
@@ -1023,20 +1018,20 @@ static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
void iwch_qp_add_ref(struct ib_qp *qp)
{
- PDBG("%s ib_qp %p\n", __func__, qp);
+ pr_debug("%s ib_qp %p\n", __func__, qp);
atomic_inc(&(to_iwch_qp(qp)->refcnt));
}
void iwch_qp_rem_ref(struct ib_qp *qp)
{
- PDBG("%s ib_qp %p\n", __func__, qp);
+ pr_debug("%s ib_qp %p\n", __func__, qp);
if (atomic_dec_and_test(&(to_iwch_qp(qp)->refcnt)))
wake_up(&(to_iwch_qp(qp)->wait));
}
static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn)
{
- PDBG("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn);
+ pr_debug("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn);
return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn);
}
@@ -1044,7 +1039,7 @@ static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn)
static int iwch_query_pkey(struct ib_device *ibdev,
u8 port, u16 index, u16 * pkey)
{
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
*pkey = 0;
return 0;
}
@@ -1054,8 +1049,8 @@ static int iwch_query_gid(struct ib_device *ibdev, u8 port,
{
struct iwch_dev *dev;
- PDBG("%s ibdev %p, port %d, index %d, gid %p\n",
- __func__, ibdev, port, index, gid);
+ pr_debug("%s ibdev %p, port %d, index %d, gid %p\n",
+ __func__, ibdev, port, index, gid);
dev = to_iwch_dev(ibdev);
BUG_ON(port == 0 || port > 2);
memset(&(gid->raw[0]), 0, sizeof(gid->raw));
@@ -1090,7 +1085,7 @@ static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
struct iwch_dev *dev;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
if (uhw->inlen || uhw->outlen)
return -EINVAL;
@@ -1128,12 +1123,12 @@ static int iwch_query_port(struct ib_device *ibdev,
struct net_device *netdev;
struct in_device *inetdev;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
dev = to_iwch_dev(ibdev);
netdev = dev->rdev.port_info.lldevs[port-1];
- memset(props, 0, sizeof(struct ib_port_attr));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->max_mtu = IB_MTU_4096;
props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
@@ -1171,7 +1166,7 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
{
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
ibdev.dev);
- PDBG("%s dev 0x%p\n", __func__, dev);
+ pr_debug("%s dev 0x%p\n", __func__, dev);
return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
}
@@ -1183,7 +1178,7 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
struct ethtool_drvinfo info;
struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
- PDBG("%s dev 0x%p\n", __func__, dev);
+ pr_debug("%s dev 0x%p\n", __func__, dev);
lldev->ethtool_ops->get_drvinfo(lldev, &info);
return sprintf(buf, "%s\n", info.driver);
}
@@ -1193,7 +1188,7 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
{
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
ibdev.dev);
- PDBG("%s dev 0x%p\n", __func__, dev);
+ pr_debug("%s dev 0x%p\n", __func__, dev);
return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor,
iwch_dev->rdev.rnic_info.pdev->device);
}
@@ -1278,7 +1273,7 @@ static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats,
if (port != 0 || !stats)
return -ENOSYS;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
dev = to_iwch_dev(ibdev);
ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m);
if (ret)
@@ -1329,13 +1324,14 @@ static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
- err = iwch_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
return 0;
}
@@ -1347,7 +1343,7 @@ static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str,
struct ethtool_drvinfo info;
struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
- PDBG("%s dev 0x%p\n", __func__, iwch_dev);
+ pr_debug("%s dev 0x%p\n", __func__, iwch_dev);
lldev->ethtool_ops->get_drvinfo(lldev, &info);
snprintf(str, str_len, "%s", info.fw_version);
}
@@ -1357,7 +1353,7 @@ int iwch_register_device(struct iwch_dev *dev)
int ret;
int i;
- PDBG("%s iwch_dev %p\n", __func__, dev);
+ pr_debug("%s iwch_dev %p\n", __func__, dev);
strlcpy(dev->ibdev.name, "cxgb3_%d", IB_DEVICE_NAME_MAX);
memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
@@ -1392,7 +1388,7 @@ int iwch_register_device(struct iwch_dev *dev)
memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC));
dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
dev->ibdev.num_comp_vectors = 1;
- dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev);
+ dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev;
dev->ibdev.query_device = iwch_query_device;
dev->ibdev.query_port = iwch_query_port;
dev->ibdev.query_pkey = iwch_query_pkey;
@@ -1468,7 +1464,7 @@ void iwch_unregister_device(struct iwch_dev *dev)
{
int i;
- PDBG("%s iwch_dev %p\n", __func__, dev);
+ pr_debug("%s iwch_dev %p\n", __func__, dev);
for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i)
device_remove_file(&dev->ibdev.dev,
iwch_class_attributes[i]);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 252c464a09f6..9e216edec4c0 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -217,8 +217,9 @@ static inline struct iwch_mm_entry *remove_mmap(struct iwch_ucontext *ucontext,
if (mm->key == key && mm->len == len) {
list_del_init(&mm->entry);
spin_unlock(&ucontext->mmap_lock);
- PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__,
- key, (unsigned long long) mm->addr, mm->len);
+ pr_debug("%s key 0x%x addr 0x%llx len %d\n",
+ __func__, key,
+ (unsigned long long)mm->addr, mm->len);
return mm;
}
}
@@ -230,8 +231,8 @@ static inline void insert_mmap(struct iwch_ucontext *ucontext,
struct iwch_mm_entry *mm)
{
spin_lock(&ucontext->mmap_lock);
- PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__,
- mm->key, (unsigned long long) mm->addr, mm->len);
+ pr_debug("%s key 0x%x addr 0x%llx len %d\n",
+ __func__, mm->key, (unsigned long long)mm->addr, mm->len);
list_add_tail(&mm->entry, &ucontext->mmaps);
spin_unlock(&ucontext->mmap_lock);
}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index d939980a708f..7f633da0185d 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -208,30 +208,30 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8);
if (!mhp) {
- PDBG("%s %d\n", __func__, __LINE__);
+ pr_debug("%s %d\n", __func__, __LINE__);
return -EIO;
}
if (!mhp->attr.state) {
- PDBG("%s %d\n", __func__, __LINE__);
+ pr_debug("%s %d\n", __func__, __LINE__);
return -EIO;
}
if (mhp->attr.zbva) {
- PDBG("%s %d\n", __func__, __LINE__);
+ pr_debug("%s %d\n", __func__, __LINE__);
return -EIO;
}
if (sg_list[i].addr < mhp->attr.va_fbo) {
- PDBG("%s %d\n", __func__, __LINE__);
+ pr_debug("%s %d\n", __func__, __LINE__);
return -EINVAL;
}
if (sg_list[i].addr + ((u64) sg_list[i].length) <
sg_list[i].addr) {
- PDBG("%s %d\n", __func__, __LINE__);
+ pr_debug("%s %d\n", __func__, __LINE__);
return -EINVAL;
}
if (sg_list[i].addr + ((u64) sg_list[i].length) >
mhp->attr.va_fbo + ((u64) mhp->attr.len)) {
- PDBG("%s %d\n", __func__, __LINE__);
+ pr_debug("%s %d\n", __func__, __LINE__);
return -EINVAL;
}
offset = sg_list[i].addr - mhp->attr.va_fbo;
@@ -427,8 +427,8 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
err = build_inv_stag(wqe, wr, &t3_wr_flit_cnt);
break;
default:
- PDBG("%s post of type=%d TBD!\n", __func__,
- wr->opcode);
+ pr_debug("%s post of type=%d TBD!\n", __func__,
+ wr->opcode);
err = -EINVAL;
}
if (err)
@@ -444,10 +444,10 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
0, t3_wr_flit_cnt,
(wr_cnt == 1) ? T3_SOPEOP : T3_SOP);
- PDBG("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n",
- __func__, (unsigned long long) wr->wr_id, idx,
- Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
- sqp->opcode);
+ pr_debug("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n",
+ __func__, (unsigned long long)wr->wr_id, idx,
+ Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
+ sqp->opcode);
wr = wr->next;
num_wrs--;
qhp->wq.wptr += wr_cnt;
@@ -508,9 +508,9 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP);
- PDBG("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x "
- "wqe %p \n", __func__, (unsigned long long) wr->wr_id,
- idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
+ pr_debug("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x wqe %p\n",
+ __func__, (unsigned long long)wr->wr_id,
+ idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
++(qhp->wq.rq_wptr);
++(qhp->wq.wptr);
wr = wr->next;
@@ -664,14 +664,13 @@ int iwch_post_zb_read(struct iwch_ep *ep)
struct sk_buff *skb;
u8 flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
- PDBG("%s enter\n", __func__);
+ pr_debug("%s enter\n", __func__);
skb = alloc_skb(40, GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR "%s cannot send zb_read!!\n", __func__);
+ pr_err("%s cannot send zb_read!!\n", __func__);
return -ENOMEM;
}
- wqe = (union t3_wr *)skb_put(skb, sizeof(struct t3_rdma_read_wr));
- memset(wqe, 0, sizeof(struct t3_rdma_read_wr));
+ wqe = skb_put_zero(skb, sizeof(struct t3_rdma_read_wr));
wqe->read.rdmaop = T3_READ_REQ;
wqe->read.reserved[0] = 0;
wqe->read.reserved[1] = 0;
@@ -696,14 +695,13 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
struct terminate_message *term;
struct sk_buff *skb;
- PDBG("%s %d\n", __func__, __LINE__);
+ pr_debug("%s %d\n", __func__, __LINE__);
skb = alloc_skb(40, GFP_ATOMIC);
if (!skb) {
- printk(KERN_ERR "%s cannot send TERMINATE!\n", __func__);
+ pr_err("%s cannot send TERMINATE!\n", __func__);
return -ENOMEM;
}
- wqe = (union t3_wr *)skb_put(skb, 40);
- memset(wqe, 0, 40);
+ wqe = skb_put_zero(skb, 40);
wqe->send.rdmaop = T3_TERMINATE;
/* immediate data length */
@@ -729,7 +727,7 @@ static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp,
int flushed;
- PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
+ pr_debug("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
/* take a ref on the qhp since we must release the lock */
atomic_inc(&qhp->refcnt);
spin_unlock(&qhp->lock);
@@ -807,7 +805,7 @@ u16 iwch_rqes_posted(struct iwch_qp *qhp)
count++;
wqe++;
}
- PDBG("%s qhp %p count %u\n", __func__, qhp, count);
+ pr_debug("%s qhp %p count %u\n", __func__, qhp, count);
return count;
}
@@ -854,12 +852,12 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
} else
init_attr.rtr_type = 0;
init_attr.irs = qhp->ep->rcv_seq;
- PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
- "flags 0x%x qpcaps 0x%x\n", __func__,
- init_attr.rq_addr, init_attr.rq_size,
- init_attr.flags, init_attr.qpcaps);
+ pr_debug("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d flags 0x%x qpcaps 0x%x\n",
+ __func__,
+ init_attr.rq_addr, init_attr.rq_size,
+ init_attr.flags, init_attr.qpcaps);
ret = cxio_rdma_init(&rhp->rdev, &init_attr);
- PDBG("%s ret %d\n", __func__, ret);
+ pr_debug("%s ret %d\n", __func__, ret);
return ret;
}
@@ -877,9 +875,9 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
int free = 0;
struct iwch_ep *ep = NULL;
- PDBG("%s qhp %p qpid 0x%x ep %p state %d -> %d\n", __func__,
- qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state,
- (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
+ pr_debug("%s qhp %p qpid 0x%x ep %p state %d -> %d\n", __func__,
+ qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state,
+ (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
spin_lock_irqsave(&qhp->lock, flag);
@@ -961,7 +959,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
case IWCH_QP_STATE_RTS:
switch (attrs->next_state) {
case IWCH_QP_STATE_CLOSING:
- BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2);
+ BUG_ON(kref_read(&qhp->ep->com.kref) < 2);
qhp->attr.state = IWCH_QP_STATE_CLOSING;
if (!internal) {
abort=0;
@@ -1034,16 +1032,15 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
goto err;
break;
default:
- printk(KERN_ERR "%s in a bad state %d\n",
- __func__, qhp->attr.state);
+ pr_err("%s in a bad state %d\n", __func__, qhp->attr.state);
ret = -EINVAL;
goto err;
break;
}
goto out;
err:
- PDBG("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep,
- qhp->wq.qpid);
+ pr_debug("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep,
+ qhp->wq.qpid);
/* disassociate the LLP connection */
qhp->attr.llp_stream_handle = NULL;
@@ -1077,6 +1074,6 @@ out:
if (free)
put_ep(&ep->com);
- PDBG("%s exit state %d\n", __func__, qhp->attr.state);
+ pr_debug("%s exit state %d\n", __func__, qhp->attr.state);
return ret;
}
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 9398143d7c5e..e49b34c3b136 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -101,7 +101,7 @@ MODULE_PARM_DESC(enable_tcp_window_scaling,
int c4iw_debug;
module_param(c4iw_debug, int, 0644);
-MODULE_PARM_DESC(c4iw_debug, "Enable debug logging (default=0)");
+MODULE_PARM_DESC(c4iw_debug, "obsolete");
static int peer2peer = 1;
module_param(peer2peer, int, 0644);
@@ -180,7 +180,7 @@ static void ref_qp(struct c4iw_ep *ep)
static void start_ep_timer(struct c4iw_ep *ep)
{
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
if (timer_pending(&ep->timer)) {
pr_err("%s timer already started! ep %p\n",
__func__, ep);
@@ -196,7 +196,7 @@ static void start_ep_timer(struct c4iw_ep *ep)
static int stop_ep_timer(struct c4iw_ep *ep)
{
- PDBG("%s ep %p stopping\n", __func__, ep);
+ pr_debug("%s ep %p stopping\n", __func__, ep);
del_timer_sync(&ep->timer);
if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
c4iw_put_ep(&ep->com);
@@ -212,7 +212,7 @@ static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb,
if (c4iw_fatal_error(rdev)) {
kfree_skb(skb);
- PDBG("%s - device in error state - dropping\n", __func__);
+ pr_debug("%s - device in error state - dropping\n", __func__);
return -EIO;
}
error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e);
@@ -229,7 +229,7 @@ int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb)
if (c4iw_fatal_error(rdev)) {
kfree_skb(skb);
- PDBG("%s - device in error state - dropping\n", __func__);
+ pr_debug("%s - device in error state - dropping\n", __func__);
return -EIO;
}
error = cxgb4_ofld_send(rdev->lldi.ports[0], skb);
@@ -263,10 +263,10 @@ static void set_emss(struct c4iw_ep *ep, u16 opt)
if (ep->emss < 128)
ep->emss = 128;
if (ep->emss & 7)
- PDBG("Warning: misaligned mtu idx %u mss %u emss=%u\n",
- TCPOPT_MSS_G(opt), ep->mss, ep->emss);
- PDBG("%s mss_idx %u mss %u emss=%u\n", __func__, TCPOPT_MSS_G(opt),
- ep->mss, ep->emss);
+ pr_debug("Warning: misaligned mtu idx %u mss %u emss=%u\n",
+ TCPOPT_MSS_G(opt), ep->mss, ep->emss);
+ pr_debug("%s mss_idx %u mss %u emss=%u\n", __func__, TCPOPT_MSS_G(opt),
+ ep->mss, ep->emss);
}
static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc)
@@ -287,7 +287,7 @@ static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
{
mutex_lock(&epc->mutex);
- PDBG("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
+ pr_debug("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
__state_set(epc, new);
mutex_unlock(&epc->mutex);
return;
@@ -322,7 +322,7 @@ static void *alloc_ep(int size, gfp_t gfp)
mutex_init(&epc->mutex);
c4iw_init_wr_wait(&epc->wr_wait);
}
- PDBG("%s alloc ep %p\n", __func__, epc);
+ pr_debug("%s alloc ep %p\n", __func__, epc);
return epc;
}
@@ -384,7 +384,7 @@ void _c4iw_free_ep(struct kref *kref)
struct c4iw_ep *ep;
ep = container_of(kref, struct c4iw_ep, com.kref);
- PDBG("%s ep %p state %s\n", __func__, ep, states[ep->com.state]);
+ pr_debug("%s ep %p state %s\n", __func__, ep, states[ep->com.state]);
if (test_bit(QP_REFERENCED, &ep->com.flags))
deref_qp(ep);
if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
@@ -398,7 +398,8 @@ void _c4iw_free_ep(struct kref *kref)
(const u32 *)&sin6->sin6_addr.s6_addr,
1);
}
- cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
+ cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid,
+ ep->com.local_addr.ss_family);
dst_release(ep->dst);
cxgb4_l2t_release(ep->l2t);
if (ep->mpa_skb)
@@ -467,7 +468,7 @@ static struct net_device *get_real_dev(struct net_device *egress_dev)
static void arp_failure_discard(void *handle, struct sk_buff *skb)
{
- pr_err(MOD "ARP failure\n");
+ pr_err("ARP failure\n");
kfree_skb(skb);
}
@@ -488,6 +489,7 @@ static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
release_ep_resources(ep);
+ kfree_skb(skb);
return 0;
}
@@ -498,6 +500,7 @@ static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
c4iw_put_ep(&ep->parent_ep->com);
release_ep_resources(ep);
+ kfree_skb(skb);
return 0;
}
@@ -528,7 +531,7 @@ static void pass_accept_rpl_arp_failure(void *handle, struct sk_buff *skb)
{
struct c4iw_ep *ep = handle;
- pr_err(MOD "ARP failure during accept - tid %u -dropping connection\n",
+ pr_err("ARP failure during accept - tid %u - dropping connection\n",
ep->hwtid);
__state_set(&ep->com, DEAD);
@@ -542,7 +545,7 @@ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
{
struct c4iw_ep *ep = handle;
- printk(KERN_ERR MOD "ARP failure during connect\n");
+ pr_err("ARP failure during connect\n");
connect_reply_upcall(ep, -EHOSTUNREACH);
__state_set(&ep->com, DEAD);
if (ep->com.remote_addr.ss_family == AF_INET6) {
@@ -567,13 +570,15 @@ static void abort_arp_failure(void *handle, struct sk_buff *skb)
struct c4iw_rdev *rdev = &ep->com.dev->rdev;
struct cpl_abort_req *req = cplhdr(skb);
- PDBG("%s rdev %p\n", __func__, rdev);
+ pr_debug("%s rdev %p\n", __func__, rdev);
req->cmd = CPL_ABORT_NO_RST;
+ skb_get(skb);
ret = c4iw_ofld_send(rdev, skb);
if (ret) {
__state_set(&ep->com, DEAD);
queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE);
- }
+ } else
+ kfree_skb(skb);
}
static int send_flowc(struct c4iw_ep *ep)
@@ -592,7 +597,7 @@ static int send_flowc(struct c4iw_ep *ep)
else
nparams = 9;
- flowc = (struct fw_flowc_wr *)__skb_put(skb, FLOWC_LEN);
+ flowc = __skb_put(skb, FLOWC_LEN);
flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
FW_FLOWC_WR_NPARAMS_V(nparams));
@@ -642,7 +647,7 @@ static int send_halfclose(struct c4iw_ep *ep)
struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16);
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
if (WARN_ON(!skb))
return -ENOMEM;
@@ -657,7 +662,7 @@ static int send_abort(struct c4iw_ep *ep)
u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16);
struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
if (WARN_ON(!req_skb))
return -ENOMEM;
@@ -692,6 +697,10 @@ static int send_connect(struct c4iw_ep *ep)
int ret;
enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
u32 isn = (prandom_u32() & ~7UL) - 1;
+ struct net_device *netdev;
+ u64 params;
+
+ netdev = ep->com.dev->rdev.lldi.ports[0];
switch (CHELSIO_CHIP_VERSION(adapter_type)) {
case CHELSIO_T4:
@@ -716,12 +725,11 @@ static int send_connect(struct c4iw_ep *ep)
roundup(sizev4, 16) :
roundup(sizev6, 16);
- PDBG("%s ep %p atid %u\n", __func__, ep, ep->atid);
+ pr_debug("%s ep %p atid %u\n", __func__, ep, ep->atid);
skb = get_skb(NULL, wrlen, GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
- __func__);
+ pr_err("%s - failed to alloc skb\n", __func__);
return -ENOMEM;
}
set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
@@ -768,6 +776,8 @@ static int send_connect(struct c4iw_ep *ep)
opt2 |= T5_ISS_F;
}
+ params = cxgb4_select_ntuple(netdev, ep->l2t);
+
if (ep->com.remote_addr.ss_family == AF_INET6)
cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
(const u32 *)&la6->sin6_addr.s6_addr, 1);
@@ -777,18 +787,16 @@ static int send_connect(struct c4iw_ep *ep)
if (ep->com.remote_addr.ss_family == AF_INET) {
switch (CHELSIO_CHIP_VERSION(adapter_type)) {
case CHELSIO_T4:
- req = (struct cpl_act_open_req *)skb_put(skb, wrlen);
+ req = skb_put(skb, wrlen);
INIT_TP_WR(req, 0);
break;
case CHELSIO_T5:
- t5req = (struct cpl_t5_act_open_req *)skb_put(skb,
- wrlen);
+ t5req = skb_put(skb, wrlen);
INIT_TP_WR(t5req, 0);
req = (struct cpl_act_open_req *)t5req;
break;
case CHELSIO_T6:
- t6req = (struct cpl_t6_act_open_req *)skb_put(skb,
- wrlen);
+ t6req = skb_put(skb, wrlen);
INIT_TP_WR(t6req, 0);
req = (struct cpl_act_open_req *)t6req;
t5req = (struct cpl_t5_act_open_req *)t6req;
@@ -809,34 +817,36 @@ static int send_connect(struct c4iw_ep *ep)
req->opt0 = cpu_to_be64(opt0);
if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
- req->params = cpu_to_be32(cxgb4_select_ntuple(
- ep->com.dev->rdev.lldi.ports[0],
- ep->l2t));
+ req->params = cpu_to_be32(params);
req->opt2 = cpu_to_be32(opt2);
} else {
- t5req->params = cpu_to_be64(FILTER_TUPLE_V(
- cxgb4_select_ntuple(
- ep->com.dev->rdev.lldi.ports[0],
- ep->l2t)));
- t5req->rsvd = cpu_to_be32(isn);
- PDBG("%s snd_isn %u\n", __func__, t5req->rsvd);
- t5req->opt2 = cpu_to_be32(opt2);
+ if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+ t5req->params =
+ cpu_to_be64(FILTER_TUPLE_V(params));
+ t5req->rsvd = cpu_to_be32(isn);
+ pr_debug("%s snd_isn %u\n", __func__, t5req->rsvd);
+ t5req->opt2 = cpu_to_be32(opt2);
+ } else {
+ t6req->params =
+ cpu_to_be64(FILTER_TUPLE_V(params));
+ t6req->rsvd = cpu_to_be32(isn);
+ pr_debug("%s snd_isn %u\n", __func__, t6req->rsvd);
+ t6req->opt2 = cpu_to_be32(opt2);
+ }
}
} else {
switch (CHELSIO_CHIP_VERSION(adapter_type)) {
case CHELSIO_T4:
- req6 = (struct cpl_act_open_req6 *)skb_put(skb, wrlen);
+ req6 = skb_put(skb, wrlen);
INIT_TP_WR(req6, 0);
break;
case CHELSIO_T5:
- t5req6 = (struct cpl_t5_act_open_req6 *)skb_put(skb,
- wrlen);
+ t5req6 = skb_put(skb, wrlen);
INIT_TP_WR(t5req6, 0);
req6 = (struct cpl_act_open_req6 *)t5req6;
break;
case CHELSIO_T6:
- t6req6 = (struct cpl_t6_act_open_req6 *)skb_put(skb,
- wrlen);
+ t6req6 = skb_put(skb, wrlen);
INIT_TP_WR(t6req6, 0);
req6 = (struct cpl_act_open_req6 *)t6req6;
t5req6 = (struct cpl_t5_act_open_req6 *)t6req6;
@@ -859,18 +869,24 @@ static int send_connect(struct c4iw_ep *ep)
req6->opt0 = cpu_to_be64(opt0);
if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
- req6->params = cpu_to_be32(cxgb4_select_ntuple(
- ep->com.dev->rdev.lldi.ports[0],
- ep->l2t));
+ req6->params = cpu_to_be32(cxgb4_select_ntuple(netdev,
+ ep->l2t));
req6->opt2 = cpu_to_be32(opt2);
} else {
- t5req6->params = cpu_to_be64(FILTER_TUPLE_V(
- cxgb4_select_ntuple(
- ep->com.dev->rdev.lldi.ports[0],
- ep->l2t)));
- t5req6->rsvd = cpu_to_be32(isn);
- PDBG("%s snd_isn %u\n", __func__, t5req6->rsvd);
- t5req6->opt2 = cpu_to_be32(opt2);
+ if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+ t5req6->params =
+ cpu_to_be64(FILTER_TUPLE_V(params));
+ t5req6->rsvd = cpu_to_be32(isn);
+ pr_debug("%s snd_isn %u\n", __func__, t5req6->rsvd);
+ t5req6->opt2 = cpu_to_be32(opt2);
+ } else {
+ t6req6->params =
+ cpu_to_be64(FILTER_TUPLE_V(params));
+ t6req6->rsvd = cpu_to_be32(isn);
+ pr_debug("%s snd_isn %u\n", __func__, t6req6->rsvd);
+ t6req6->opt2 = cpu_to_be32(opt2);
+ }
+
}
}
@@ -891,7 +907,8 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
struct mpa_message *mpa;
struct mpa_v2_conn_params mpa_v2_params;
- PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
+ pr_debug("%s ep %p tid %u pd_len %d\n",
+ __func__, ep, ep->hwtid, ep->plen);
BUG_ON(skb_cloned(skb));
@@ -906,8 +923,7 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
}
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
- req = (struct fw_ofld_tx_data_wr *)skb_put(skb, wrlen);
- memset(req, 0, wrlen);
+ req = skb_put_zero(skb, wrlen);
req->op_to_immdlen = cpu_to_be32(
FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
FW_WR_COMPL_F |
@@ -945,8 +961,8 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
if (mpa_rev_to_use == 2) {
mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
sizeof (struct mpa_v2_conn_params));
- PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird,
- ep->ord);
+ pr_debug("%s initiator ird %u ord %u\n", __func__, ep->ird,
+ ep->ord);
mpa_v2_params.ird = htons((u16)ep->ird);
mpa_v2_params.ord = htons((u16)ep->ord);
@@ -998,7 +1014,8 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
struct sk_buff *skb;
struct mpa_v2_conn_params mpa_v2_params;
- PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
+ pr_debug("%s ep %p tid %u pd_len %d\n",
+ __func__, ep, ep->hwtid, ep->plen);
mpalen = sizeof(*mpa) + plen;
if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
@@ -1007,13 +1024,12 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
skb = get_skb(NULL, wrlen, GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
+ pr_err("%s - cannot alloc skb!\n", __func__);
return -ENOMEM;
}
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
- req = (struct fw_ofld_tx_data_wr *)skb_put(skb, wrlen);
- memset(req, 0, wrlen);
+ req = skb_put_zero(skb, wrlen);
req->op_to_immdlen = cpu_to_be32(
FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
FW_WR_COMPL_F |
@@ -1078,7 +1094,8 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
struct sk_buff *skb;
struct mpa_v2_conn_params mpa_v2_params;
- PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
+ pr_debug("%s ep %p tid %u pd_len %d\n",
+ __func__, ep, ep->hwtid, ep->plen);
mpalen = sizeof(*mpa) + plen;
if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
@@ -1087,13 +1104,12 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
skb = get_skb(NULL, wrlen, GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
+ pr_err("%s - cannot alloc skb!\n", __func__);
return -ENOMEM;
}
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
- req = (struct fw_ofld_tx_data_wr *) skb_put(skb, wrlen);
- memset(req, 0, wrlen);
+ req = skb_put_zero(skb, wrlen);
req->op_to_immdlen = cpu_to_be32(
FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
FW_WR_COMPL_F |
@@ -1169,15 +1185,15 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
ep = lookup_atid(t, atid);
- PDBG("%s ep %p tid %u snd_isn %u rcv_isn %u\n", __func__, ep, tid,
- be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));
+ pr_debug("%s ep %p tid %u snd_isn %u rcv_isn %u\n", __func__, ep, tid,
+ be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));
mutex_lock(&ep->com.mutex);
dst_confirm(ep->dst);
/* setup the hwtid for this connection */
ep->hwtid = tid;
- cxgb4_insert_tid(t, ep, tid);
+ cxgb4_insert_tid(t, ep, tid, ep->com.local_addr.ss_family);
insert_ep_tid(ep);
ep->snd_seq = be32_to_cpu(req->snd_isn);
@@ -1213,13 +1229,13 @@ static void close_complete_upcall(struct c4iw_ep *ep, int status)
{
struct iw_cm_event event;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CLOSE;
event.status = status;
if (ep->com.cm_id) {
- PDBG("close complete delivered ep %p cm_id %p tid %u\n",
- ep, ep->com.cm_id, ep->hwtid);
+ pr_debug("close complete delivered ep %p cm_id %p tid %u\n",
+ ep, ep->com.cm_id, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
deref_cm_id(&ep->com);
set_bit(CLOSE_UPCALL, &ep->com.history);
@@ -1230,12 +1246,12 @@ static void peer_close_upcall(struct c4iw_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_DISCONNECT;
if (ep->com.cm_id) {
- PDBG("peer close delivered ep %p cm_id %p tid %u\n",
- ep, ep->com.cm_id, ep->hwtid);
+ pr_debug("peer close delivered ep %p cm_id %p tid %u\n",
+ ep, ep->com.cm_id, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
set_bit(DISCONN_UPCALL, &ep->com.history);
}
@@ -1245,13 +1261,13 @@ static void peer_abort_upcall(struct c4iw_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CLOSE;
event.status = -ECONNRESET;
if (ep->com.cm_id) {
- PDBG("abort delivered ep %p cm_id %p tid %u\n", ep,
- ep->com.cm_id, ep->hwtid);
+ pr_debug("abort delivered ep %p cm_id %p tid %u\n", ep,
+ ep->com.cm_id, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
deref_cm_id(&ep->com);
set_bit(ABORT_UPCALL, &ep->com.history);
@@ -1262,7 +1278,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
{
struct iw_cm_event event;
- PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid, status);
+ pr_debug("%s ep %p tid %u status %d\n",
+ __func__, ep, ep->hwtid, status);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CONNECT_REPLY;
event.status = status;
@@ -1291,8 +1308,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
}
}
- PDBG("%s ep %p tid %u status %d\n", __func__, ep,
- ep->hwtid, status);
+ pr_debug("%s ep %p tid %u status %d\n", __func__, ep,
+ ep->hwtid, status);
set_bit(CONN_RPL_UPCALL, &ep->com.history);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
@@ -1305,7 +1322,7 @@ static int connect_request_upcall(struct c4iw_ep *ep)
struct iw_cm_event event;
int ret;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CONNECT_REQUEST;
memcpy(&event.local_addr, &ep->com.local_addr,
@@ -1342,13 +1359,13 @@ static void established_upcall(struct c4iw_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_ESTABLISHED;
event.ird = ep->ord;
event.ord = ep->ird;
if (ep->com.cm_id) {
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
set_bit(ESTAB_UPCALL, &ep->com.history);
}
@@ -1360,10 +1377,11 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16);
u32 credit_dack;
- PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
+ pr_debug("%s ep %p tid %u credits %u\n",
+ __func__, ep, ep->hwtid, credits);
skb = get_skb(NULL, wrlen, GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n");
+ pr_err("update_rx_credits - cannot alloc skb!\n");
return 0;
}
@@ -1411,7 +1429,7 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
int err;
int disconnect = 0;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
/*
* If we get more than the supported amount of private data
@@ -1438,8 +1456,8 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
/* Validate MPA header. */
if (mpa->revision > mpa_rev) {
- printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
- " Received = %d\n", __func__, mpa_rev, mpa->revision);
+ pr_err("%s MPA version mismatch. Local = %d, Received = %d\n",
+ __func__, mpa_rev, mpa->revision);
err = -EPROTO;
goto err_stop_timer;
}
@@ -1509,8 +1527,9 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
MPA_V2_IRD_ORD_MASK;
resp_ord = ntohs(mpa_v2_params->ord) &
MPA_V2_IRD_ORD_MASK;
- PDBG("%s responder ird %u ord %u ep ird %u ord %u\n",
- __func__, resp_ird, resp_ord, ep->ird, ep->ord);
+ pr_debug("%s responder ird %u ord %u ep ird %u ord %u\n",
+ __func__,
+ resp_ird, resp_ord, ep->ird, ep->ord);
/*
* This is a double-check. Ideally, below checks are
@@ -1554,12 +1573,11 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
if (peer2peer)
ep->mpa_attr.p2p_type = p2p_type;
- PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
- "xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = "
- "%d\n", __func__, ep->mpa_attr.crc_enabled,
- ep->mpa_attr.recv_marker_enabled,
- ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
- ep->mpa_attr.p2p_type, p2p_type);
+ pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = %d\n",
+ __func__, ep->mpa_attr.crc_enabled,
+ ep->mpa_attr.recv_marker_enabled,
+ ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
+ ep->mpa_attr.p2p_type, p2p_type);
/*
* If responder's RTR does not match with that of initiator, assign
@@ -1594,7 +1612,7 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
* supports, generate TERM message
*/
if (rtr_mismatch) {
- printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__);
+ pr_err("%s: RTR mismatch, sending TERM\n", __func__);
attrs.layer_etype = LAYER_MPA | DDP_LLP;
attrs.ecode = MPA_NOMATCH_RTR;
attrs.next_state = C4IW_QP_STATE_TERMINATE;
@@ -1613,8 +1631,7 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
* initiator ORD.
*/
if (insuff_ird) {
- printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n",
- __func__);
+ pr_err("%s: Insufficient IRD, sending TERM\n", __func__);
attrs.layer_etype = LAYER_MPA | DDP_LLP;
attrs.ecode = MPA_INSUFF_IRD;
attrs.next_state = C4IW_QP_STATE_TERMINATE;
@@ -1653,7 +1670,7 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
struct mpa_v2_conn_params *mpa_v2_params;
u16 plen;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
/*
* If we get more than the supported amount of private data
@@ -1662,7 +1679,7 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt))
goto err_stop_timer;
- PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
+ pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
/*
* Copy the new data into our accumulation buffer.
@@ -1678,15 +1695,15 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
if (ep->mpa_pkt_len < sizeof(*mpa))
return 0;
- PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
+ pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
mpa = (struct mpa_message *) ep->mpa_pkt;
/*
* Validate MPA Header.
*/
if (mpa->revision > mpa_rev) {
- printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
- " Received = %d\n", __func__, mpa_rev, mpa->revision);
+ pr_err("%s MPA version mismatch. Local = %d, Received = %d\n",
+ __func__, mpa_rev, mpa->revision);
goto err_stop_timer;
}
@@ -1741,8 +1758,8 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
MPA_V2_IRD_ORD_MASK;
ep->ord = min_t(u32, ep->ord,
cur_max_read_depth(ep->com.dev));
- PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird,
- ep->ord);
+ pr_debug("%s initiator ird %u ord %u\n",
+ __func__, ep->ird, ep->ord);
if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
if (peer2peer) {
if (ntohs(mpa_v2_params->ord) &
@@ -1759,11 +1776,11 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
if (peer2peer)
ep->mpa_attr.p2p_type = p2p_type;
- PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
- "xmit_marker_enabled=%d, version=%d p2p_type=%d\n", __func__,
- ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
- ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
- ep->mpa_attr.p2p_type);
+ pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d\n",
+ __func__,
+ ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
+ ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
+ ep->mpa_attr.p2p_type);
__state_set(&ep->com, MPA_REQ_RCVD);
@@ -1799,7 +1816,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
ep = get_ep_from_tid(dev, tid);
if (!ep)
return 0;
- PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen);
+ pr_debug("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen);
skb_pull(skb, sizeof(*hdr));
skb_trim(skb, dlen);
mutex_lock(&ep->com.mutex);
@@ -1850,10 +1867,10 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
ep = get_ep_from_tid(dev, tid);
if (!ep) {
- printk(KERN_WARNING MOD "Abort rpl to freed endpoint\n");
+ pr_warn("Abort rpl to freed endpoint\n");
return 0;
}
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
mutex_lock(&ep->com.mutex);
switch (ep->com.state) {
case ABORTING:
@@ -1862,8 +1879,7 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
release = 1;
break;
default:
- printk(KERN_ERR "%s ep %p state %d\n",
- __func__, ep, ep->com.state);
+ pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state);
break;
}
mutex_unlock(&ep->com.mutex);
@@ -1884,8 +1900,7 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
int win;
skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
- req = (struct fw_ofld_connection_wr *)__skb_put(skb, sizeof(*req));
- memset(req, 0, sizeof(*req));
+ req = __skb_put_zero(skb, sizeof(*req));
req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR));
req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
req->le.filter = cpu_to_be32(cxgb4_select_ntuple(
@@ -1979,7 +1994,8 @@ static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi)
{
ep->snd_win = snd_win;
ep->rcv_win = rcv_win;
- PDBG("%s snd_win %d rcv_win %d\n", __func__, ep->snd_win, ep->rcv_win);
+ pr_debug("%s snd_win %d rcv_win %d\n",
+ __func__, ep->snd_win, ep->rcv_win);
}
#define ACT_OPEN_RETRY_COUNT 2
@@ -2084,7 +2100,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
int iptype;
__u8 *ra;
- PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id);
+ pr_debug("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id);
init_timer(&ep->timer);
c4iw_init_wr_wait(&ep->com.wr_wait);
@@ -2108,7 +2124,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
*/
ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep);
if (ep->atid == -1) {
- pr_err("%s - cannot alloc atid.\n", __func__);
+ pr_err("%s - cannot alloc atid\n", __func__);
err = -ENOMEM;
goto fail2;
}
@@ -2135,7 +2151,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
ra = (__u8 *)&raddr6->sin6_addr;
}
if (!ep->dst) {
- pr_err("%s - cannot find route.\n", __func__);
+ pr_err("%s - cannot find route\n", __func__);
err = -EHOSTUNREACH;
goto fail3;
}
@@ -2143,13 +2159,13 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
ep->com.dev->rdev.lldi.adapter_type,
ep->com.cm_id->tos);
if (err) {
- pr_err("%s - cannot alloc l2e.\n", __func__);
+ pr_err("%s - cannot alloc l2e\n", __func__);
goto fail4;
}
- PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
- __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
- ep->l2t->idx);
+ pr_debug("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
+ __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
+ ep->l2t->idx);
state_set(&ep->com, CONNECTING);
ep->tos = ep->com.cm_id->tos;
@@ -2199,12 +2215,12 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
- PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
- status, status2errno(status));
+ pr_debug("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
+ status, status2errno(status));
if (cxgb_is_neg_adv(status)) {
- PDBG("%s Connection problems for atid %u status %u (%s)\n",
- __func__, atid, status, neg_adv_str(status));
+ pr_debug("%s Connection problems for atid %u status %u (%s)\n",
+ __func__, atid, status, neg_adv_str(status));
ep->stats.connect_neg_adv++;
mutex_lock(&dev->rdev.stats.lock);
dev->rdev.stats.neg_adv++;
@@ -2281,7 +2297,8 @@ fail:
(const u32 *)&sin6->sin6_addr.s6_addr, 1);
}
if (status && act_open_has_tid(status))
- cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl));
+ cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl),
+ ep->com.local_addr.ss_family);
remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid);
cxgb4_free_atid(t, atid);
@@ -2299,11 +2316,11 @@ static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
if (!ep) {
- PDBG("%s stid %d lookup failure!\n", __func__, stid);
+ pr_debug("%s stid %d lookup failure!\n", __func__, stid);
goto out;
}
- PDBG("%s ep %p status %d error %d\n", __func__, ep,
- rpl->status, status2errno(rpl->status));
+ pr_debug("%s ep %p status %d error %d\n", __func__, ep,
+ rpl->status, status2errno(rpl->status));
c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
c4iw_put_ep(&ep->com);
out:
@@ -2316,7 +2333,7 @@ static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
unsigned int stid = GET_TID(rpl);
struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
c4iw_put_ep(&ep->com);
return 0;
@@ -2334,7 +2351,7 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
int win;
enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
BUG_ON(skb_cloned(skb));
skb_get(skb);
@@ -2405,7 +2422,7 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
if (peer2peer)
isn += 4;
rpl5->iss = cpu_to_be32(isn);
- PDBG("%s iss %u\n", __func__, be32_to_cpu(rpl5->iss));
+ pr_debug("%s iss %u\n", __func__, be32_to_cpu(rpl5->iss));
}
rpl->opt0 = cpu_to_be64(opt0);
@@ -2418,7 +2435,7 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
{
- PDBG("%s c4iw_dev %p tid %u\n", __func__, dev, hwtid);
+ pr_debug("%s c4iw_dev %p tid %u\n", __func__, dev, hwtid);
BUG_ON(skb_cloned(skb));
skb_trim(skb, sizeof(struct cpl_tid_release));
release_tid(&dev->rdev, hwtid, skb);
@@ -2444,12 +2461,13 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
parent_ep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
if (!parent_ep) {
- PDBG("%s connect request on invalid stid %d\n", __func__, stid);
+ pr_debug("%s connect request on invalid stid %d\n",
+ __func__, stid);
goto reject;
}
if (state_read(&parent_ep->com) != LISTEN) {
- PDBG("%s - listening ep not in LISTEN\n", __func__);
+ pr_debug("%s - listening ep not in LISTEN\n", __func__);
goto reject;
}
@@ -2458,18 +2476,18 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
/* Find output route */
if (iptype == 4) {
- PDBG("%s parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n"
- , __func__, parent_ep, hwtid,
- local_ip, peer_ip, ntohs(local_port),
- ntohs(peer_port), peer_mss);
+ pr_debug("%s parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n"
+ , __func__, parent_ep, hwtid,
+ local_ip, peer_ip, ntohs(local_port),
+ ntohs(peer_port), peer_mss);
dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
*(__be32 *)local_ip, *(__be32 *)peer_ip,
local_port, peer_port, tos);
} else {
- PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
- , __func__, parent_ep, hwtid,
- local_ip, peer_ip, ntohs(local_port),
- ntohs(peer_port), peer_mss);
+ pr_debug("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
+ , __func__, parent_ep, hwtid,
+ local_ip, peer_ip, ntohs(local_port),
+ ntohs(peer_port), peer_mss);
dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
local_ip, peer_ip, local_port, peer_port,
PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
@@ -2477,15 +2495,13 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
&parent_ep->com.local_addr)->sin6_scope_id);
}
if (!dst) {
- printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
- __func__);
+ pr_err("%s - failed to find dst entry!\n", __func__);
goto reject;
}
child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
if (!child_ep) {
- printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n",
- __func__);
+ pr_err("%s - failed to allocate ep entry!\n", __func__);
dst_release(dst);
goto reject;
}
@@ -2493,14 +2509,14 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
err = import_ep(child_ep, iptype, peer_ip, dst, dev, false,
parent_ep->com.dev->rdev.lldi.adapter_type, tos);
if (err) {
- printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
- __func__);
+ pr_err("%s - failed to allocate l2t entry!\n", __func__);
dst_release(dst);
kfree(child_ep);
goto reject;
}
- hdrs = sizeof(struct iphdr) + sizeof(struct tcphdr) +
+ hdrs = ((iptype == 4) ? sizeof(struct iphdr) : sizeof(struct ipv6hdr)) +
+ sizeof(struct tcphdr) +
((enable_tcp_timestamps && req->tcpopt.tstamp) ? 12 : 0);
if (peer_mss && child_ep->mtu > (peer_mss + hdrs))
child_ep->mtu = peer_mss + hdrs;
@@ -2517,18 +2533,18 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
struct sockaddr_in *sin = (struct sockaddr_in *)
&child_ep->com.local_addr;
- sin->sin_family = PF_INET;
+ sin->sin_family = AF_INET;
sin->sin_port = local_port;
sin->sin_addr.s_addr = *(__be32 *)local_ip;
sin = (struct sockaddr_in *)&child_ep->com.local_addr;
- sin->sin_family = PF_INET;
+ sin->sin_family = AF_INET;
sin->sin_port = ((struct sockaddr_in *)
&parent_ep->com.local_addr)->sin_port;
sin->sin_addr.s_addr = *(__be32 *)local_ip;
sin = (struct sockaddr_in *)&child_ep->com.remote_addr;
- sin->sin_family = PF_INET;
+ sin->sin_family = AF_INET;
sin->sin_port = peer_port;
sin->sin_addr.s_addr = *(__be32 *)peer_ip;
} else {
@@ -2555,11 +2571,12 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
child_ep->dst = dst;
child_ep->hwtid = hwtid;
- PDBG("%s tx_chan %u smac_idx %u rss_qid %u\n", __func__,
- child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid);
+ pr_debug("%s tx_chan %u smac_idx %u rss_qid %u\n", __func__,
+ child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid);
init_timer(&child_ep->timer);
- cxgb4_insert_tid(t, child_ep, hwtid);
+ cxgb4_insert_tid(t, child_ep, hwtid,
+ child_ep->com.local_addr.ss_family);
insert_ep_tid(child_ep);
if (accept_cr(child_ep, skb, req)) {
c4iw_put_ep(&parent_ep->com);
@@ -2591,12 +2608,12 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
int ret;
ep = get_ep_from_tid(dev, tid);
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
ep->snd_seq = be32_to_cpu(req->snd_isn);
ep->rcv_seq = be32_to_cpu(req->rcv_isn);
- PDBG("%s ep %p hwtid %u tcp_opt 0x%02x\n", __func__, ep, tid,
- ntohs(req->tcp_opt));
+ pr_debug("%s ep %p hwtid %u tcp_opt 0x%02x\n", __func__, ep, tid,
+ ntohs(req->tcp_opt));
set_emss(ep, ntohs(req->tcp_opt));
@@ -2628,7 +2645,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
if (!ep)
return 0;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
dst_confirm(ep->dst);
set_bit(PEER_CLOSE, &ep->com.history);
@@ -2650,12 +2667,12 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
* in rdma connection migration (see c4iw_accept_cr()).
*/
__state_set(&ep->com, CLOSING);
- PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
+ pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid);
c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
break;
case MPA_REP_SENT:
__state_set(&ep->com, CLOSING);
- PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
+ pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid);
c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
break;
case FPDU_MODE:
@@ -2719,17 +2736,17 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
return 0;
if (cxgb_is_neg_adv(req->status)) {
- PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
- __func__, ep->hwtid, req->status,
- neg_adv_str(req->status));
+ pr_debug("%s Negative advice on abort- tid %u status %d (%s)\n",
+ __func__, ep->hwtid, req->status,
+ neg_adv_str(req->status));
ep->stats.abort_neg_adv++;
mutex_lock(&dev->rdev.stats.lock);
dev->rdev.stats.neg_adv++;
mutex_unlock(&dev->rdev.stats.lock);
goto deref_ep;
}
- PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
- ep->com.state);
+ pr_debug("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
+ ep->com.state);
set_bit(PEER_ABORT, &ep->com.history);
/*
@@ -2761,8 +2778,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
* do some housekeeping so as to re-initiate the
* connection
*/
- PDBG("%s: mpa_rev=%d. Retrying with mpav1\n", __func__,
- mpa_rev);
+ pr_debug("%s: mpa_rev=%d. Retrying with mpav1\n",
+ __func__, mpa_rev);
ep->retry_with_mpa_v1 = 1;
}
break;
@@ -2781,16 +2798,14 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
&attrs, 1);
if (ret)
- printk(KERN_ERR MOD
- "%s - qp <- error failed!\n",
- __func__);
+ pr_err("%s - qp <- error failed!\n", __func__);
}
peer_abort_upcall(ep);
break;
case ABORTING:
break;
case DEAD:
- PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
+ pr_debug("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
mutex_unlock(&ep->com.mutex);
goto deref_ep;
default:
@@ -2829,7 +2844,8 @@ out:
1);
}
remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid);
- cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
+ cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid,
+ ep->com.local_addr.ss_family);
dst_release(ep->dst);
cxgb4_l2t_release(ep->l2t);
c4iw_reconnect(ep);
@@ -2854,7 +2870,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
if (!ep)
return 0;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
BUG_ON(!ep);
/* The cm_id may be null if we failed to connect */
@@ -2902,13 +2918,13 @@ static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
BUG_ON(!ep);
if (ep && ep->com.qp) {
- printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n", tid,
- ep->com.qp->wq.sq.qid);
+ pr_warn("TERM received tid %u qpid %u\n",
+ tid, ep->com.qp->wq.sq.qid);
attrs.next_state = C4IW_QP_STATE_TERMINATE;
c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
} else
- printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid);
+ pr_warn("TERM received tid %u no ep/qp\n", tid);
c4iw_put_ep(&ep->com);
return 0;
@@ -2930,18 +2946,19 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
ep = get_ep_from_tid(dev, tid);
if (!ep)
return 0;
- PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
+ pr_debug("%s ep %p tid %u credits %u\n",
+ __func__, ep, ep->hwtid, credits);
if (credits == 0) {
- PDBG("%s 0 credit ack ep %p tid %u state %u\n",
- __func__, ep, ep->hwtid, state_read(&ep->com));
+ pr_debug("%s 0 credit ack ep %p tid %u state %u\n",
+ __func__, ep, ep->hwtid, state_read(&ep->com));
goto out;
}
dst_confirm(ep->dst);
if (ep->mpa_skb) {
- PDBG("%s last streaming msg ack ep %p tid %u state %u "
- "initiator %u freeing skb\n", __func__, ep, ep->hwtid,
- state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0);
+ pr_debug("%s last streaming msg ack ep %p tid %u state %u initiator %u freeing skb\n",
+ __func__, ep, ep->hwtid,
+ state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0);
mutex_lock(&ep->com.mutex);
kfree_skb(ep->mpa_skb);
ep->mpa_skb = NULL;
@@ -2959,7 +2976,7 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
int abort;
struct c4iw_ep *ep = to_ep(cm_id);
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
mutex_lock(&ep->com.mutex);
if (ep->com.state != MPA_REQ_RCVD) {
@@ -2990,7 +3007,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
int abort = 0;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
mutex_lock(&ep->com.mutex);
if (ep->com.state != MPA_REQ_RCVD) {
@@ -3043,7 +3060,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep->ird = 1;
}
- PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
+ pr_debug("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
ep->com.cm_id = cm_id;
ref_cm_id(&ep->com);
@@ -3172,7 +3189,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
}
ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
if (!ep) {
- printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
+ pr_err("%s - cannot alloc ep\n", __func__);
err = -ENOMEM;
goto out;
}
@@ -3199,20 +3216,20 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep->com.dev = dev;
ep->com.qp = get_qhp(dev, conn_param->qpn);
if (!ep->com.qp) {
- PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
+ pr_debug("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
err = -EINVAL;
goto fail2;
}
ref_qp(ep);
- PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
- ep->com.qp, cm_id);
+ pr_debug("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
+ ep->com.qp, cm_id);
/*
* Allocate an active TID to initiate a TCP connection.
*/
ep->atid = cxgb4_alloc_atid(dev->rdev.lldi.tids, ep);
if (ep->atid == -1) {
- printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
+ pr_err("%s - cannot alloc atid\n", __func__);
err = -ENOMEM;
goto fail2;
}
@@ -3242,9 +3259,9 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
}
/* find a route */
- PDBG("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
- __func__, &laddr->sin_addr, ntohs(laddr->sin_port),
- ra, ntohs(raddr->sin_port));
+ pr_debug("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
+ __func__, &laddr->sin_addr, ntohs(laddr->sin_port),
+ ra, ntohs(raddr->sin_port));
ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
laddr->sin_addr.s_addr,
raddr->sin_addr.s_addr,
@@ -3264,10 +3281,10 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
}
/* find a route */
- PDBG("%s saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n",
- __func__, laddr6->sin6_addr.s6_addr,
- ntohs(laddr6->sin6_port),
- raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
+ pr_debug("%s saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n",
+ __func__, laddr6->sin6_addr.s6_addr,
+ ntohs(laddr6->sin6_port),
+ raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
laddr6->sin6_addr.s6_addr,
raddr6->sin6_addr.s6_addr,
@@ -3276,7 +3293,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
raddr6->sin6_scope_id);
}
if (!ep->dst) {
- printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
+ pr_err("%s - cannot find route\n", __func__);
err = -EHOSTUNREACH;
goto fail3;
}
@@ -3284,13 +3301,13 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true,
ep->com.dev->rdev.lldi.adapter_type, cm_id->tos);
if (err) {
- printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
+ pr_err("%s - cannot alloc l2e\n", __func__);
goto fail4;
}
- PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
- __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
- ep->l2t->idx);
+ pr_debug("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
+ __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
+ ep->l2t->idx);
state_set(&ep->com, CONNECTING);
ep->tos = cm_id->tos;
@@ -3398,12 +3415,12 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
if (!ep) {
- printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
+ pr_err("%s - cannot alloc ep\n", __func__);
err = -ENOMEM;
goto fail1;
}
skb_queue_head_init(&ep->com.ep_skb_list);
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
ep->com.cm_id = cm_id;
ref_cm_id(&ep->com);
ep->com.dev = dev;
@@ -3423,7 +3440,7 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
cm_id->m_local_addr.ss_family, ep);
if (ep->stid == -1) {
- printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__);
+ pr_err("%s - cannot alloc stid\n", __func__);
err = -ENOMEM;
goto fail2;
}
@@ -3457,7 +3474,7 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id)
int err;
struct c4iw_listen_ep *ep = to_listen_ep(cm_id);
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
might_sleep();
state_set(&ep->com, DEAD);
@@ -3498,8 +3515,8 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
mutex_lock(&ep->com.mutex);
- PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
- states[ep->com.state], abrupt);
+ pr_debug("%s ep %p state %s, abrupt %d\n", __func__, ep,
+ states[ep->com.state], abrupt);
/*
* Ref the ep here in case we have fatal errors causing the
@@ -3552,8 +3569,8 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
case MORIBUND:
case ABORTING:
case DEAD:
- PDBG("%s ignoring disconnect ep %p state %u\n",
- __func__, ep, ep->com.state);
+ pr_debug("%s ignoring disconnect ep %p state %u\n",
+ __func__, ep, ep->com.state);
break;
default:
BUG();
@@ -3584,8 +3601,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
C4IW_QP_ATTR_NEXT_STATE,
&attrs, 1);
if (ret)
- pr_err(MOD
- "%s - qp <- error failed!\n",
+ pr_err("%s - qp <- error failed!\n",
__func__);
}
fatal = 1;
@@ -3658,7 +3674,7 @@ static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
rpl_skb = (struct sk_buff *)(unsigned long)req->cookie;
BUG_ON(!rpl_skb);
if (req->retval) {
- PDBG("%s passive open failure %d\n", __func__, req->retval);
+ pr_debug("%s passive open failure %d\n", __func__, req->retval);
mutex_lock(&dev->rdev.stats.lock);
dev->rdev.stats.pas_ofld_conn_fails++;
mutex_unlock(&dev->rdev.stats.lock);
@@ -3732,9 +3748,9 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos)
*/
memset(&tmp_opt, 0, sizeof(tmp_opt));
tcp_clear_options(&tmp_opt);
- tcp_parse_options(skb, &tmp_opt, 0, NULL);
+ tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL);
- req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req));
+ req = __skb_push(skb, sizeof(*req));
memset(req, 0, sizeof(*req));
req->l2info = cpu_to_be16(SYN_INTF_V(intf) |
SYN_MAC_IDX_V(RX_MACIDX_G(
@@ -3784,8 +3800,9 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
int ret;
req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL);
- req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req));
- memset(req, 0, sizeof(*req));
+ if (!req_skb)
+ return;
+ req = __skb_put_zero(req_skb, sizeof(*req));
req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL_F);
req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
req->le.version_cpl = htonl(FW_OFLD_CONNECTION_WR_CPL_F);
@@ -3874,7 +3891,8 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
lep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
if (!lep) {
- PDBG("%s connect request on invalid stid %d\n", __func__, stid);
+ pr_debug("%s connect request on invalid stid %d\n",
+ __func__, stid);
goto reject;
}
@@ -3911,9 +3929,9 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
skb_set_transport_header(skb, (void *)tcph - (void *)rss);
skb_get(skb);
- PDBG("%s lip 0x%x lport %u pip 0x%x pport %u tos %d\n", __func__,
- ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
- ntohs(tcph->source), iph->tos);
+ pr_debug("%s lip 0x%x lport %u pip 0x%x pport %u tos %d\n", __func__,
+ ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
+ ntohs(tcph->source), iph->tos);
dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
iph->daddr, iph->saddr, tcph->dest,
@@ -4010,8 +4028,8 @@ static void process_timeout(struct c4iw_ep *ep)
int abort = 1;
mutex_lock(&ep->com.mutex);
- PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
- ep->com.state);
+ pr_debug("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
+ ep->com.state);
set_bit(TIMEDOUT, &ep->com.history);
switch (ep->com.state) {
case MPA_REQ_SENT:
@@ -4141,8 +4159,8 @@ static int set_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
if (rpl->status != CPL_ERR_NONE) {
- printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u "
- "for tid %u\n", rpl->status, GET_TID(rpl));
+ pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n",
+ rpl->status, GET_TID(rpl));
}
kfree_skb(skb);
return 0;
@@ -4154,13 +4172,13 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
struct c4iw_wr_wait *wr_waitp;
int ret;
- PDBG("%s type %u\n", __func__, rpl->type);
+ pr_debug("%s type %u\n", __func__, rpl->type);
switch (rpl->type) {
case FW6_TYPE_WR_RPL:
ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff);
wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1];
- PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret);
+ pr_debug("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret);
if (wr_waitp)
c4iw_wake_up(wr_waitp, ret ? -ret : 0);
kfree_skb(skb);
@@ -4170,8 +4188,8 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
sched(dev, skb);
break;
default:
- printk(KERN_ERR MOD "%s unexpected fw6 msg type %u\n", __func__,
- rpl->type);
+ pr_err("%s unexpected fw6 msg type %u\n",
+ __func__, rpl->type);
kfree_skb(skb);
break;
}
@@ -4187,19 +4205,18 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
ep = get_ep_from_tid(dev, tid);
/* This EP will be dereferenced in peer_abort() */
if (!ep) {
- printk(KERN_WARNING MOD
- "Abort on non-existent endpoint, tid %d\n", tid);
+ pr_warn("Abort on non-existent endpoint, tid %d\n", tid);
kfree_skb(skb);
return 0;
}
if (cxgb_is_neg_adv(req->status)) {
- PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
- __func__, ep->hwtid, req->status,
- neg_adv_str(req->status));
+ pr_debug("%s Negative advice on abort- tid %u status %d (%s)\n",
+ __func__, ep->hwtid, req->status,
+ neg_adv_str(req->status));
goto out;
}
- PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
- ep->com.state);
+ pr_debug("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
+ ep->com.state);
c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
out:
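
The cm.c hunks above replace the driver-private PDBG() macro and the printk(KERN_* MOD ...) calls with the generic pr_debug()/pr_warn()/pr_err() helpers; the "iw_cxgb4:" prefix is supplied once by the pr_fmt() definition added to iw_cxgb4.h later in this patch. As a minimal sketch of that pattern — for a hypothetical module named "demo", not code from this patch:

/* Sketch: pr_fmt() must be defined before printk.h is pulled in (here
 * via module.h), so every pr_*() call in the file gets the module-name
 * prefix automatically.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/printk.h>

static int __init demo_init(void)
{
	/* Prints "demo: demo_init hello" once dynamic debug enables it. */
	pr_debug("%s hello\n", __func__);
	pr_warn("%s example warning, value %d\n", __func__, 42);
	return 0;
}

static void __exit demo_exit(void)
{
	pr_info("%s unloading\n", __func__);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
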
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index bec82a600d77..be07da1997e6 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -44,8 +44,7 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
wr_len = sizeof *res_wr + sizeof *res;
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
- res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
- memset(res_wr, 0, wr_len);
+ res_wr = __skb_put_zero(skb, wr_len);
res_wr->op_nres = cpu_to_be32(
FW_WR_OP_V(FW_RI_RES_WR) |
FW_RI_RES_WR_NRES_V(1) |
@@ -114,8 +113,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
}
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
- res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
- memset(res_wr, 0, wr_len);
+ res_wr = __skb_put_zero(skb, wr_len);
res_wr->op_nres = cpu_to_be32(
FW_WR_OP_V(FW_RI_RES_WR) |
FW_RI_RES_WR_NRES_V(1) |
@@ -146,7 +144,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
ret = c4iw_ofld_send(rdev, skb);
if (ret)
goto err4;
- PDBG("%s wait_event wr_wait %p\n", __func__, &wr_wait);
+ pr_debug("%s wait_event wr_wait %p\n", __func__, &wr_wait);
ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__);
if (ret)
goto err4;
@@ -159,7 +157,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
&cq->bar2_qid,
user ? &cq->bar2_pa : NULL);
if (user && !cq->bar2_pa) {
- pr_warn(MOD "%s: cqid %u not in BAR2 range.\n",
+ pr_warn("%s: cqid %u not in BAR2 range\n",
pci_name(rdev->lldi.pdev), cq->cqid);
ret = -EINVAL;
goto err4;
@@ -180,8 +178,8 @@ static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
{
struct t4_cqe cqe;
- PDBG("%s wq %p cq %p sw_cidx %u sw_pidx %u\n", __func__,
- wq, cq, cq->sw_cidx, cq->sw_pidx);
+ pr_debug("%s wq %p cq %p sw_cidx %u sw_pidx %u\n", __func__,
+ wq, cq, cq->sw_cidx, cq->sw_pidx);
memset(&cqe, 0, sizeof(cqe));
cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
CQE_OPCODE_V(FW_RI_SEND) |
@@ -199,8 +197,8 @@ int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count)
int in_use = wq->rq.in_use - count;
BUG_ON(in_use < 0);
- PDBG("%s wq %p cq %p rq.in_use %u skip count %u\n", __func__,
- wq, cq, wq->rq.in_use, count);
+ pr_debug("%s wq %p cq %p rq.in_use %u skip count %u\n", __func__,
+ wq, cq, wq->rq.in_use, count);
while (in_use--) {
insert_recv_cqe(wq, cq);
flushed++;
@@ -213,8 +211,8 @@ static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq,
{
struct t4_cqe cqe;
- PDBG("%s wq %p cq %p sw_cidx %u sw_pidx %u\n", __func__,
- wq, cq, cq->sw_cidx, cq->sw_pidx);
+ pr_debug("%s wq %p cq %p sw_cidx %u sw_pidx %u\n", __func__,
+ wq, cq, cq->sw_cidx, cq->sw_pidx);
memset(&cqe, 0, sizeof(cqe));
cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
CQE_OPCODE_V(swcqe->opcode) |
@@ -283,8 +281,8 @@ static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
/*
* Insert this completed cqe into the swcq.
*/
- PDBG("%s moving cqe into swcq sq idx %u cq idx %u\n",
- __func__, cidx, cq->sw_pidx);
+ pr_debug("%s moving cqe into swcq sq idx %u cq idx %u\n",
+ __func__, cidx, cq->sw_pidx);
swsqe->cqe.header |= htonl(CQE_SWCQE_V(1));
cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
t4_swcq_produce(cq);
@@ -339,7 +337,7 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp)
struct t4_swsqe *swsqe;
int ret;
- PDBG("%s cqid 0x%x\n", __func__, chp->cq.cqid);
+ pr_debug("%s cqid 0x%x\n", __func__, chp->cq.cqid);
ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
/*
@@ -432,7 +430,7 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
u32 ptr;
*count = 0;
- PDBG("%s count zero %d\n", __func__, *count);
+ pr_debug("%s count zero %d\n", __func__, *count);
ptr = cq->sw_cidx;
while (ptr != cq->sw_pidx) {
cqe = &cq->sw_queue[ptr];
@@ -442,7 +440,7 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
if (++ptr == cq->size)
ptr = 0;
}
- PDBG("%s cq %p count %d\n", __func__, cq, *count);
+ pr_debug("%s cq %p count %d\n", __func__, cq, *count);
}
/*
@@ -473,12 +471,11 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
if (ret)
return ret;
- PDBG("%s CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x"
- " opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
- __func__, CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe),
- CQE_GENBIT(hw_cqe), CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe),
- CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe),
- CQE_WRID_LOW(hw_cqe));
+ pr_debug("%s CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
+ __func__, CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe),
+ CQE_GENBIT(hw_cqe), CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe),
+ CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe),
+ CQE_WRID_LOW(hw_cqe));
/*
* skip cqe's not affiliated with a QP.
@@ -606,8 +603,8 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
if (!SW_CQE(hw_cqe) && (CQE_WRID_SQ_IDX(hw_cqe) != wq->sq.cidx)) {
struct t4_swsqe *swsqe;
- PDBG("%s out of order completion going in sw_sq at idx %u\n",
- __func__, CQE_WRID_SQ_IDX(hw_cqe));
+ pr_debug("%s out of order completion going in sw_sq at idx %u\n",
+ __func__, CQE_WRID_SQ_IDX(hw_cqe));
swsqe = &wq->sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
swsqe->cqe = *hw_cqe;
swsqe->complete = 1;
@@ -641,13 +638,13 @@ proc_cqe:
BUG_ON(wq->sq.in_use <= 0 && wq->sq.in_use >= wq->sq.size);
wq->sq.cidx = (uint16_t)idx;
- PDBG("%s completing sq idx %u\n", __func__, wq->sq.cidx);
+ pr_debug("%s completing sq idx %u\n", __func__, wq->sq.cidx);
*cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id;
if (c4iw_wr_log)
c4iw_log_wr_stats(wq, hw_cqe);
t4_sq_consume(wq);
} else {
- PDBG("%s completing rq idx %u\n", __func__, wq->rq.cidx);
+ pr_debug("%s completing rq idx %u\n", __func__, wq->rq.cidx);
*cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
BUG_ON(t4_rq_empty(wq));
if (c4iw_wr_log)
@@ -664,12 +661,12 @@ flush_wq:
skip_cqe:
if (SW_CQE(hw_cqe)) {
- PDBG("%s cq %p cqid 0x%x skip sw cqe cidx %u\n",
- __func__, cq, cq->cqid, cq->sw_cidx);
+ pr_debug("%s cq %p cqid 0x%x skip sw cqe cidx %u\n",
+ __func__, cq, cq->cqid, cq->sw_cidx);
t4_swcq_consume(cq);
} else {
- PDBG("%s cq %p cqid 0x%x skip hw cqe cidx %u\n",
- __func__, cq, cq->cqid, cq->cidx);
+ pr_debug("%s cq %p cqid 0x%x skip hw cqe cidx %u\n",
+ __func__, cq, cq->cqid, cq->cidx);
t4_hwcq_consume(cq);
}
return ret;
@@ -715,10 +712,12 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
wc->vendor_err = CQE_STATUS(&cqe);
wc->wc_flags = 0;
- PDBG("%s qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x "
- "lo 0x%x cookie 0x%llx\n", __func__, CQE_QPID(&cqe),
- CQE_TYPE(&cqe), CQE_OPCODE(&cqe), CQE_STATUS(&cqe), CQE_LEN(&cqe),
- CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe), (unsigned long long)cookie);
+ pr_debug("%s qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x lo 0x%x cookie 0x%llx\n",
+ __func__, CQE_QPID(&cqe),
+ CQE_TYPE(&cqe), CQE_OPCODE(&cqe),
+ CQE_STATUS(&cqe), CQE_LEN(&cqe),
+ CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe),
+ (unsigned long long)cookie);
if (CQE_TYPE(&cqe) == 0) {
if (!CQE_STATUS(&cqe))
@@ -766,8 +765,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
wc->opcode = IB_WC_SEND;
break;
default:
- printk(KERN_ERR MOD "Unexpected opcode %d "
- "in the CQE received for QPID=0x%0x\n",
+ pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
CQE_OPCODE(&cqe), CQE_QPID(&cqe));
ret = -EINVAL;
goto out;
@@ -822,8 +820,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
wc->status = IB_WC_WR_FLUSH_ERR;
break;
default:
- printk(KERN_ERR MOD
- "Unexpected cqe_status 0x%x for QPID=0x%0x\n",
+ pr_err("Unexpected cqe_status 0x%x for QPID=0x%0x\n",
CQE_STATUS(&cqe), CQE_QPID(&cqe));
wc->status = IB_WC_FATAL_ERR;
}
@@ -860,7 +857,7 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq)
struct c4iw_cq *chp;
struct c4iw_ucontext *ucontext;
- PDBG("%s ib_cq %p\n", __func__, ib_cq);
+ pr_debug("%s ib_cq %p\n", __func__, ib_cq);
chp = to_c4iw_cq(ib_cq);
remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
@@ -892,7 +889,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
size_t memsize, hwentries;
struct c4iw_mm_entry *mm, *mm2;
- PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
+ pr_debug("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
if (attr->flags)
return ERR_PTR(-EINVAL);
@@ -966,6 +963,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
goto err3;
if (ucontext) {
+ ret = -ENOMEM;
mm = kmalloc(sizeof *mm, GFP_KERNEL);
if (!mm)
goto err4;
@@ -998,9 +996,9 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
mm2->len = PAGE_SIZE;
insert_mmap(ucontext, mm2);
}
- PDBG("%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n",
- __func__, chp->cq.cqid, chp, chp->cq.size,
- chp->cq.memsize, (unsigned long long) chp->cq.dma_addr);
+ pr_debug("%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n",
+ __func__, chp->cq.cqid, chp, chp->cq.size,
+ chp->cq.memsize, (unsigned long long)chp->cq.dma_addr);
return &chp->ibcq;
err6:
kfree(mm2);
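
Several hunks above (in both cm.c and cq.c) collapse an open-coded __skb_put() followed by memset() into a single __skb_put_zero() call. A simplified sketch of what that helper amounts to — illustrative only, with a made-up name, not the kernel's implementation:

/* Roughly what __skb_put_zero(skb, len) does: reserve len bytes at the
 * skb tail and hand them back zeroed.
 */
#include <linux/skbuff.h>
#include <linux/string.h>

static void *demo_skb_put_zero(struct sk_buff *skb, unsigned int len)
{
	void *p = __skb_put(skb, len);	/* extend the data area */

	memset(p, 0, len);		/* clear the new bytes */
	return p;
}
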
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 40c0e7b9fc6e..ae0b79aeea2e 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -214,6 +214,52 @@ static const struct file_operations wr_log_debugfs_fops = {
.write = wr_log_clear,
};
+static struct sockaddr_in zero_sin = {
+ .sin_family = AF_INET,
+};
+
+static struct sockaddr_in6 zero_sin6 = {
+ .sin6_family = AF_INET6,
+};
+
+static void set_ep_sin_addrs(struct c4iw_ep *ep,
+ struct sockaddr_in **lsin,
+ struct sockaddr_in **rsin,
+ struct sockaddr_in **m_lsin,
+ struct sockaddr_in **m_rsin)
+{
+ struct iw_cm_id *id = ep->com.cm_id;
+
+ *lsin = (struct sockaddr_in *)&ep->com.local_addr;
+ *rsin = (struct sockaddr_in *)&ep->com.remote_addr;
+ if (id) {
+ *m_lsin = (struct sockaddr_in *)&id->m_local_addr;
+ *m_rsin = (struct sockaddr_in *)&id->m_remote_addr;
+ } else {
+ *m_lsin = &zero_sin;
+ *m_rsin = &zero_sin;
+ }
+}
+
+static void set_ep_sin6_addrs(struct c4iw_ep *ep,
+ struct sockaddr_in6 **lsin6,
+ struct sockaddr_in6 **rsin6,
+ struct sockaddr_in6 **m_lsin6,
+ struct sockaddr_in6 **m_rsin6)
+{
+ struct iw_cm_id *id = ep->com.cm_id;
+
+ *lsin6 = (struct sockaddr_in6 *)&ep->com.local_addr;
+ *rsin6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
+ if (id) {
+ *m_lsin6 = (struct sockaddr_in6 *)&id->m_local_addr;
+ *m_rsin6 = (struct sockaddr_in6 *)&id->m_remote_addr;
+ } else {
+ *m_lsin6 = &zero_sin6;
+ *m_rsin6 = &zero_sin6;
+ }
+}
+
static int dump_qp(int id, void *p, void *data)
{
struct c4iw_qp *qp = p;
@@ -229,16 +275,15 @@ static int dump_qp(int id, void *p, void *data)
return 1;
if (qp->ep) {
- if (qp->ep->com.local_addr.ss_family == AF_INET) {
- struct sockaddr_in *lsin = (struct sockaddr_in *)
- &qp->ep->com.cm_id->local_addr;
- struct sockaddr_in *rsin = (struct sockaddr_in *)
- &qp->ep->com.cm_id->remote_addr;
- struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
- &qp->ep->com.cm_id->m_local_addr;
- struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
- &qp->ep->com.cm_id->m_remote_addr;
+ struct c4iw_ep *ep = qp->ep;
+
+ if (ep->com.local_addr.ss_family == AF_INET) {
+ struct sockaddr_in *lsin;
+ struct sockaddr_in *rsin;
+ struct sockaddr_in *m_lsin;
+ struct sockaddr_in *m_rsin;
+ set_ep_sin_addrs(ep, &lsin, &rsin, &m_lsin, &m_rsin);
cc = snprintf(qpd->buf + qpd->pos, space,
"rc qp sq id %u rq id %u state %u "
"onchip %u ep tid %u state %u "
@@ -246,23 +291,19 @@ static int dump_qp(int id, void *p, void *data)
qp->wq.sq.qid, qp->wq.rq.qid,
(int)qp->attr.state,
qp->wq.sq.flags & T4_SQ_ONCHIP,
- qp->ep->hwtid, (int)qp->ep->com.state,
+ ep->hwtid, (int)ep->com.state,
&lsin->sin_addr, ntohs(lsin->sin_port),
- ntohs(mapped_lsin->sin_port),
+ ntohs(m_lsin->sin_port),
&rsin->sin_addr, ntohs(rsin->sin_port),
- ntohs(mapped_rsin->sin_port));
+ ntohs(m_rsin->sin_port));
} else {
- struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
- &qp->ep->com.cm_id->local_addr;
- struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
- &qp->ep->com.cm_id->remote_addr;
- struct sockaddr_in6 *mapped_lsin6 =
- (struct sockaddr_in6 *)
- &qp->ep->com.cm_id->m_local_addr;
- struct sockaddr_in6 *mapped_rsin6 =
- (struct sockaddr_in6 *)
- &qp->ep->com.cm_id->m_remote_addr;
+ struct sockaddr_in6 *lsin6;
+ struct sockaddr_in6 *rsin6;
+ struct sockaddr_in6 *m_lsin6;
+ struct sockaddr_in6 *m_rsin6;
+ set_ep_sin6_addrs(ep, &lsin6, &rsin6, &m_lsin6,
+ &m_rsin6);
cc = snprintf(qpd->buf + qpd->pos, space,
"rc qp sq id %u rq id %u state %u "
"onchip %u ep tid %u state %u "
@@ -270,13 +311,13 @@ static int dump_qp(int id, void *p, void *data)
qp->wq.sq.qid, qp->wq.rq.qid,
(int)qp->attr.state,
qp->wq.sq.flags & T4_SQ_ONCHIP,
- qp->ep->hwtid, (int)qp->ep->com.state,
+ ep->hwtid, (int)ep->com.state,
&lsin6->sin6_addr,
ntohs(lsin6->sin6_port),
- ntohs(mapped_lsin6->sin6_port),
+ ntohs(m_lsin6->sin6_port),
&rsin6->sin6_addr,
ntohs(rsin6->sin6_port),
- ntohs(mapped_rsin6->sin6_port));
+ ntohs(m_rsin6->sin6_port));
}
} else
cc = snprintf(qpd->buf + qpd->pos, space,
@@ -293,7 +334,7 @@ static int qp_release(struct inode *inode, struct file *file)
{
struct c4iw_debugfs_data *qpd = file->private_data;
if (!qpd) {
- printk(KERN_INFO "%s null qpd?\n", __func__);
+ pr_info("%s null qpd?\n", __func__);
return 0;
}
vfree(qpd->buf);
@@ -381,7 +422,7 @@ static int stag_release(struct inode *inode, struct file *file)
{
struct c4iw_debugfs_data *stagd = file->private_data;
if (!stagd) {
- printk(KERN_INFO "%s null stagd?\n", __func__);
+ pr_info("%s null stagd?\n", __func__);
return 0;
}
vfree(stagd->buf);
@@ -533,15 +574,12 @@ static int dump_ep(int id, void *p, void *data)
return 1;
if (ep->com.local_addr.ss_family == AF_INET) {
- struct sockaddr_in *lsin = (struct sockaddr_in *)
- &ep->com.cm_id->local_addr;
- struct sockaddr_in *rsin = (struct sockaddr_in *)
- &ep->com.cm_id->remote_addr;
- struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
- &ep->com.cm_id->m_local_addr;
- struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
- &ep->com.cm_id->m_remote_addr;
+ struct sockaddr_in *lsin;
+ struct sockaddr_in *rsin;
+ struct sockaddr_in *m_lsin;
+ struct sockaddr_in *m_rsin;
+ set_ep_sin_addrs(ep, &lsin, &rsin, &m_lsin, &m_rsin);
cc = snprintf(epd->buf + epd->pos, space,
"ep %p cm_id %p qp %p state %d flags 0x%lx "
"history 0x%lx hwtid %d atid %d "
@@ -553,19 +591,16 @@ static int dump_ep(int id, void *p, void *data)
ep->stats.connect_neg_adv,
ep->stats.abort_neg_adv,
&lsin->sin_addr, ntohs(lsin->sin_port),
- ntohs(mapped_lsin->sin_port),
+ ntohs(m_lsin->sin_port),
&rsin->sin_addr, ntohs(rsin->sin_port),
- ntohs(mapped_rsin->sin_port));
+ ntohs(m_rsin->sin_port));
} else {
- struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
- &ep->com.cm_id->local_addr;
- struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
- &ep->com.cm_id->remote_addr;
- struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
- &ep->com.cm_id->m_local_addr;
- struct sockaddr_in6 *mapped_rsin6 = (struct sockaddr_in6 *)
- &ep->com.cm_id->m_remote_addr;
+ struct sockaddr_in6 *lsin6;
+ struct sockaddr_in6 *rsin6;
+ struct sockaddr_in6 *m_lsin6;
+ struct sockaddr_in6 *m_rsin6;
+ set_ep_sin6_addrs(ep, &lsin6, &rsin6, &m_lsin6, &m_rsin6);
cc = snprintf(epd->buf + epd->pos, space,
"ep %p cm_id %p qp %p state %d flags 0x%lx "
"history 0x%lx hwtid %d atid %d "
@@ -577,9 +612,9 @@ static int dump_ep(int id, void *p, void *data)
ep->stats.connect_neg_adv,
ep->stats.abort_neg_adv,
&lsin6->sin6_addr, ntohs(lsin6->sin6_port),
- ntohs(mapped_lsin6->sin6_port),
+ ntohs(m_lsin6->sin6_port),
&rsin6->sin6_addr, ntohs(rsin6->sin6_port),
- ntohs(mapped_rsin6->sin6_port));
+ ntohs(m_rsin6->sin6_port));
}
if (cc < space)
epd->pos += cc;
@@ -600,7 +635,7 @@ static int dump_listen_ep(int id, void *p, void *data)
if (ep->com.local_addr.ss_family == AF_INET) {
struct sockaddr_in *lsin = (struct sockaddr_in *)
&ep->com.cm_id->local_addr;
- struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
+ struct sockaddr_in *m_lsin = (struct sockaddr_in *)
&ep->com.cm_id->m_local_addr;
cc = snprintf(epd->buf + epd->pos, space,
@@ -609,11 +644,11 @@ static int dump_listen_ep(int id, void *p, void *data)
ep, ep->com.cm_id, (int)ep->com.state,
ep->com.flags, ep->stid, ep->backlog,
&lsin->sin_addr, ntohs(lsin->sin_port),
- ntohs(mapped_lsin->sin_port));
+ ntohs(m_lsin->sin_port));
} else {
struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
&ep->com.cm_id->local_addr;
- struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
+ struct sockaddr_in6 *m_lsin6 = (struct sockaddr_in6 *)
&ep->com.cm_id->m_local_addr;
cc = snprintf(epd->buf + epd->pos, space,
@@ -622,7 +657,7 @@ static int dump_listen_ep(int id, void *p, void *data)
ep, ep->com.cm_id, (int)ep->com.state,
ep->com.flags, ep->stid, ep->backlog,
&lsin6->sin6_addr, ntohs(lsin6->sin6_port),
- ntohs(mapped_lsin6->sin6_port));
+ ntohs(m_lsin6->sin6_port));
}
if (cc < space)
epd->pos += cc;
@@ -732,7 +767,7 @@ void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
kfree(entry);
}
- list_for_each_safe(pos, nxt, &uctx->qpids) {
+ list_for_each_safe(pos, nxt, &uctx->cqids) {
entry = list_entry(pos, struct c4iw_qid_list, entry);
list_del_init(&entry->entry);
kfree(entry);
@@ -761,15 +796,14 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
* cqid and qpid range must match for now.
*/
if (rdev->lldi.udb_density != rdev->lldi.ucq_density) {
- pr_err(MOD "%s: unsupported udb/ucq densities %u/%u\n",
+ pr_err("%s: unsupported udb/ucq densities %u/%u\n",
pci_name(rdev->lldi.pdev), rdev->lldi.udb_density,
rdev->lldi.ucq_density);
return -EINVAL;
}
if (rdev->lldi.vr->qp.start != rdev->lldi.vr->cq.start ||
rdev->lldi.vr->qp.size != rdev->lldi.vr->cq.size) {
- pr_err(MOD "%s: unsupported qp and cq id ranges "
- "qp start %u size %u cq start %u size %u\n",
+ pr_err("%s: unsupported qp and cq id ranges qp start %u size %u cq start %u size %u\n",
pci_name(rdev->lldi.pdev), rdev->lldi.vr->qp.start,
rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.size,
rdev->lldi.vr->cq.size);
@@ -778,23 +812,20 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
rdev->qpmask = rdev->lldi.udb_density - 1;
rdev->cqmask = rdev->lldi.ucq_density - 1;
- PDBG("%s dev %s stag start 0x%0x size 0x%0x num stags %d "
- "pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x "
- "qp qid start %u size %u cq qid start %u size %u\n",
- __func__, pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
- rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
- rdev->lldi.vr->pbl.start,
- rdev->lldi.vr->pbl.size, rdev->lldi.vr->rq.start,
- rdev->lldi.vr->rq.size,
- rdev->lldi.vr->qp.start,
- rdev->lldi.vr->qp.size,
- rdev->lldi.vr->cq.start,
- rdev->lldi.vr->cq.size);
- PDBG("udb %pR db_reg %p gts_reg %p "
- "qpmask 0x%x cqmask 0x%x\n",
- &rdev->lldi.pdev->resource[2],
- rdev->lldi.db_reg, rdev->lldi.gts_reg,
- rdev->qpmask, rdev->cqmask);
+ pr_debug("%s dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u\n",
+ __func__, pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
+ rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
+ rdev->lldi.vr->pbl.start,
+ rdev->lldi.vr->pbl.size, rdev->lldi.vr->rq.start,
+ rdev->lldi.vr->rq.size,
+ rdev->lldi.vr->qp.start,
+ rdev->lldi.vr->qp.size,
+ rdev->lldi.vr->cq.start,
+ rdev->lldi.vr->cq.size);
+ pr_debug("udb %pR db_reg %p gts_reg %p qpmask 0x%x cqmask 0x%x\n",
+ &rdev->lldi.pdev->resource[2],
+ rdev->lldi.db_reg, rdev->lldi.gts_reg,
+ rdev->qpmask, rdev->cqmask);
if (c4iw_num_stags(rdev) == 0)
return -EINVAL;
@@ -808,22 +839,22 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
if (err) {
- printk(KERN_ERR MOD "error %d initializing resources\n", err);
+ pr_err("error %d initializing resources\n", err);
return err;
}
err = c4iw_pblpool_create(rdev);
if (err) {
- printk(KERN_ERR MOD "error %d initializing pbl pool\n", err);
+ pr_err("error %d initializing pbl pool\n", err);
goto destroy_resource;
}
err = c4iw_rqtpool_create(rdev);
if (err) {
- printk(KERN_ERR MOD "error %d initializing rqt pool\n", err);
+ pr_err("error %d initializing rqt pool\n", err);
goto destroy_pblpool;
}
err = c4iw_ocqp_pool_create(rdev);
if (err) {
- printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err);
+ pr_err("error %d initializing ocqp pool\n", err);
goto destroy_rqtpool;
}
rdev->status_page = (struct t4_dev_status_page *)
@@ -849,13 +880,15 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
rdev->free_workq = create_singlethread_workqueue("iw_cxgb4_free");
if (!rdev->free_workq) {
err = -ENOMEM;
- goto err_free_status_page;
+ goto err_free_status_page_and_wr_log;
}
rdev->status_page->db_off = 0;
return 0;
-err_free_status_page:
+err_free_status_page_and_wr_log:
+ if (c4iw_wr_log && rdev->wr_log)
+ kfree(rdev->wr_log);
free_page((unsigned long)rdev->status_page);
destroy_ocqp_pool:
c4iw_ocqp_pool_destroy(rdev);
@@ -872,9 +905,11 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev)
{
destroy_workqueue(rdev->free_workq);
kfree(rdev->wr_log);
+ c4iw_release_dev_ucontext(rdev, &rdev->uctx);
free_page((unsigned long)rdev->status_page);
c4iw_pblpool_destroy(rdev);
c4iw_rqtpool_destroy(rdev);
+ c4iw_ocqp_pool_destroy(rdev);
c4iw_destroy_resource(&rdev->resource);
}
@@ -901,7 +936,7 @@ static void c4iw_dealloc(struct uld_ctx *ctx)
static void c4iw_remove(struct uld_ctx *ctx)
{
- PDBG("%s c4iw_dev %p\n", __func__, ctx->dev);
+ pr_debug("%s c4iw_dev %p\n", __func__, ctx->dev);
c4iw_unregister_device(ctx->dev);
c4iw_dealloc(ctx);
}
@@ -919,28 +954,28 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
int ret;
if (!rdma_supported(infop)) {
- printk(KERN_INFO MOD "%s: RDMA not supported on this device.\n",
- pci_name(infop->pdev));
+ pr_info("%s: RDMA not supported on this device\n",
+ pci_name(infop->pdev));
return ERR_PTR(-ENOSYS);
}
if (!ocqp_supported(infop))
- pr_info("%s: On-Chip Queues not supported on this device.\n",
+ pr_info("%s: On-Chip Queues not supported on this device\n",
pci_name(infop->pdev));
devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
if (!devp) {
- printk(KERN_ERR MOD "Cannot allocate ib device\n");
+ pr_err("Cannot allocate ib device\n");
return ERR_PTR(-ENOMEM);
}
devp->rdev.lldi = *infop;
/* init various hw-queue params based on lld info */
- PDBG("%s: Ing. padding boundary is %d, egrsstatuspagesize = %d\n",
- __func__, devp->rdev.lldi.sge_ingpadboundary,
- devp->rdev.lldi.sge_egrstatuspagesize);
+ pr_debug("%s: Ing. padding boundary is %d, egrsstatuspagesize = %d\n",
+ __func__, devp->rdev.lldi.sge_ingpadboundary,
+ devp->rdev.lldi.sge_egrstatuspagesize);
devp->rdev.hw_queue.t4_eq_status_entries =
- devp->rdev.lldi.sge_ingpadboundary > 64 ? 2 : 1;
+ devp->rdev.lldi.sge_egrstatuspagesize / 64;
devp->rdev.hw_queue.t4_max_eq_size = 65520;
devp->rdev.hw_queue.t4_max_iq_size = 65520;
devp->rdev.hw_queue.t4_max_rq_size = 8192 -
@@ -965,7 +1000,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa,
pci_resource_len(devp->rdev.lldi.pdev, 2));
if (!devp->rdev.bar2_kva) {
- pr_err(MOD "Unable to ioremap BAR2\n");
+ pr_err("Unable to ioremap BAR2\n");
ib_dealloc_device(&devp->ibdev);
return ERR_PTR(-EINVAL);
}
@@ -977,20 +1012,19 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
devp->rdev.lldi.vr->ocq.size);
if (!devp->rdev.oc_mw_kva) {
- pr_err(MOD "Unable to ioremap onchip mem\n");
+ pr_err("Unable to ioremap onchip mem\n");
ib_dealloc_device(&devp->ibdev);
return ERR_PTR(-EINVAL);
}
}
- PDBG(KERN_INFO MOD "ocq memory: "
- "hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n",
- devp->rdev.lldi.vr->ocq.start, devp->rdev.lldi.vr->ocq.size,
- devp->rdev.oc_mw_pa, devp->rdev.oc_mw_kva);
+ pr_debug("ocq memory: hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n",
+ devp->rdev.lldi.vr->ocq.start, devp->rdev.lldi.vr->ocq.size,
+ devp->rdev.oc_mw_pa, devp->rdev.oc_mw_kva);
ret = c4iw_rdev_open(&devp->rdev);
if (ret) {
- printk(KERN_ERR MOD "Unable to open CXIO rdev err %d\n", ret);
+ pr_err("Unable to open CXIO rdev err %d\n", ret);
ib_dealloc_device(&devp->ibdev);
return ERR_PTR(ret);
}
@@ -1036,17 +1070,17 @@ static void *c4iw_uld_add(const struct cxgb4_lld_info *infop)
}
ctx->lldi = *infop;
- PDBG("%s found device %s nchan %u nrxq %u ntxq %u nports %u\n",
- __func__, pci_name(ctx->lldi.pdev),
- ctx->lldi.nchan, ctx->lldi.nrxq,
- ctx->lldi.ntxq, ctx->lldi.nports);
+ pr_debug("%s found device %s nchan %u nrxq %u ntxq %u nports %u\n",
+ __func__, pci_name(ctx->lldi.pdev),
+ ctx->lldi.nchan, ctx->lldi.nrxq,
+ ctx->lldi.ntxq, ctx->lldi.nports);
mutex_lock(&dev_mutex);
list_add_tail(&ctx->entry, &uld_ctx_list);
mutex_unlock(&dev_mutex);
for (i = 0; i < ctx->lldi.nrxq; i++)
- PDBG("rxqid[%u] %u\n", i, ctx->lldi.rxq_ids[i]);
+ pr_debug("rxqid[%u] %u\n", i, ctx->lldi.rxq_ids[i]);
out:
return ctx;
}
@@ -1103,8 +1137,7 @@ static inline int recv_rx_pkt(struct c4iw_dev *dev, const struct pkt_gl *gl,
goto out;
if (c4iw_handlers[opcode] == NULL) {
- pr_info("%s no handler opcode 0x%x...\n", __func__,
- opcode);
+ pr_info("%s no handler opcode 0x%x...\n", __func__, opcode);
kfree_skb(skb);
goto out;
}
@@ -1141,13 +1174,11 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
if (recv_rx_pkt(dev, gl, rsp))
return 0;
- pr_info("%s: unexpected FL contents at %p, " \
- "RSS %#llx, FL %#llx, len %u\n",
- pci_name(ctx->lldi.pdev), gl->va,
- (unsigned long long)be64_to_cpu(*rsp),
- (unsigned long long)be64_to_cpu(
- *(__force __be64 *)gl->va),
- gl->tot_len);
+ pr_info("%s: unexpected FL contents at %p, RSS %#llx, FL %#llx, len %u\n",
+ pci_name(ctx->lldi.pdev), gl->va,
+ be64_to_cpu(*rsp),
+ be64_to_cpu(*(__force __be64 *)gl->va),
+ gl->tot_len);
return 0;
} else {
@@ -1160,8 +1191,7 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
if (c4iw_handlers[opcode]) {
c4iw_handlers[opcode](dev, skb);
} else {
- pr_info("%s no handler opcode 0x%x...\n", __func__,
- opcode);
+ pr_info("%s no handler opcode 0x%x...\n", __func__, opcode);
kfree_skb(skb);
}
@@ -1174,17 +1204,16 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
{
struct uld_ctx *ctx = handle;
- PDBG("%s new_state %u\n", __func__, new_state);
+ pr_debug("%s new_state %u\n", __func__, new_state);
switch (new_state) {
case CXGB4_STATE_UP:
- printk(KERN_INFO MOD "%s: Up\n", pci_name(ctx->lldi.pdev));
+ pr_info("%s: Up\n", pci_name(ctx->lldi.pdev));
if (!ctx->dev) {
int ret;
ctx->dev = c4iw_alloc(&ctx->lldi);
if (IS_ERR(ctx->dev)) {
- printk(KERN_ERR MOD
- "%s: initialization failed: %ld\n",
+ pr_err("%s: initialization failed: %ld\n",
pci_name(ctx->lldi.pdev),
PTR_ERR(ctx->dev));
ctx->dev = NULL;
@@ -1192,22 +1221,19 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
}
ret = c4iw_register_device(ctx->dev);
if (ret) {
- printk(KERN_ERR MOD
- "%s: RDMA registration failed: %d\n",
+ pr_err("%s: RDMA registration failed: %d\n",
pci_name(ctx->lldi.pdev), ret);
c4iw_dealloc(ctx);
}
}
break;
case CXGB4_STATE_DOWN:
- printk(KERN_INFO MOD "%s: Down\n",
- pci_name(ctx->lldi.pdev));
+ pr_info("%s: Down\n", pci_name(ctx->lldi.pdev));
if (ctx->dev)
c4iw_remove(ctx);
break;
case CXGB4_STATE_START_RECOVERY:
- printk(KERN_INFO MOD "%s: Fatal Error\n",
- pci_name(ctx->lldi.pdev));
+ pr_info("%s: Fatal Error\n", pci_name(ctx->lldi.pdev));
if (ctx->dev) {
struct ib_event event;
@@ -1220,8 +1246,7 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
}
break;
case CXGB4_STATE_DETACH:
- printk(KERN_INFO MOD "%s: Detach\n",
- pci_name(ctx->lldi.pdev));
+ pr_info("%s: Detach\n", pci_name(ctx->lldi.pdev));
if (ctx->dev)
c4iw_remove(ctx);
break;
@@ -1371,9 +1396,7 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
t4_sq_host_wq_pidx(&qp->wq),
t4_sq_wq_size(&qp->wq));
if (ret) {
- pr_err(MOD "%s: Fatal error - "
- "DB overflow recovery failed - "
- "error syncing SQ qid %u\n",
+ pr_err("%s: Fatal error - DB overflow recovery failed - error syncing SQ qid %u\n",
pci_name(ctx->lldi.pdev), qp->wq.sq.qid);
spin_unlock(&qp->lock);
spin_unlock_irq(&qp->rhp->lock);
@@ -1387,9 +1410,7 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
t4_rq_wq_size(&qp->wq));
if (ret) {
- pr_err(MOD "%s: Fatal error - "
- "DB overflow recovery failed - "
- "error syncing RQ qid %u\n",
+ pr_err("%s: Fatal error - DB overflow recovery failed - error syncing RQ qid %u\n",
pci_name(ctx->lldi.pdev), qp->wq.rq.qid);
spin_unlock(&qp->lock);
spin_unlock_irq(&qp->rhp->lock);
@@ -1420,7 +1441,7 @@ static void recover_queues(struct uld_ctx *ctx)
/* flush the SGE contexts */
ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]);
if (ret) {
- printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
+ pr_err("%s: Fatal error - DB overflow recovery failed\n",
pci_name(ctx->lldi.pdev));
return;
}
@@ -1478,8 +1499,8 @@ static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
mutex_unlock(&ctx->dev->rdev.stats.lock);
break;
default:
- printk(KERN_WARNING MOD "%s: unknown control cmd %u\n",
- pci_name(ctx->lldi.pdev), control);
+ pr_warn("%s: unknown control cmd %u\n",
+ pci_name(ctx->lldi.pdev), control);
break;
}
return 0;
@@ -1508,8 +1529,7 @@ static int __init c4iw_init_module(void)
c4iw_debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
if (!c4iw_debugfs_root)
- printk(KERN_WARNING MOD
- "could not create debugfs entry, continuing\n");
+ pr_warn("could not create debugfs entry, continuing\n");
cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info);
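
The device.c changes above stop reading ep->com.cm_id directly in the debugfs dump paths: set_ep_sin_addrs()/set_ep_sin6_addrs() fall back to a zeroed sockaddr when the cm_id has already been released. The shape of that fallback, reduced to a hedged sketch with illustrative names:

#include <linux/in.h>
#include <linux/socket.h>

static struct sockaddr_in demo_zero_sin = {
	.sin_family = AF_INET,
};

/* Return the cm_id's address while it still exists, otherwise a
 * harmless all-zero address, so the dump code never reads through a
 * NULL pointer.
 */
static struct sockaddr_in *demo_pick_addr(struct sockaddr_in *id_addr)
{
	return id_addr ? id_addr : &demo_zero_sin;
}
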
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
index bdfac2ccb704..8f963df0bffc 100644
--- a/drivers/infiniband/hw/cxgb4/ev.c
+++ b/drivers/infiniband/hw/cxgb4/ev.c
@@ -47,17 +47,16 @@ static void print_tpte(struct c4iw_dev *dev, u32 stag)
"%s cxgb4_read_tpte err %d\n", __func__, ret);
return;
}
- PDBG("stag idx 0x%x valid %d key 0x%x state %d pdid %d "
- "perm 0x%x ps %d len 0x%llx va 0x%llx\n",
- stag & 0xffffff00,
- FW_RI_TPTE_VALID_G(ntohl(tpte.valid_to_pdid)),
- FW_RI_TPTE_STAGKEY_G(ntohl(tpte.valid_to_pdid)),
- FW_RI_TPTE_STAGSTATE_G(ntohl(tpte.valid_to_pdid)),
- FW_RI_TPTE_PDID_G(ntohl(tpte.valid_to_pdid)),
- FW_RI_TPTE_PERM_G(ntohl(tpte.locread_to_qpid)),
- FW_RI_TPTE_PS_G(ntohl(tpte.locread_to_qpid)),
- ((u64)ntohl(tpte.len_hi) << 32) | ntohl(tpte.len_lo),
- ((u64)ntohl(tpte.va_hi) << 32) | ntohl(tpte.va_lo_fbo));
+ pr_debug("stag idx 0x%x valid %d key 0x%x state %d pdid %d perm 0x%x ps %d len 0x%llx va 0x%llx\n",
+ stag & 0xffffff00,
+ FW_RI_TPTE_VALID_G(ntohl(tpte.valid_to_pdid)),
+ FW_RI_TPTE_STAGKEY_G(ntohl(tpte.valid_to_pdid)),
+ FW_RI_TPTE_STAGSTATE_G(ntohl(tpte.valid_to_pdid)),
+ FW_RI_TPTE_PDID_G(ntohl(tpte.valid_to_pdid)),
+ FW_RI_TPTE_PERM_G(ntohl(tpte.locread_to_qpid)),
+ FW_RI_TPTE_PS_G(ntohl(tpte.locread_to_qpid)),
+ ((u64)ntohl(tpte.len_hi) << 32) | ntohl(tpte.len_lo),
+ ((u64)ntohl(tpte.va_hi) << 32) | ntohl(tpte.va_lo_fbo));
}
static void dump_err_cqe(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
@@ -71,9 +70,9 @@ static void dump_err_cqe(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), ntohl(err_cqe->len),
CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe));
- PDBG("%016llx %016llx %016llx %016llx\n",
- be64_to_cpu(p[0]), be64_to_cpu(p[1]), be64_to_cpu(p[2]),
- be64_to_cpu(p[3]));
+ pr_debug("%016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(p[0]), be64_to_cpu(p[1]), be64_to_cpu(p[2]),
+ be64_to_cpu(p[3]));
/*
* Ingress WRITE and READ_RESP errors provide
@@ -124,8 +123,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
spin_lock_irq(&dev->lock);
qhp = get_qhp(dev, CQE_QPID(err_cqe));
if (!qhp) {
- printk(KERN_ERR MOD "BAD AE qpid 0x%x opcode %d "
- "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
+ pr_err("BAD AE qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
CQE_QPID(err_cqe),
CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe),
CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe),
@@ -140,8 +138,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
cqid = qhp->attr.rcq;
chp = get_chp(dev, cqid);
if (!chp) {
- printk(KERN_ERR MOD "BAD AE cqid 0x%x qpid 0x%x opcode %d "
- "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
+ pr_err("BAD AE cqid 0x%x qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
cqid, CQE_QPID(err_cqe),
CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe),
CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe),
@@ -165,7 +162,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
/* Completion Events */
case T4_ERR_SUCCESS:
- printk(KERN_ERR MOD "AE with status 0!\n");
+ pr_err("AE with status 0!\n");
break;
case T4_ERR_STAG:
@@ -207,7 +204,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
break;
default:
- printk(KERN_ERR MOD "Unknown T4 status 0x%x QPID 0x%x\n",
+ pr_err("Unknown T4 status 0x%x QPID 0x%x\n",
CQE_STATUS(err_cqe), qhp->wq.sq.qid);
post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_QP_FATAL);
break;
@@ -237,7 +234,7 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid)
if (atomic_dec_and_test(&chp->refcnt))
wake_up(&chp->wait);
} else {
- PDBG("%s unknown cqid 0x%x\n", __func__, qid);
+ pr_debug("%s unknown cqid 0x%x\n", __func__, qid);
spin_unlock_irqrestore(&dev->lock, flag);
}
return 0;
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 8cd4d054a87e..819a30635d53 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -37,7 +37,7 @@
#include <linux/idr.h>
#include <linux/completion.h>
#include <linux/netdevice.h>
-#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/inet.h>
@@ -64,12 +64,11 @@
#define DRV_NAME "iw_cxgb4"
#define MOD DRV_NAME ":"
-extern int c4iw_debug;
-#define PDBG(fmt, args...) \
-do { \
- if (c4iw_debug) \
- printk(MOD fmt, ## args); \
-} while (0)
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include "t4.h"
@@ -231,15 +230,15 @@ static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev,
ret = wait_for_completion_timeout(&wr_waitp->completion, C4IW_WR_TO);
if (!ret) {
- PDBG("%s - Device %s not responding (disabling device) - tid %u qpid %u\n",
- func, pci_name(rdev->lldi.pdev), hwtid, qpid);
+ pr_debug("%s - Device %s not responding (disabling device) - tid %u qpid %u\n",
+ func, pci_name(rdev->lldi.pdev), hwtid, qpid);
rdev->flags |= T4_FATAL_ERROR;
wr_waitp->ret = -EIO;
}
out:
if (wr_waitp->ret)
- PDBG("%s: FW reply %d tid %u qpid %u\n",
- pci_name(rdev->lldi.pdev), wr_waitp->ret, hwtid, qpid);
+ pr_debug("%s: FW reply %d tid %u qpid %u\n",
+ pci_name(rdev->lldi.pdev), wr_waitp->ret, hwtid, qpid);
return wr_waitp->ret;
}
@@ -538,8 +537,9 @@ static inline struct c4iw_mm_entry *remove_mmap(struct c4iw_ucontext *ucontext,
if (mm->key == key && mm->len == len) {
list_del_init(&mm->entry);
spin_unlock(&ucontext->mmap_lock);
- PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__,
- key, (unsigned long long) mm->addr, mm->len);
+ pr_debug("%s key 0x%x addr 0x%llx len %d\n",
+ __func__, key,
+ (unsigned long long)mm->addr, mm->len);
return mm;
}
}
@@ -551,8 +551,8 @@ static inline void insert_mmap(struct c4iw_ucontext *ucontext,
struct c4iw_mm_entry *mm)
{
spin_lock(&ucontext->mmap_lock);
- PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__,
- mm->key, (unsigned long long) mm->addr, mm->len);
+ pr_debug("%s key 0x%x addr 0x%llx len %d\n",
+ __func__, mm->key, (unsigned long long)mm->addr, mm->len);
list_add_tail(&mm->entry, &ucontext->mmaps);
spin_unlock(&ucontext->mmap_lock);
}
@@ -670,17 +670,19 @@ enum c4iw_mmid_state {
#define MPA_V2_RDMA_READ_RTR 0x4000
#define MPA_V2_IRD_ORD_MASK 0x3FFF
-#define c4iw_put_ep(ep) { \
- PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \
- ep, atomic_read(&((ep)->kref.refcount))); \
- WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \
- kref_put(&((ep)->kref), _c4iw_free_ep); \
+#define c4iw_put_ep(ep) { \
+ pr_debug("put_ep (via %s:%u) ep %p refcnt %d\n", \
+ __func__, __LINE__, \
+ ep, kref_read(&((ep)->kref))); \
+ WARN_ON(kref_read(&((ep)->kref)) < 1); \
+ kref_put(&((ep)->kref), _c4iw_free_ep); \
}
-#define c4iw_get_ep(ep) { \
- PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __func__, __LINE__, \
- ep, atomic_read(&((ep)->kref.refcount))); \
- kref_get(&((ep)->kref)); \
+#define c4iw_get_ep(ep) { \
+ pr_debug("get_ep (via %s:%u) ep %p, refcnt %d\n", \
+ __func__, __LINE__, \
+ ep, kref_read(&((ep)->kref))); \
+ kref_get(&((ep)->kref)); \
}
void _c4iw_free_ep(struct kref *kref);
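The iw_cxgb4.h hunk above replaces the driver-private PDBG macro with the stock pr_debug() machinery, prefixed through pr_fmt(), and reads endpoint refcounts with kref_read() instead of atomic_read(&kref.refcount). A minimal sketch of the same pattern, assuming an ordinary kernel-module build; struct demo_ep and demo_put_ep() are illustrative names, not driver symbols:

#undef pr_fmt
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/printk.h>

struct demo_ep {
	struct kref kref;
};

static void demo_put_ep(struct demo_ep *ep, void (*release)(struct kref *))
{
	/* kref_read() replaces the old atomic_read(&kref.refcount) idiom */
	pr_debug("put_ep %p refcnt %u\n", ep, kref_read(&ep->kref));
	WARN_ON(kref_read(&ep->kref) < 1);
	kref_put(&ep->kref, release);
}

With pr_fmt() defined this way, every pr_*() call in the translation unit is automatically prefixed with the module name, which is what lets the patch drop the old MOD prefix from the individual format strings.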
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 410408f886c1..5332f06b99ba 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -38,9 +38,9 @@
#include "iw_cxgb4.h"
-int use_dsgl = 0;
+int use_dsgl = 1;
module_param(use_dsgl, int, 0644);
-MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=0)");
+MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=1) (DEPRECATED)");
#define T4_ULPTX_MIN_IO 32
#define C4IW_MAX_INLINE_SIZE 96
@@ -81,8 +81,7 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
}
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
- req = (struct ulp_mem_io *)__skb_put(skb, wr_len);
- memset(req, 0, wr_len);
+ req = __skb_put_zero(skb, wr_len);
INIT_ULPTX_WR(req, wr_len, 0, 0);
req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) |
(wait ? FW_WR_COMPL_F : 0));
@@ -125,7 +124,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
cmd |= cpu_to_be32(T5_ULP_MEMIO_IMM_F);
addr &= 0x7FFFFFF;
- PDBG("%s addr 0x%x len %u\n", __func__, addr, len);
+ pr_debug("%s addr 0x%x len %u\n", __func__, addr, len);
num_wqe = DIV_ROUND_UP(len, C4IW_MAX_INLINE_SIZE);
c4iw_init_wr_wait(&wr_wait);
for (i = 0; i < num_wqe; i++) {
@@ -142,8 +141,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
}
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
- req = (struct ulp_mem_io *)__skb_put(skb, wr_len);
- memset(req, 0, wr_len);
+ req = __skb_put_zero(skb, wr_len);
INIT_ULPTX_WR(req, wr_len, 0, 0);
if (i == (num_wqe-1)) {
@@ -231,13 +229,11 @@ out:
static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len,
void *data, struct sk_buff *skb)
{
- if (is_t5(rdev->lldi.adapter_type) && use_dsgl) {
+ if (rdev->lldi.ulptx_memwrite_dsgl && use_dsgl) {
if (len > inline_threshold) {
if (_c4iw_write_mem_dma(rdev, addr, len, data, skb)) {
- printk_ratelimited(KERN_WARNING
- "%s: dma map"
- " failure (non fatal)\n",
- pci_name(rdev->lldi.pdev));
+ pr_warn_ratelimited("%s: dma map failure (non fatal)\n",
+ pci_name(rdev->lldi.pdev));
return _c4iw_write_mem_inline(rdev, addr, len,
data, skb);
} else {
@@ -289,8 +285,8 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
mutex_unlock(&rdev->stats.lock);
*stag = (stag_idx << 8) | (atomic_inc_return(&key) & 0xff);
}
- PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
- __func__, stag_state, type, pdid, stag_idx);
+ pr_debug("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
+ __func__, stag_state, type, pdid, stag_idx);
/* write TPT entry */
if (reset_tpt_entry)
@@ -331,9 +327,9 @@ static int write_pbl(struct c4iw_rdev *rdev, __be64 *pbl,
{
int err;
- PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
- __func__, pbl_addr, rdev->lldi.vr->pbl.start,
- pbl_size);
+ pr_debug("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
+ __func__, pbl_addr, rdev->lldi.vr->pbl.start,
+ pbl_size);
err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl, NULL);
return err;
@@ -376,7 +372,7 @@ static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag)
mhp->attr.stag = stag;
mmid = stag >> 8;
mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
- PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
+ pr_debug("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
}
@@ -426,7 +422,7 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
int ret;
u32 stag = T4_STAG_UNSET;
- PDBG("%s ib_pd %p\n", __func__, pd);
+ pr_debug("%s ib_pd %p\n", __func__, pd);
php = to_c4iw_pd(pd);
rhp = php->rhp;
@@ -483,7 +479,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct c4iw_pd *php;
struct c4iw_mr *mhp;
- PDBG("%s ib_pd %p\n", __func__, pd);
+ pr_debug("%s ib_pd %p\n", __func__, pd);
if (length == ~0ULL)
return ERR_PTR(-EINVAL);
@@ -517,7 +513,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return ERR_PTR(err);
}
- shift = ffs(mhp->umem->page_size) - 1;
+ shift = mhp->umem->page_shift;
n = mhp->umem->nmap;
err = alloc_pbl(mhp, n);
@@ -536,7 +532,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = cpu_to_be64(sg_dma_address(sg) +
- mhp->umem->page_size * k);
+ (k << shift));
if (i == PAGE_SIZE / sizeof *pages) {
err = write_pbl(&mhp->rhp->rdev,
pages,
@@ -620,7 +616,7 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
ret = -ENOMEM;
goto dealloc_win;
}
- PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
+ pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
return &(mhp->ibmw);
dealloc_win:
@@ -645,7 +641,7 @@ int c4iw_dealloc_mw(struct ib_mw *mw)
deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb);
kfree_skb(mhp->dereg_skb);
kfree(mhp);
- PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
+ pr_debug("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
return 0;
}
@@ -703,7 +699,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
goto err3;
}
- PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
+ pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
return &(mhp->ibmr);
err3:
dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
@@ -748,7 +744,7 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
struct c4iw_mr *mhp;
u32 mmid;
- PDBG("%s ib_mr %p\n", __func__, ib_mr);
+ pr_debug("%s ib_mr %p\n", __func__, ib_mr);
mhp = to_c4iw_mr(ib_mr);
rhp = mhp->rhp;
@@ -766,7 +762,7 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
kfree((void *) (unsigned long) mhp->kva);
if (mhp->umem)
ib_umem_release(mhp->umem);
- PDBG("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp);
+ pr_debug("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp);
kfree(mhp);
return 0;
}
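The mem.c hunks above also swap the open-coded __skb_put() plus memset() pair for __skb_put_zero(), which reserves the requested bytes at the skb tail and zeroes them in one call. A small sketch of that idiom; demo_reserve_wr() and its wr_len parameter are illustrative, not driver code:

#include <linux/skbuff.h>

/* Return a zeroed work-request area of wr_len bytes at the skb tail. */
static void *demo_reserve_wr(struct sk_buff *skb, unsigned int wr_len)
{
	/* equivalent to: p = __skb_put(skb, wr_len); memset(p, 0, wr_len); */
	return __skb_put_zero(skb, wr_len);
}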
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 3345e1c312f7..0771e9a4d061 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -59,7 +59,7 @@ module_param(fastreg_support, int, 0644);
MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)");
static struct ib_ah *c4iw_ah_create(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr,
+ struct rdma_ah_attr *ah_attr,
struct ib_udata *udata)
{
@@ -102,7 +102,7 @@ void _c4iw_free_ucontext(struct kref *kref)
ucontext = container_of(kref, struct c4iw_ucontext, kref);
rhp = to_c4iw_dev(ucontext->ibucontext.device);
- PDBG("%s ucontext %p\n", __func__, ucontext);
+ pr_debug("%s ucontext %p\n", __func__, ucontext);
list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
kfree(mm);
c4iw_release_dev_ucontext(&rhp->rdev, &ucontext->uctx);
@@ -113,7 +113,7 @@ static int c4iw_dealloc_ucontext(struct ib_ucontext *context)
{
struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context);
- PDBG("%s context %p\n", __func__, context);
+ pr_debug("%s context %p\n", __func__, context);
c4iw_put_ucontext(ucontext);
return 0;
}
@@ -123,12 +123,11 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
{
struct c4iw_ucontext *context;
struct c4iw_dev *rhp = to_c4iw_dev(ibdev);
- static int warned;
struct c4iw_alloc_ucontext_resp uresp;
int ret = 0;
struct c4iw_mm_entry *mm = NULL;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context) {
ret = -ENOMEM;
@@ -141,8 +140,7 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
kref_init(&context->kref);
if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) {
- if (!warned++)
- pr_err(MOD "Warning - downlevel libcxgb4 (non-fatal), device status page disabled.");
+ pr_err_once("Warning - downlevel libcxgb4 (non-fatal), device status page disabled\n");
rhp->rdev.flags |= T4_STATUS_PAGE_DISABLED;
} else {
mm = kmalloc(sizeof(*mm), GFP_KERNEL);
@@ -187,8 +185,8 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
struct c4iw_ucontext *ucontext;
u64 addr;
- PDBG("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff,
- key, len);
+ pr_debug("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff,
+ key, len);
if (vma->vm_start & (PAGE_SIZE-1))
return -EINVAL;
@@ -253,7 +251,7 @@ static int c4iw_deallocate_pd(struct ib_pd *pd)
php = to_c4iw_pd(pd);
rhp = php->rhp;
- PDBG("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
+ pr_debug("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
c4iw_put_resource(&rhp->rdev.resource.pdid_table, php->pdid);
mutex_lock(&rhp->rdev.stats.lock);
rhp->rdev.stats.pd.cur--;
@@ -270,7 +268,7 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
u32 pdid;
struct c4iw_dev *rhp;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
rhp = (struct c4iw_dev *) ibdev;
pdid = c4iw_get_resource(&rhp->rdev.resource.pdid_table);
if (!pdid)
@@ -293,14 +291,14 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
if (rhp->rdev.stats.pd.cur > rhp->rdev.stats.pd.max)
rhp->rdev.stats.pd.max = rhp->rdev.stats.pd.cur;
mutex_unlock(&rhp->rdev.stats.lock);
- PDBG("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
+ pr_debug("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
return &php->ibpd;
}
static int c4iw_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
u16 *pkey)
{
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
*pkey = 0;
return 0;
}
@@ -310,8 +308,8 @@ static int c4iw_query_gid(struct ib_device *ibdev, u8 port, int index,
{
struct c4iw_dev *dev;
- PDBG("%s ibdev %p, port %d, index %d, gid %p\n",
- __func__, ibdev, port, index, gid);
+ pr_debug("%s ibdev %p, port %d, index %d, gid %p\n",
+ __func__, ibdev, port, index, gid);
dev = to_c4iw_dev(ibdev);
BUG_ON(port == 0);
memset(&(gid->raw[0]), 0, sizeof(gid->raw));
@@ -325,7 +323,7 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
struct c4iw_dev *dev;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
if (uhw->inlen || uhw->outlen)
return -EINVAL;
@@ -366,12 +364,11 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
struct net_device *netdev;
struct in_device *inetdev;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
dev = to_c4iw_dev(ibdev);
netdev = dev->rdev.lldi.ports[port-1];
-
- memset(props, 0, sizeof(struct ib_port_attr));
+ /* props is zeroed by the caller; avoid zeroing it here */
props->max_mtu = IB_MTU_4096;
props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
@@ -409,7 +406,7 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev.dev);
- PDBG("%s dev 0x%p\n", __func__, dev);
+ pr_debug("%s dev 0x%p\n", __func__, dev);
return sprintf(buf, "%d\n",
CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
}
@@ -422,7 +419,7 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
struct ethtool_drvinfo info;
struct net_device *lldev = c4iw_dev->rdev.lldi.ports[0];
- PDBG("%s dev 0x%p\n", __func__, dev);
+ pr_debug("%s dev 0x%p\n", __func__, dev);
lldev->ethtool_ops->get_drvinfo(lldev, &info);
return sprintf(buf, "%s\n", info.driver);
}
@@ -432,7 +429,7 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev.dev);
- PDBG("%s dev 0x%p\n", __func__, dev);
+ pr_debug("%s dev 0x%p\n", __func__, dev);
return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor,
c4iw_dev->rdev.lldi.pdev->device);
}
@@ -508,13 +505,14 @@ static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
- err = c4iw_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
return 0;
}
@@ -524,7 +522,7 @@ static void get_dev_fw_str(struct ib_device *dev, char *str,
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev);
- PDBG("%s dev 0x%p\n", __func__, dev);
+ pr_debug("%s dev 0x%p\n", __func__, dev);
snprintf(str, str_len, "%u.%u.%u.%u",
FW_HDR_FW_VER_MAJOR_G(c4iw_dev->rdev.lldi.fw_vers),
@@ -538,7 +536,7 @@ int c4iw_register_device(struct c4iw_dev *dev)
int ret;
int i;
- PDBG("%s c4iw_dev %p\n", __func__, dev);
+ pr_debug("%s c4iw_dev %p\n", __func__, dev);
BUG_ON(!dev->rdev.lldi.ports[0]);
strlcpy(dev->ibdev.name, "cxgb4_%d", IB_DEVICE_NAME_MAX);
memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
@@ -572,7 +570,7 @@ int c4iw_register_device(struct c4iw_dev *dev)
memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC));
dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports;
dev->ibdev.num_comp_vectors = dev->rdev.lldi.nciq;
- dev->ibdev.dma_device = &(dev->rdev.lldi.pdev->dev);
+ dev->ibdev.dev.parent = &dev->rdev.lldi.pdev->dev;
dev->ibdev.query_device = c4iw_query_device;
dev->ibdev.query_port = c4iw_query_port;
dev->ibdev.query_pkey = c4iw_query_pkey;
@@ -648,7 +646,7 @@ void c4iw_unregister_device(struct c4iw_dev *dev)
{
int i;
- PDBG("%s c4iw_dev %p\n", __func__, dev);
+ pr_debug("%s c4iw_dev %p\n", __func__, dev);
for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i)
device_remove_file(&dev->ibdev.dev,
c4iw_class_attributes[i]);
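One change in provider.c above replaces a hand-rolled "static int warned" guard with pr_err_once(), which emits the message at most once over the module's lifetime. A minimal sketch of the idiom; demo_check_abi() and its parameters are illustrative stand-ins for the udata->outlen check in c4iw_alloc_ucontext():

#include <linux/printk.h>
#include <linux/types.h>

static void demo_check_abi(size_t outlen, size_t expected)
{
	if (outlen < expected)
		pr_err_once("downlevel user library (non-fatal), status page disabled\n");
}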
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 04c1c382dedb..cb7fc0d35d1d 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -254,11 +254,11 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
ret = -ENOMEM;
goto free_sq;
}
- PDBG("%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
- __func__, wq->sq.queue,
- (unsigned long long)virt_to_phys(wq->sq.queue),
- wq->rq.queue,
- (unsigned long long)virt_to_phys(wq->rq.queue));
+ pr_debug("%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
+ __func__, wq->sq.queue,
+ (unsigned long long)virt_to_phys(wq->sq.queue),
+ wq->rq.queue,
+ (unsigned long long)virt_to_phys(wq->rq.queue));
memset(wq->rq.queue, 0, wq->rq.memsize);
dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr);
@@ -275,7 +275,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
* User mode must have bar2 access.
*/
if (user && (!wq->sq.bar2_pa || !wq->rq.bar2_pa)) {
- pr_warn(MOD "%s: sqid %u or rqid %u not in BAR2 range.\n",
+ pr_warn("%s: sqid %u or rqid %u not in BAR2 range\n",
pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid);
goto free_dma;
}
@@ -293,8 +293,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
}
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
- res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
- memset(res_wr, 0, wr_len);
+ res_wr = __skb_put_zero(skb, wr_len);
res_wr->op_nres = cpu_to_be32(
FW_WR_OP_V(FW_RI_RES_WR) |
FW_RI_RES_WR_NRES_V(2) |
@@ -362,9 +361,9 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
if (ret)
goto free_dma;
- PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p sq_bar2_addr %p rq_bar2_addr %p\n",
- __func__, wq->sq.qid, wq->rq.qid, wq->db,
- wq->sq.bar2_va, wq->rq.bar2_va);
+ pr_debug("%s sqid 0x%x rqid 0x%x kdb 0x%p sq_bar2_addr %p rq_bar2_addr %p\n",
+ __func__, wq->sq.qid, wq->rq.qid, wq->db,
+ wq->sq.bar2_va, wq->rq.bar2_va);
return 0;
free_dma:
@@ -570,7 +569,7 @@ static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
{
if (wr->num_sge > 1)
return -EINVAL;
- if (wr->num_sge) {
+ if (wr->num_sge && wr->sg_list[0].length) {
wqe->read.stag_src = cpu_to_be32(rdma_wr(wr)->rkey);
wqe->read.to_src_hi = cpu_to_be32((u32)(rdma_wr(wr)->remote_addr
>> 32));
@@ -725,7 +724,7 @@ static void free_qp_work(struct work_struct *work)
ucontext = qhp->ucontext;
rhp = qhp->rhp;
- PDBG("%s qhp %p ucontext %p\n", __func__, qhp, ucontext);
+ pr_debug("%s qhp %p ucontext %p\n", __func__, qhp, ucontext);
destroy_qp(&rhp->rdev, &qhp->wq,
ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
@@ -739,19 +738,19 @@ static void queue_qp_free(struct kref *kref)
struct c4iw_qp *qhp;
qhp = container_of(kref, struct c4iw_qp, kref);
- PDBG("%s qhp %p\n", __func__, qhp);
+ pr_debug("%s qhp %p\n", __func__, qhp);
queue_work(qhp->rhp->rdev.free_workq, &qhp->free_work);
}
void c4iw_qp_add_ref(struct ib_qp *qp)
{
- PDBG("%s ib_qp %p\n", __func__, qp);
+ pr_debug("%s ib_qp %p\n", __func__, qp);
kref_get(&to_c4iw_qp(qp)->kref);
}
void c4iw_qp_rem_ref(struct ib_qp *qp)
{
- PDBG("%s ib_qp %p\n", __func__, qp);
+ pr_debug("%s ib_qp %p\n", __func__, qp);
kref_put(&to_c4iw_qp(qp)->kref, queue_qp_free);
}
@@ -959,8 +958,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
c4iw_invalidate_mr(qhp->rhp, wr->ex.invalidate_rkey);
break;
default:
- PDBG("%s post of type=%d TBD!\n", __func__,
- wr->opcode);
+ pr_debug("%s post of type=%d TBD!\n", __func__,
+ wr->opcode);
err = -EINVAL;
}
if (err) {
@@ -981,9 +980,10 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16);
- PDBG("%s cookie 0x%llx pidx 0x%x opcode 0x%x read_len %u\n",
- __func__, (unsigned long long)wr->wr_id, qhp->wq.sq.pidx,
- swsqe->opcode, swsqe->read_len);
+ pr_debug("%s cookie 0x%llx pidx 0x%x opcode 0x%x read_len %u\n",
+ __func__,
+ (unsigned long long)wr->wr_id, qhp->wq.sq.pidx,
+ swsqe->opcode, swsqe->read_len);
wr = wr->next;
num_wrs--;
t4_sq_produce(&qhp->wq, len16);
@@ -1057,8 +1057,9 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
wqe->recv.r2[1] = 0;
wqe->recv.r2[2] = 0;
wqe->recv.len16 = len16;
- PDBG("%s cookie 0x%llx pidx %u\n", __func__,
- (unsigned long long) wr->wr_id, qhp->wq.rq.pidx);
+ pr_debug("%s cookie 0x%llx pidx %u\n",
+ __func__,
+ (unsigned long long)wr->wr_id, qhp->wq.rq.pidx);
t4_rq_produce(&qhp->wq, len16);
idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
wr = wr->next;
@@ -1217,8 +1218,8 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe,
struct sk_buff *skb;
struct terminate_message *term;
- PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
- qhp->ep->hwtid);
+ pr_debug("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
+ qhp->ep->hwtid);
skb = skb_dequeue(&qhp->ep->com.ep_skb_list);
if (WARN_ON(!skb))
@@ -1226,7 +1227,7 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe,
set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
- wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
+ wqe = __skb_put(skb, sizeof(*wqe));
memset(wqe, 0, sizeof *wqe);
wqe->op_compl = cpu_to_be32(FW_WR_OP_V(FW_RI_INIT_WR));
wqe->flowid_len16 = cpu_to_be32(
@@ -1254,7 +1255,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
int rq_flushed, sq_flushed;
unsigned long flag;
- PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
+ pr_debug("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
/* locking hierarchy: cq lock first, then qp lock. */
spin_lock_irqsave(&rchp->lock, flag);
@@ -1339,8 +1340,8 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
int ret;
struct sk_buff *skb;
- PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
- ep->hwtid);
+ pr_debug("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
+ ep->hwtid);
skb = skb_dequeue(&ep->com.ep_skb_list);
if (WARN_ON(!skb))
@@ -1348,7 +1349,7 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
- wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
+ wqe = __skb_put(skb, sizeof(*wqe));
memset(wqe, 0, sizeof *wqe);
wqe->op_compl = cpu_to_be32(
FW_WR_OP_V(FW_RI_INIT_WR) |
@@ -1366,13 +1367,13 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
ret = c4iw_wait_for_reply(&rhp->rdev, &ep->com.wr_wait, qhp->ep->hwtid,
qhp->wq.sq.qid, __func__);
out:
- PDBG("%s ret %d\n", __func__, ret);
+ pr_debug("%s ret %d\n", __func__, ret);
return ret;
}
static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init)
{
- PDBG("%s p2p_type = %d\n", __func__, p2p_type);
+ pr_debug("%s p2p_type = %d\n", __func__, p2p_type);
memset(&init->u, 0, sizeof init->u);
switch (p2p_type) {
case FW_RI_INIT_P2PTYPE_RDMA_WRITE:
@@ -1401,8 +1402,8 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
int ret;
struct sk_buff *skb;
- PDBG("%s qhp %p qid 0x%x tid %u ird %u ord %u\n", __func__, qhp,
- qhp->wq.sq.qid, qhp->ep->hwtid, qhp->ep->ird, qhp->ep->ord);
+ pr_debug("%s qhp %p qid 0x%x tid %u ird %u ord %u\n", __func__, qhp,
+ qhp->wq.sq.qid, qhp->ep->hwtid, qhp->ep->ird, qhp->ep->ord);
skb = alloc_skb(sizeof *wqe, GFP_KERNEL);
if (!skb) {
@@ -1417,7 +1418,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
}
set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
- wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
+ wqe = __skb_put(skb, sizeof(*wqe));
memset(wqe, 0, sizeof *wqe);
wqe->op_compl = cpu_to_be32(
FW_WR_OP_V(FW_RI_INIT_WR) |
@@ -1474,7 +1475,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
err1:
free_ird(rhp, qhp->attr.max_ird);
out:
- PDBG("%s ret %d\n", __func__, ret);
+ pr_debug("%s ret %d\n", __func__, ret);
return ret;
}
@@ -1491,9 +1492,10 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
int free = 0;
struct c4iw_ep *ep = NULL;
- PDBG("%s qhp %p sqid 0x%x rqid 0x%x ep %p state %d -> %d\n", __func__,
- qhp, qhp->wq.sq.qid, qhp->wq.rq.qid, qhp->ep, qhp->attr.state,
- (mask & C4IW_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
+ pr_debug("%s qhp %p sqid 0x%x rqid 0x%x ep %p state %d -> %d\n",
+ __func__,
+ qhp, qhp->wq.sq.qid, qhp->wq.rq.qid, qhp->ep, qhp->attr.state,
+ (mask & C4IW_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
mutex_lock(&qhp->mutex);
@@ -1580,7 +1582,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
case C4IW_QP_STATE_RTS:
switch (attrs->next_state) {
case C4IW_QP_STATE_CLOSING:
- BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2);
+ BUG_ON(kref_read(&qhp->ep->com.kref) < 2);
t4_set_wq_in_error(&qhp->wq);
set_state(qhp, C4IW_QP_STATE_CLOSING);
ep = qhp->ep;
@@ -1671,16 +1673,15 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
goto err;
break;
default:
- printk(KERN_ERR "%s in a bad state %d\n",
- __func__, qhp->attr.state);
+ pr_err("%s in a bad state %d\n", __func__, qhp->attr.state);
ret = -EINVAL;
goto err;
break;
}
goto out;
err:
- PDBG("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep,
- qhp->wq.sq.qid);
+ pr_debug("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep,
+ qhp->wq.sq.qid);
/* disassociate the LLP connection */
qhp->attr.llp_stream_handle = NULL;
@@ -1716,7 +1717,7 @@ out:
*/
if (free)
c4iw_put_ep(&ep->com);
- PDBG("%s exit state %d\n", __func__, qhp->attr.state);
+ pr_debug("%s exit state %d\n", __func__, qhp->attr.state);
return ret;
}
@@ -1746,7 +1747,7 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
c4iw_qp_rem_ref(ib_qp);
- PDBG("%s ib_qp %p qpid 0x%0x\n", __func__, ib_qp, qhp->wq.sq.qid);
+ pr_debug("%s ib_qp %p qpid 0x%0x\n", __func__, ib_qp, qhp->wq.sq.qid);
return 0;
}
@@ -1765,7 +1766,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm;
struct c4iw_mm_entry *rq_db_key_mm = NULL, *ma_sync_key_mm = NULL;
- PDBG("%s ib_pd %p\n", __func__, pd);
+ pr_debug("%s ib_pd %p\n", __func__, pd);
if (attrs->qp_type != IB_QPT_RC)
return ERR_PTR(-EINVAL);
@@ -1936,11 +1937,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
qhp->ibqp.qp_num = qhp->wq.sq.qid;
init_timer(&(qhp->timer));
INIT_LIST_HEAD(&qhp->db_fc_entry);
- PDBG("%s sq id %u size %u memsize %zu num_entries %u "
- "rq id %u size %u memsize %zu num_entries %u\n", __func__,
- qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize,
- attrs->cap.max_send_wr, qhp->wq.rq.qid, qhp->wq.rq.size,
- qhp->wq.rq.memsize, attrs->cap.max_recv_wr);
+ pr_debug("%s sq id %u size %u memsize %zu num_entries %u rq id %u size %u memsize %zu num_entries %u\n",
+ __func__,
+ qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize,
+ attrs->cap.max_send_wr, qhp->wq.rq.qid, qhp->wq.rq.size,
+ qhp->wq.rq.memsize, attrs->cap.max_recv_wr);
return &qhp->ibqp;
err8:
kfree(ma_sync_key_mm);
@@ -1970,7 +1971,7 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
enum c4iw_qp_attr_mask mask = 0;
struct c4iw_qp_attributes attrs;
- PDBG("%s ib_qp %p\n", __func__, ibqp);
+ pr_debug("%s ib_qp %p\n", __func__, ibqp);
/* iwarp does not support the RTR state */
if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
@@ -2016,7 +2017,7 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn)
{
- PDBG("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn);
+ pr_debug("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn);
return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn);
}
diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c
index 67df71a7012e..8ff0cbe5cb16 100644
--- a/drivers/infiniband/hw/cxgb4/resource.c
+++ b/drivers/infiniband/hw/cxgb4/resource.c
@@ -90,7 +90,7 @@ u32 c4iw_get_resource(struct c4iw_id_table *id_table)
void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry)
{
- PDBG("%s entry 0x%x\n", __func__, entry);
+ pr_debug("%s entry 0x%x\n", __func__, entry);
c4iw_id_free(id_table, entry);
}
@@ -141,7 +141,7 @@ u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
}
out:
mutex_unlock(&uctx->lock);
- PDBG("%s qid 0x%x\n", __func__, qid);
+ pr_debug("%s qid 0x%x\n", __func__, qid);
mutex_lock(&rdev->stats.lock);
if (rdev->stats.qid.cur > rdev->stats.qid.max)
rdev->stats.qid.max = rdev->stats.qid.cur;
@@ -157,7 +157,7 @@ void c4iw_put_cqid(struct c4iw_rdev *rdev, u32 qid,
entry = kmalloc(sizeof *entry, GFP_KERNEL);
if (!entry)
return;
- PDBG("%s qid 0x%x\n", __func__, qid);
+ pr_debug("%s qid 0x%x\n", __func__, qid);
entry->qid = qid;
mutex_lock(&uctx->lock);
list_add_tail(&entry->entry, &uctx->cqids);
@@ -215,7 +215,7 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
}
out:
mutex_unlock(&uctx->lock);
- PDBG("%s qid 0x%x\n", __func__, qid);
+ pr_debug("%s qid 0x%x\n", __func__, qid);
mutex_lock(&rdev->stats.lock);
if (rdev->stats.qid.cur > rdev->stats.qid.max)
rdev->stats.qid.max = rdev->stats.qid.cur;
@@ -231,7 +231,7 @@ void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid,
entry = kmalloc(sizeof *entry, GFP_KERNEL);
if (!entry)
return;
- PDBG("%s qid 0x%x\n", __func__, qid);
+ pr_debug("%s qid 0x%x\n", __func__, qid);
entry->qid = qid;
mutex_lock(&uctx->lock);
list_add_tail(&entry->entry, &uctx->qpids);
@@ -254,7 +254,7 @@ void c4iw_destroy_resource(struct c4iw_resource *rscp)
u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size)
{
unsigned long addr = gen_pool_alloc(rdev->pbl_pool, size);
- PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
+ pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
mutex_lock(&rdev->stats.lock);
if (addr) {
rdev->stats.pbl.cur += roundup(size, 1 << MIN_PBL_SHIFT);
@@ -268,7 +268,7 @@ u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size)
void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
{
- PDBG("%s addr 0x%x size %d\n", __func__, addr, size);
+ pr_debug("%s addr 0x%x size %d\n", __func__, addr, size);
mutex_lock(&rdev->stats.lock);
rdev->stats.pbl.cur -= roundup(size, 1 << MIN_PBL_SHIFT);
mutex_unlock(&rdev->stats.lock);
@@ -290,19 +290,17 @@ int c4iw_pblpool_create(struct c4iw_rdev *rdev)
while (pbl_start < pbl_top) {
pbl_chunk = min(pbl_top - pbl_start + 1, pbl_chunk);
if (gen_pool_add(rdev->pbl_pool, pbl_start, pbl_chunk, -1)) {
- PDBG("%s failed to add PBL chunk (%x/%x)\n",
- __func__, pbl_start, pbl_chunk);
+ pr_debug("%s failed to add PBL chunk (%x/%x)\n",
+ __func__, pbl_start, pbl_chunk);
if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) {
- printk(KERN_WARNING MOD
- "Failed to add all PBL chunks (%x/%x)\n",
- pbl_start,
- pbl_top - pbl_start);
+ pr_warn("Failed to add all PBL chunks (%x/%x)\n",
+ pbl_start, pbl_top - pbl_start);
return 0;
}
pbl_chunk >>= 1;
} else {
- PDBG("%s added PBL chunk (%x/%x)\n",
- __func__, pbl_start, pbl_chunk);
+ pr_debug("%s added PBL chunk (%x/%x)\n",
+ __func__, pbl_start, pbl_chunk);
pbl_start += pbl_chunk;
}
}
@@ -324,9 +322,9 @@ void c4iw_pblpool_destroy(struct c4iw_rdev *rdev)
u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size)
{
unsigned long addr = gen_pool_alloc(rdev->rqt_pool, size << 6);
- PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6);
+ pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6);
if (!addr)
- pr_warn_ratelimited(MOD "%s: Out of RQT memory\n",
+ pr_warn_ratelimited("%s: Out of RQT memory\n",
pci_name(rdev->lldi.pdev));
mutex_lock(&rdev->stats.lock);
if (addr) {
@@ -341,7 +339,7 @@ u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size)
void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
{
- PDBG("%s addr 0x%x size %d\n", __func__, addr, size << 6);
+ pr_debug("%s addr 0x%x size %d\n", __func__, addr, size << 6);
mutex_lock(&rdev->stats.lock);
rdev->stats.rqt.cur -= roundup(size << 6, 1 << MIN_RQT_SHIFT);
mutex_unlock(&rdev->stats.lock);
@@ -363,18 +361,17 @@ int c4iw_rqtpool_create(struct c4iw_rdev *rdev)
while (rqt_start < rqt_top) {
rqt_chunk = min(rqt_top - rqt_start + 1, rqt_chunk);
if (gen_pool_add(rdev->rqt_pool, rqt_start, rqt_chunk, -1)) {
- PDBG("%s failed to add RQT chunk (%x/%x)\n",
- __func__, rqt_start, rqt_chunk);
+ pr_debug("%s failed to add RQT chunk (%x/%x)\n",
+ __func__, rqt_start, rqt_chunk);
if (rqt_chunk <= 1024 << MIN_RQT_SHIFT) {
- printk(KERN_WARNING MOD
- "Failed to add all RQT chunks (%x/%x)\n",
- rqt_start, rqt_top - rqt_start);
+ pr_warn("Failed to add all RQT chunks (%x/%x)\n",
+ rqt_start, rqt_top - rqt_start);
return 0;
}
rqt_chunk >>= 1;
} else {
- PDBG("%s added RQT chunk (%x/%x)\n",
- __func__, rqt_start, rqt_chunk);
+ pr_debug("%s added RQT chunk (%x/%x)\n",
+ __func__, rqt_start, rqt_chunk);
rqt_start += rqt_chunk;
}
}
@@ -394,7 +391,7 @@ void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev)
u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size)
{
unsigned long addr = gen_pool_alloc(rdev->ocqp_pool, size);
- PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
+ pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
if (addr) {
mutex_lock(&rdev->stats.lock);
rdev->stats.ocqp.cur += roundup(size, 1 << MIN_OCQP_SHIFT);
@@ -407,7 +404,7 @@ u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size)
void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size)
{
- PDBG("%s addr 0x%x size %d\n", __func__, addr, size);
+ pr_debug("%s addr 0x%x size %d\n", __func__, addr, size);
mutex_lock(&rdev->stats.lock);
rdev->stats.ocqp.cur -= roundup(size, 1 << MIN_OCQP_SHIFT);
mutex_unlock(&rdev->stats.lock);
@@ -429,18 +426,17 @@ int c4iw_ocqp_pool_create(struct c4iw_rdev *rdev)
while (start < top) {
chunk = min(top - start + 1, chunk);
if (gen_pool_add(rdev->ocqp_pool, start, chunk, -1)) {
- PDBG("%s failed to add OCQP chunk (%x/%x)\n",
- __func__, start, chunk);
+ pr_debug("%s failed to add OCQP chunk (%x/%x)\n",
+ __func__, start, chunk);
if (chunk <= 1024 << MIN_OCQP_SHIFT) {
- printk(KERN_WARNING MOD
- "Failed to add all OCQP chunks (%x/%x)\n",
- start, top - start);
+ pr_warn("Failed to add all OCQP chunks (%x/%x)\n",
+ start, top - start);
return 0;
}
chunk >>= 1;
} else {
- PDBG("%s added OCQP chunk (%x/%x)\n",
- __func__, start, chunk);
+ pr_debug("%s added OCQP chunk (%x/%x)\n",
+ __func__, start, chunk);
start += chunk;
}
}
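The three pool creators in resource.c above (PBL, RQT, OCQP) share the same fill strategy: try to add a large chunk to the gen_pool and halve the chunk size on failure until a floor is reached. A condensed sketch of that loop, assuming a pool already created with gen_pool_create(); demo_fill_pool() and min_chunk are illustrative names:

#include <linux/errno.h>
#include <linux/genalloc.h>
#include <linux/kernel.h>

static int demo_fill_pool(struct gen_pool *pool, unsigned long start,
			  unsigned long top, unsigned long chunk,
			  unsigned long min_chunk)
{
	while (start < top) {
		chunk = min(top - start + 1, chunk);
		if (gen_pool_add(pool, start, chunk, -1)) {
			/* chunk did not fit: halve it, or give up at the floor */
			if (chunk <= min_chunk)
				return -ENOMEM;
			chunk >>= 1;
		} else {
			start += chunk;
		}
	}
	return 0;
}

The drivers above warn and still return 0 when the pool is only partially filled; the sketch returns -ENOMEM instead, which is a simplification rather than the driver's behaviour.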
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 640d22148a3e..e765c00303cd 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -466,14 +466,14 @@ static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, union t4_wr *wqe)
wmb();
if (wq->sq.bar2_va) {
if (inc == 1 && wq->sq.bar2_qid == 0 && wqe) {
- PDBG("%s: WC wq->sq.pidx = %d\n",
- __func__, wq->sq.pidx);
+ pr_debug("%s: WC wq->sq.pidx = %d\n",
+ __func__, wq->sq.pidx);
pio_copy((u64 __iomem *)
(wq->sq.bar2_va + SGE_UDB_WCDOORBELL),
(u64 *)wqe);
} else {
- PDBG("%s: DB wq->sq.pidx = %d\n",
- __func__, wq->sq.pidx);
+ pr_debug("%s: DB wq->sq.pidx = %d\n",
+ __func__, wq->sq.pidx);
writel(PIDX_T5_V(inc) | QID_V(wq->sq.bar2_qid),
wq->sq.bar2_va + SGE_UDB_KDOORBELL);
}
@@ -493,14 +493,14 @@ static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc,
wmb();
if (wq->rq.bar2_va) {
if (inc == 1 && wq->rq.bar2_qid == 0 && wqe) {
- PDBG("%s: WC wq->rq.pidx = %d\n",
- __func__, wq->rq.pidx);
+ pr_debug("%s: WC wq->rq.pidx = %d\n",
+ __func__, wq->rq.pidx);
pio_copy((u64 __iomem *)
(wq->rq.bar2_va + SGE_UDB_WCDOORBELL),
(void *)wqe);
} else {
- PDBG("%s: DB wq->rq.pidx = %d\n",
- __func__, wq->rq.pidx);
+ pr_debug("%s: DB wq->rq.pidx = %d\n",
+ __func__, wq->rq.pidx);
writel(PIDX_T5_V(inc) | QID_V(wq->rq.bar2_qid),
wq->rq.bar2_va + SGE_UDB_KDOORBELL);
}
@@ -601,7 +601,8 @@ static inline void t4_swcq_produce(struct t4_cq *cq)
{
cq->sw_in_use++;
if (cq->sw_in_use == cq->size) {
- PDBG("%s cxgb4 sw cq overflow cqid %u\n", __func__, cq->cqid);
+ pr_debug("%s cxgb4 sw cq overflow cqid %u\n",
+ __func__, cq->cqid);
cq->error = 1;
BUG_ON(1);
}
@@ -656,7 +657,7 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
if (cq->queue[prev_cidx].bits_type_ts != cq->bits_type_ts) {
ret = -EOVERFLOW;
cq->error = 1;
- printk(KERN_ERR MOD "cq overflow cqid %u\n", cq->cqid);
+ pr_err("cq overflow cqid %u\n", cq->cqid);
BUG_ON(1);
} else if (t4_valid_cqe(cq, &cq->queue[cq->cidx])) {
@@ -672,7 +673,8 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
static inline struct t4_cqe *t4_next_sw_cqe(struct t4_cq *cq)
{
if (cq->sw_in_use == cq->size) {
- PDBG("%s cxgb4 sw cq overflow cqid %u\n", __func__, cq->cqid);
+ pr_debug("%s cxgb4 sw cq overflow cqid %u\n",
+ __func__, cq->cqid);
cq->error = 1;
BUG_ON(1);
return NULL;
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index 0cf97a09b64b..88085f65432e 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -12,7 +12,7 @@ hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
- verbs_txreq.o
+ verbs_txreq.o vnic_main.o vnic_sdma.o
hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
CFLAGS_trace.o = -I$(src)
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 7a3d906b3671..e2cd2cd3b28a 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -576,7 +576,7 @@ int hfi1_get_proc_affinity(int node)
struct hfi1_affinity_node *entry;
cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
const struct cpumask *node_mask,
- *proc_mask = tsk_cpus_allowed(current);
+ *proc_mask = &current->cpus_allowed;
struct hfi1_affinity_node_list *affinity = &node_affinity;
struct cpu_mask_set *set = &affinity->proc;
diff --git a/drivers/infiniband/hw/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h
index 0d58fe3b49b5..794e6814a531 100644
--- a/drivers/infiniband/hw/hfi1/aspm.h
+++ b/drivers/infiniband/hw/hfi1/aspm.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -229,14 +229,17 @@ static inline void aspm_ctx_timer_function(unsigned long data)
spin_unlock_irqrestore(&rcd->aspm_lock, flags);
}
-/* Disable interrupt processing for verbs contexts when PSM contexts are open */
+/*
+ * Disable interrupt processing for verbs contexts when PSM or VNIC contexts
+ * are open.
+ */
static inline void aspm_disable_all(struct hfi1_devdata *dd)
{
struct hfi1_ctxtdata *rcd;
unsigned long flags;
unsigned i;
- for (i = 0; i < dd->first_user_ctxt; i++) {
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = dd->rcd[i];
del_timer_sync(&rcd->aspm_timer);
spin_lock_irqsave(&rcd->aspm_lock, flags);
@@ -260,7 +263,7 @@ static inline void aspm_enable_all(struct hfi1_devdata *dd)
if (aspm_mode != ASPM_MODE_DYNAMIC)
return;
- for (i = 0; i < dd->first_user_ctxt; i++) {
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = dd->rcd[i];
spin_lock_irqsave(&rcd->aspm_lock, flags);
rcd->aspm_intr_enable = true;
@@ -276,7 +279,7 @@ static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd)
(unsigned long)rcd);
rcd->aspm_intr_supported = rcd->dd->aspm_supported &&
aspm_mode == ASPM_MODE_DYNAMIC &&
- rcd->ctxt < rcd->dd->first_user_ctxt;
+ rcd->ctxt < rcd->dd->first_dyn_alloc_ctxt;
}
static inline void aspm_init(struct hfi1_devdata *dd)
@@ -286,7 +289,7 @@ static inline void aspm_init(struct hfi1_devdata *dd)
spin_lock_init(&dd->aspm_lock);
dd->aspm_supported = aspm_hw_l1_supported(dd);
- for (i = 0; i < dd->first_user_ctxt; i++)
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; i++)
aspm_ctx_init(dd->rcd[i]);
/* Start with ASPM disabled */
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index ef72bc2a9e1d..94b54850ec75 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -64,6 +64,7 @@
#include "platform.h"
#include "aspm.h"
#include "affinity.h"
+#include "debugfs.h"
#define NUM_IB_PORTS 1
@@ -125,9 +126,16 @@ struct flag_table {
#define DEFAULT_KRCVQS 2
#define MIN_KERNEL_KCTXTS 2
#define FIRST_KERNEL_KCTXT 1
-/* sizes for both the QP and RSM map tables */
-#define NUM_MAP_ENTRIES 256
-#define NUM_MAP_REGS 32
+
+/*
+ * RSM instance allocation
+ * 0 - Verbs
+ * 1 - User Fecn Handling
+ * 2 - Vnic
+ */
+#define RSM_INS_VERBS 0
+#define RSM_INS_FECN 1
+#define RSM_INS_VNIC 2
/* Bit offset into the GUID which carries HFI id information */
#define GUID_HFI_INDEX_SHIFT 39
@@ -138,8 +146,7 @@ struct flag_table {
#define is_emulator_p(dd) ((((dd)->irev) & 0xf) == 3)
#define is_emulator_s(dd) ((((dd)->irev) & 0xf) == 4)
-/* RSM fields */
-
+/* RSM fields for Verbs */
/* packet type */
#define IB_PACKET_TYPE 2ull
#define QW_SHIFT 6ull
@@ -169,6 +176,28 @@ struct flag_table {
/* QPN[m+n:1] QW 1, OFFSET 1 */
#define QPN_SELECT_OFFSET ((1ull << QW_SHIFT) | (1ull))
+/* RSM fields for Vnic */
+/* L2_TYPE: QW 0, OFFSET 61 - for match */
+#define L2_TYPE_QW 0ull
+#define L2_TYPE_BIT_OFFSET 61ull
+#define L2_TYPE_OFFSET(off) ((L2_TYPE_QW << QW_SHIFT) | (off))
+#define L2_TYPE_MATCH_OFFSET L2_TYPE_OFFSET(L2_TYPE_BIT_OFFSET)
+#define L2_TYPE_MASK 3ull
+#define L2_16B_VALUE 2ull
+
+/* L4_TYPE QW 1, OFFSET 0 - for match */
+#define L4_TYPE_QW 1ull
+#define L4_TYPE_BIT_OFFSET 0ull
+#define L4_TYPE_OFFSET(off) ((L4_TYPE_QW << QW_SHIFT) | (off))
+#define L4_TYPE_MATCH_OFFSET L4_TYPE_OFFSET(L4_TYPE_BIT_OFFSET)
+#define L4_16B_TYPE_MASK 0xFFull
+#define L4_16B_ETH_VALUE 0x78ull
+
+/* 16B VESWID - for select */
+#define L4_16B_HDR_VESWID_OFFSET ((2 << QW_SHIFT) | (16ull))
+/* 16B ENTROPY - for select */
+#define L2_16B_ENTROPY_OFFSET ((1 << QW_SHIFT) | (32ull))
+
/* defines to build power on SC2VL table */
#define SC2VL_VAL( \
num, \
@@ -1026,7 +1055,7 @@ static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
-static void set_partition_keys(struct hfi1_pportdata *);
+static void set_partition_keys(struct hfi1_pportdata *ppd);
static const char *link_state_name(u32 state);
static const char *link_state_reason_name(struct hfi1_pportdata *ppd,
u32 state);
@@ -1039,12 +1068,14 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
int msecs);
static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr);
-static void handle_temp_err(struct hfi1_devdata *);
-static void dc_shutdown(struct hfi1_devdata *);
-static void dc_start(struct hfi1_devdata *);
+static void handle_temp_err(struct hfi1_devdata *dd);
+static void dc_shutdown(struct hfi1_devdata *dd);
+static void dc_start(struct hfi1_devdata *dd);
static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
unsigned int *np);
static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd);
+static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms);
+static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index);
/*
* Error interrupt table entry. This is used as input to the interrupt
@@ -6281,25 +6312,38 @@ static void handle_8051_request(struct hfi1_pportdata *ppd)
}
}
-static void write_global_credit(struct hfi1_devdata *dd,
- u8 vau, u16 total, u16 shared)
+/*
+ * Set up allocation unit value.
+ */
+void set_up_vau(struct hfi1_devdata *dd, u8 vau)
{
- write_csr(dd, SEND_CM_GLOBAL_CREDIT,
- ((u64)total <<
- SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT) |
- ((u64)shared <<
- SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT) |
- ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT));
+ u64 reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
+
+ /* do not modify other values in the register */
+ reg &= ~SEND_CM_GLOBAL_CREDIT_AU_SMASK;
+ reg |= (u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT;
+ write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
}
/*
* Set up initial VL15 credits of the remote. Assumes the rest of
- * the CM credit registers are zero from a previous global or credit reset .
+ * the CM credit registers are zero from a previous global or credit reset.
+ * Shared limit for VL15 will always be 0.
*/
-void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf)
+void set_up_vl15(struct hfi1_devdata *dd, u16 vl15buf)
{
- /* leave shared count at zero for both global and VL15 */
- write_global_credit(dd, vau, vl15buf, 0);
+ u64 reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
+
+ /* set initial values for total and shared credit limit */
+ reg &= ~(SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SMASK |
+ SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SMASK);
+
+ /*
+ * Set total limit to be equal to VL15 credits.
+ * Leave shared limit at 0.
+ */
+ reg |= (u64)vl15buf << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT;
+ write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf
<< SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
@@ -6317,9 +6361,11 @@ void reset_link_credits(struct hfi1_devdata *dd)
for (i = 0; i < TXE_NUM_DATA_VL; i++)
write_csr(dd, SEND_CM_CREDIT_VL + (8 * i), 0);
write_csr(dd, SEND_CM_CREDIT_VL15, 0);
- write_global_credit(dd, 0, 0, 0);
+ write_csr(dd, SEND_CM_GLOBAL_CREDIT, 0);
/* reset the CM block */
pio_send_control(dd, PSC_CM_RESET);
+ /* reset cached value */
+ dd->vl15buf_cached = 0;
}
/* convert a vCU to a CU */
@@ -6379,18 +6425,17 @@ static void lcb_shutdown(struct hfi1_devdata *dd, int abort)
*
* The expectation is that the caller of this routine would have taken
* care of properly transitioning the link into the correct state.
+ * NOTE: the caller needs to acquire the dd->dc8051_lock lock
+ * before calling this function.
*/
-static void dc_shutdown(struct hfi1_devdata *dd)
+static void _dc_shutdown(struct hfi1_devdata *dd)
{
- unsigned long flags;
+ lockdep_assert_held(&dd->dc8051_lock);
- spin_lock_irqsave(&dd->dc8051_lock, flags);
- if (dd->dc_shutdown) {
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
+ if (dd->dc_shutdown)
return;
- }
+
dd->dc_shutdown = 1;
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
/* Shutdown the LCB */
lcb_shutdown(dd, 1);
/*
@@ -6401,35 +6446,45 @@ static void dc_shutdown(struct hfi1_devdata *dd)
write_csr(dd, DC_DC8051_CFG_RST, 0x1);
}
+static void dc_shutdown(struct hfi1_devdata *dd)
+{
+ mutex_lock(&dd->dc8051_lock);
+ _dc_shutdown(dd);
+ mutex_unlock(&dd->dc8051_lock);
+}
+
/*
* Calling this after the DC has been brought out of reset should not
* do any damage.
+ * NOTE: the caller needs to acquire the dd->dc8051_lock lock
+ * before calling this function.
*/
-static void dc_start(struct hfi1_devdata *dd)
+static void _dc_start(struct hfi1_devdata *dd)
{
- unsigned long flags;
- int ret;
+ lockdep_assert_held(&dd->dc8051_lock);
- spin_lock_irqsave(&dd->dc8051_lock, flags);
if (!dd->dc_shutdown)
- goto done;
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
+ return;
+
/* Take the 8051 out of reset */
write_csr(dd, DC_DC8051_CFG_RST, 0ull);
/* Wait until 8051 is ready */
- ret = wait_fm_ready(dd, TIMEOUT_8051_START);
- if (ret) {
+ if (wait_fm_ready(dd, TIMEOUT_8051_START))
dd_dev_err(dd, "%s: timeout starting 8051 firmware\n",
__func__);
- }
+
/* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
write_csr(dd, DCC_CFG_RESET, 0x10);
/* lcb_shutdown() with abort=1 does not restore these */
write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
- spin_lock_irqsave(&dd->dc8051_lock, flags);
dd->dc_shutdown = 0;
-done:
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
+}
+
+static void dc_start(struct hfi1_devdata *dd)
+{
+ mutex_lock(&dd->dc8051_lock);
+ _dc_start(dd);
+ mutex_unlock(&dd->dc8051_lock);
}
/*
@@ -6701,7 +6756,13 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
int i;
/* enable all kernel contexts */
- for (i = 0; i < dd->n_krcv_queues; i++) {
+ for (i = 0; i < dd->num_rcv_contexts; i++) {
+ struct hfi1_ctxtdata *rcd = dd->rcd[i];
+
+ /* Ensure all non-user contexts (including vnic) are enabled */
+ if (!rcd || !rcd->sc || (rcd->sc->type == SC_USER))
+ continue;
+
rcvmask = HFI1_RCVCTRL_CTXT_ENB;
/* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */
rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
@@ -6793,24 +6854,35 @@ void handle_link_up(struct work_struct *work)
{
struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
link_up_work);
+ struct hfi1_devdata *dd = ppd->dd;
+
set_link_state(ppd, HLS_UP_INIT);
/* cache the read of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
- read_ltp_rtt(ppd->dd);
+ read_ltp_rtt(dd);
/*
* OPA specifies that certain counters are cleared on a transition
* to link up, so do that.
*/
- clear_linkup_counters(ppd->dd);
+ clear_linkup_counters(dd);
/*
* And (re)set link up default values.
*/
set_linkup_defaults(ppd);
+ /*
+ * Set VL15 credits. Use cached value from verify cap interrupt.
+ * In case of quick linkup or simulator, vl15 value will be set by
+ * handle_linkup_change. VerifyCap interrupt handler will not be
+ * called in those scenarios.
+ */
+ if (!(quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR))
+ set_up_vl15(dd, dd->vl15buf_cached);
+
/* enforce link speed enabled */
if ((ppd->link_speed_active & ppd->link_speed_enabled) == 0) {
/* oops - current speed is not enabled, bounce */
- dd_dev_err(ppd->dd,
+ dd_dev_err(dd,
"Link speed active 0x%x is outside enabled 0x%x, downing link\n",
ppd->link_speed_active, ppd->link_speed_enabled);
set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0,
@@ -7077,7 +7149,7 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
{
struct hfi1_devdata *dd = ppd->dd;
- /* Sanity check - ppd->pkeys[2] should be 0, or already initalized */
+ /* Sanity check - ppd->pkeys[2] should be 0, or already initialized */
if (!((ppd->pkeys[2] == 0) || (ppd->pkeys[2] == FULL_MGMT_P_KEY)))
dd_dev_warn(dd, "%s pkey[2] already set to 0x%x, resetting it to 0x%x\n",
__func__, ppd->pkeys[2], FULL_MGMT_P_KEY);
@@ -7165,7 +7237,7 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
* set the max_rate field in handle_verify_cap until v0.19.
*/
if ((dd->icode == ICODE_RTL_SILICON) &&
- (dd->dc8051_ver < dc8051_ver(0, 19))) {
+ (dd->dc8051_ver < dc8051_ver(0, 19, 0))) {
/* max_rate: 0 = 12.5G, 1 = 25G */
switch (max_rate) {
case 0:
@@ -7277,15 +7349,6 @@ void handle_verify_cap(struct work_struct *work)
lcb_shutdown(dd, 0);
adjust_lcb_for_fpga_serdes(dd);
- /*
- * These are now valid:
- * remote VerifyCap fields in the general LNI config
- * CSR DC8051_STS_REMOTE_GUID
- * CSR DC8051_STS_REMOTE_NODE_TYPE
- * CSR DC8051_STS_REMOTE_FM_SECURITY
- * CSR DC8051_STS_REMOTE_PORT_NO
- */
-
read_vc_remote_phy(dd, &power_management, &continious);
read_vc_remote_fabric(dd, &vau, &z, &vcu, &vl15buf,
&partner_supported_crc);
@@ -7320,7 +7383,14 @@ void handle_verify_cap(struct work_struct *work)
*/
if (vau == 0)
vau = 1;
- set_up_vl15(dd, vau, vl15buf);
+ set_up_vau(dd, vau);
+
+ /*
+ * Set VL15 credits to 0 in global credit register. Cache remote VL15
+ * credits value and wait for link-up interrupt to set it.
+ */
+ set_up_vl15(dd, 0);
+ dd->vl15buf_cached = vl15buf;
/* set up the LCB CRC mode */
crc_mask = ppd->port_crc_mode_enabled & partner_supported_crc;
@@ -7350,7 +7420,7 @@ void handle_verify_cap(struct work_struct *work)
}
ppd->link_speed_active = 0; /* invalid value */
- if (dd->dc8051_ver < dc8051_ver(0, 20)) {
+ if (dd->dc8051_ver < dc8051_ver(0, 20, 0)) {
/* remote_tx_rate: 0 = 12.5G, 1 = 25G */
switch (remote_tx_rate) {
case 0:
@@ -7416,20 +7486,6 @@ void handle_verify_cap(struct work_struct *work)
write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
set_8051_lcb_access(dd);
- ppd->neighbor_guid =
- read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
- ppd->neighbor_port_number = read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
- DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
- ppd->neighbor_type =
- read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
- DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
- ppd->neighbor_fm_security =
- read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
- DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
- dd_dev_info(dd,
- "Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
- ppd->neighbor_guid, ppd->neighbor_type,
- ppd->mgmt_allowed, ppd->neighbor_fm_security);
if (ppd->mgmt_allowed)
add_full_mgmt_pkey(ppd);
@@ -7827,7 +7883,8 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
}
/* just report this */
- dd_dev_info(dd, "DCC Error: fmconfig error: %s\n", extra);
+ dd_dev_info_ratelimited(dd, "DCC Error: fmconfig error: %s\n",
+ extra);
reg &= ~DCC_ERR_FLG_FMCONFIG_ERR_SMASK;
}
@@ -7878,34 +7935,38 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
}
/* just report this */
- dd_dev_info(dd, "DCC Error: PortRcv error: %s\n", extra);
- dd_dev_info(dd, " hdr0 0x%llx, hdr1 0x%llx\n",
- hdr0, hdr1);
+ dd_dev_info_ratelimited(dd, "DCC Error: PortRcv error: %s\n"
+ " hdr0 0x%llx, hdr1 0x%llx\n",
+ extra, hdr0, hdr1);
reg &= ~DCC_ERR_FLG_RCVPORT_ERR_SMASK;
}
if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK) {
/* informative only */
- dd_dev_info(dd, "8051 access to LCB blocked\n");
+ dd_dev_info_ratelimited(dd, "8051 access to LCB blocked\n");
reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK;
}
if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK) {
/* informative only */
- dd_dev_info(dd, "host access to LCB blocked\n");
+ dd_dev_info_ratelimited(dd, "host access to LCB blocked\n");
reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK;
}
+ if (unlikely(hfi1_dbg_fault_suppress_err(&dd->verbs_dev)))
+ reg &= ~DCC_ERR_FLG_LATE_EBP_ERR_SMASK;
+
/* report any remaining errors */
if (reg)
- dd_dev_info(dd, "DCC Error: %s\n",
- dcc_err_string(buf, sizeof(buf), reg));
+ dd_dev_info_ratelimited(dd, "DCC Error: %s\n",
+ dcc_err_string(buf, sizeof(buf), reg));
if (lcl_reason == 0)
lcl_reason = OPA_LINKDOWN_REASON_UNKNOWN;
if (do_bounce) {
- dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
+ dd_dev_info_ratelimited(dd, "%s: PortErrorAction bounce\n",
+ __func__);
set_link_down_reason(ppd, lcl_reason, 0, lcl_reason);
queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
}
@@ -7993,7 +8054,9 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
if (likely(source < dd->num_rcv_contexts)) {
rcd = dd->rcd[source];
if (rcd) {
- if (source < dd->first_user_ctxt)
+ /* Check for non-user contexts, including vnic */
+ if ((source < dd->first_dyn_alloc_ctxt) ||
+ (rcd->sc && (rcd->sc->type == SC_KERNEL)))
rcd->do_interrupt(rcd, 0);
else
handle_user_interrupt(rcd);
@@ -8021,7 +8084,8 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
rcd = dd->rcd[source];
if (rcd) {
/* only pay attention to user urgent interrupts */
- if (source >= dd->first_user_ctxt)
+ if ((source >= dd->first_dyn_alloc_ctxt) &&
+ (!rcd->sc || (rcd->sc->type == SC_USER)))
handle_user_interrupt(rcd);
return; /* OK */
}
@@ -8154,10 +8218,10 @@ static irqreturn_t sdma_interrupt(int irq, void *data)
/* handle the interrupt(s) */
sdma_engine_interrupt(sde, status);
- } else
+ } else {
dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n",
sde->this_idx);
-
+ }
return IRQ_HANDLED;
}
@@ -8342,6 +8406,52 @@ static int read_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 *data)
}
/*
+ * Provide a cache for some of the LCB registers in case the LCB is
+ * unavailable.
+ * (The LCB is unavailable in certain link states, for example.)
+ */
+struct lcb_datum {
+ u32 off;
+ u64 val;
+};
+
+static struct lcb_datum lcb_cache[] = {
+ { DC_LCB_ERR_INFO_RX_REPLAY_CNT, 0},
+ { DC_LCB_ERR_INFO_SEQ_CRC_CNT, 0 },
+ { DC_LCB_ERR_INFO_REINIT_FROM_PEER_CNT, 0 },
+};
+
+static void update_lcb_cache(struct hfi1_devdata *dd)
+{
+ int i;
+ int ret;
+ u64 val;
+
+ for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
+ ret = read_lcb_csr(dd, lcb_cache[i].off, &val);
+
+ /* Update if we get good data */
+ if (likely(ret != -EBUSY))
+ lcb_cache[i].val = val;
+ }
+}
+
+static int read_lcb_cache(u32 off, u64 *val)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
+ if (lcb_cache[i].off == off) {
+ *val = lcb_cache[i].val;
+ return 0;
+ }
+ }
+
+ pr_warn("%s bad offset 0x%x\n", __func__, off);
+ return -1;
+}
+
+/*
* Read an LCB CSR. Access may not be in host control, so check.
* Return 0 on success, -EBUSY on failure.
*/
@@ -8352,9 +8462,13 @@ int read_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 *data)
/* if up, go through the 8051 for the value */
if (ppd->host_link_state & HLS_UP)
return read_lcb_via_8051(dd, addr, data);
- /* if going up or down, no access */
- if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
- return -EBUSY;
+ /* if going up or down, check the cache, otherwise, no access */
+ if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE)) {
+ if (read_lcb_cache(addr, data))
+ return -EBUSY;
+ return 0;
+ }
+
/* otherwise, host has access */
*data = read_csr(dd, addr);
return 0;
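
The cache above is only consulted while the link is transitioning (HLS_GOING_UP or HLS_GOING_OFFLINE), after goto_offline() has refreshed it with update_lcb_cache(); in other states read_lcb_csr() still returns live CSR data. A minimal standalone sketch of the same offset-to-value lookup pattern, with invented names and offsets rather than the driver's:

#include <stdint.h>
#include <stdio.h>

struct reg_cache_entry { uint32_t off; uint64_t val; };

static struct reg_cache_entry cache[] = {
	{ 0x100, 0 },	/* illustrative offsets, not chip addresses */
	{ 0x108, 0 },
};

/* Return 0 and fill *val if the offset is cached, -1 otherwise. */
static int cache_lookup(uint32_t off, uint64_t *val)
{
	for (size_t i = 0; i < sizeof(cache) / sizeof(cache[0]); i++) {
		if (cache[i].off == off) {
			*val = cache[i].val;
			return 0;
		}
	}
	return -1;
}

int main(void)
{
	uint64_t v;

	cache[1].val = 42;	/* stands in for the periodic cache refresh */
	if (cache_lookup(0x108, &v) == 0)
		printf("cached value 0x%llx\n", (unsigned long long)v);
	return 0;
}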
@@ -8369,7 +8483,7 @@ static int write_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 data)
int ret;
if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR ||
- (dd->dc8051_ver < dc8051_ver(0, 20))) {
+ (dd->dc8051_ver < dc8051_ver(0, 20, 0))) {
if (acquire_lcb_access(dd, 0) == 0) {
write_csr(dd, addr, data);
release_lcb_access(dd, 0);
@@ -8418,16 +8532,11 @@ static int do_8051_command(
{
u64 reg, completed;
int return_code;
- unsigned long flags;
unsigned long timeout;
hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data);
- /*
- * Alternative to holding the lock for a long time:
- * - keep busy wait - have other users bounce off
- */
- spin_lock_irqsave(&dd->dc8051_lock, flags);
+ mutex_lock(&dd->dc8051_lock);
/* We can't send any commands to the 8051 if it's in reset */
if (dd->dc_shutdown) {
@@ -8453,10 +8562,8 @@ static int do_8051_command(
return_code = -ENXIO;
goto fail;
}
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
- dc_shutdown(dd);
- dc_start(dd);
- spin_lock_irqsave(&dd->dc8051_lock, flags);
+ _dc_shutdown(dd);
+ _dc_start(dd);
}
/*
@@ -8537,8 +8644,7 @@ static int do_8051_command(
write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0);
fail:
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
-
+ mutex_unlock(&dd->dc8051_lock);
return return_code;
}
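
Replacing the dc8051 spinlock with a mutex lets a caller sleep while a command completes and allows the inline _dc_shutdown()/_dc_start() recovery above, instead of the old drop-lock-and-busy-wait dance with interrupts disabled. A hedged userspace sketch of the same serialization idea using pthreads, with invented names:

#include <pthread.h>
#include <unistd.h>
#include <stdio.h>

static pthread_mutex_t cmd_lock = PTHREAD_MUTEX_INITIALIZER;

/* Serialize a slow command; the holder may sleep while the lock is held. */
static int do_slow_command(int type)
{
	int ret;

	pthread_mutex_lock(&cmd_lock);
	usleep(1000);		/* stands in for waiting on firmware */
	ret = type;		/* pretend the command echoed its type */
	pthread_mutex_unlock(&cmd_lock);
	return ret;
}

int main(void)
{
	printf("command returned %d\n", do_slow_command(3));
	return 0;
}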
@@ -8675,13 +8781,20 @@ static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
& REMOTE_DEVICE_REV_MASK;
}
-void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b)
+void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
+ u8 *ver_patch)
{
u32 frame;
read_8051_config(dd, MISC_STATUS, GENERAL_CONFIG, &frame);
- *ver_a = (frame >> STS_FM_VERSION_A_SHIFT) & STS_FM_VERSION_A_MASK;
- *ver_b = (frame >> STS_FM_VERSION_B_SHIFT) & STS_FM_VERSION_B_MASK;
+ *ver_major = (frame >> STS_FM_VERSION_MAJOR_SHIFT) &
+ STS_FM_VERSION_MAJOR_MASK;
+ *ver_minor = (frame >> STS_FM_VERSION_MINOR_SHIFT) &
+ STS_FM_VERSION_MINOR_MASK;
+
+ read_8051_config(dd, VERSION_PATCH, GENERAL_CONFIG, &frame);
+ *ver_patch = (frame >> STS_FM_VERSION_PATCH_SHIFT) &
+ STS_FM_VERSION_PATCH_MASK;
}
static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
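
read_misc_status() now decodes three version bytes: major and minor from the MISC_STATUS frame, and patch from the VERSION_PATCH byte that shares offset 0x16 with LINK_DOWN_REASON. A small self-contained sketch of the same shift-and-mask extraction, using illustrative shift values rather than the chip definitions:

#include <stdint.h>
#include <stdio.h>

#define VER_MINOR_SHIFT	16
#define VER_MINOR_MASK	0xff
#define VER_MAJOR_SHIFT	24
#define VER_MAJOR_MASK	0xff

static void decode_version(uint32_t frame, uint8_t *major, uint8_t *minor)
{
	*minor = (frame >> VER_MINOR_SHIFT) & VER_MINOR_MASK;
	*major = (frame >> VER_MAJOR_SHIFT) & VER_MAJOR_MASK;
}

int main(void)
{
	uint8_t major, minor;

	decode_version(0x0A140000, &major, &minor);	/* 10.20 packed in the top bytes */
	printf("version %u.%u\n", (unsigned)major, (unsigned)minor);
	return 0;
}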
@@ -8889,8 +9002,6 @@ int send_idle_sma(struct hfi1_devdata *dd, u64 message)
*/
static int do_quick_linkup(struct hfi1_devdata *dd)
{
- u64 reg;
- unsigned long timeout;
int ret;
lcb_shutdown(dd, 0);
@@ -8913,19 +9024,9 @@ static int do_quick_linkup(struct hfi1_devdata *dd)
write_csr(dd, DC_LCB_CFG_RUN,
1ull << DC_LCB_CFG_RUN_EN_SHIFT);
- /* watch LCB_STS_LINK_TRANSFER_ACTIVE */
- timeout = jiffies + msecs_to_jiffies(10);
- while (1) {
- reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE);
- if (reg)
- break;
- if (time_after(jiffies, timeout)) {
- dd_dev_err(dd,
- "timeout waiting for LINK_TRANSFER_ACTIVE\n");
- return -ETIMEDOUT;
- }
- udelay(2);
- }
+ ret = wait_link_transfer_active(dd, 10);
+ if (ret)
+ return ret;
write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP,
1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
@@ -9089,7 +9190,7 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
if (ret)
goto set_local_link_attributes_fail;
- if (dd->dc8051_ver < dc8051_ver(0, 20)) {
+ if (dd->dc8051_ver < dc8051_ver(0, 20, 0)) {
/* set the tx rate to the fastest enabled */
if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
ppd->local_tx_rate = 1;
@@ -9272,7 +9373,7 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) ||
(qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
- dd_dev_info(dd, "%s: QSFP cable on fire\n",
+ dd_dev_info(dd, "%s: QSFP cable temperature too high\n",
__func__);
if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) ||
@@ -9492,8 +9593,11 @@ static int test_qsfp_read(struct hfi1_pportdata *ppd)
int ret;
u8 status;
- /* report success if not a QSFP */
- if (ppd->port_type != PORT_TYPE_QSFP)
+ /*
+ * Report success if this is not a QSFP, or if it is a QSFP but the
+ * cable is not present
+ */
+ if (ppd->port_type != PORT_TYPE_QSFP || !qsfp_mod_present(ppd))
return 0;
/* read byte 2, the status byte */
@@ -10080,6 +10184,64 @@ static void check_lni_states(struct hfi1_pportdata *ppd)
decode_state_complete(ppd, last_remote_state, "received");
}
+/* wait for wait_ms for LINK_TRANSFER_ACTIVE to go to 1 */
+static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms)
+{
+ u64 reg;
+ unsigned long timeout;
+
+ /* watch LCB_STS_LINK_TRANSFER_ACTIVE */
+ timeout = jiffies + msecs_to_jiffies(wait_ms);
+ while (1) {
+ reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE);
+ if (reg)
+ break;
+ if (time_after(jiffies, timeout)) {
+ dd_dev_err(dd,
+ "timeout waiting for LINK_TRANSFER_ACTIVE\n");
+ return -ETIMEDOUT;
+ }
+ udelay(2);
+ }
+ return 0;
+}
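
wait_link_transfer_active() is the poll-with-deadline loop that do_quick_linkup() used to open-code: read the status register, stop as soon as it is non-zero, and give up after the requested number of milliseconds. A standalone sketch of the same pattern against a plain variable instead of a CSR, with invented names:

#include <stdint.h>
#include <stdio.h>
#include <time.h>

static volatile uint64_t fake_status;	/* stands in for the status CSR */

static uint64_t now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
}

/* Poll until fake_status is non-zero or wait_ms milliseconds elapse. */
static int wait_for_status(int wait_ms)
{
	uint64_t deadline = now_ms() + wait_ms;

	while (!fake_status) {
		if (now_ms() > deadline)
			return -1;	/* timed out */
	}
	return 0;
}

int main(void)
{
	fake_status = 1;	/* already active, so the wait succeeds at once */
	printf("wait returned %d\n", wait_for_status(10));
	return 0;
}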
+
+/* called when the logical link state is not down as it should be */
+static void force_logical_link_state_down(struct hfi1_pportdata *ppd)
+{
+ struct hfi1_devdata *dd = ppd->dd;
+
+ /*
+ * Bring link up in LCB loopback
+ */
+ write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 1);
+ write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK,
+ DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
+
+ write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0);
+ write_csr(dd, DC_LCB_CFG_REINIT_AS_SLAVE, 0);
+ write_csr(dd, DC_LCB_CFG_CNT_FOR_SKIP_STALL, 0x110);
+ write_csr(dd, DC_LCB_CFG_LOOPBACK, 0x2);
+
+ write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
+ (void)read_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET);
+ udelay(3);
+ write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 1);
+ write_csr(dd, DC_LCB_CFG_RUN, 1ull << DC_LCB_CFG_RUN_EN_SHIFT);
+
+ wait_link_transfer_active(dd, 100);
+
+ /*
+ * Bring the link down again.
+ */
+ write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 1);
+ write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 0);
+ write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK, 0);
+
+ /* call again to adjust ppd->statusp, if needed */
+ get_logical_state(ppd);
+}
+
/*
* Helper for set_link_state(). Do not call except from that routine.
* Expects ppd->hls_mutex to be held.
@@ -10096,13 +10258,15 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
int do_transition;
int do_wait;
+ update_lcb_cache(dd);
+
previous_state = ppd->host_link_state;
ppd->host_link_state = HLS_GOING_OFFLINE;
pstate = read_physical_state(dd);
if (pstate == PLS_OFFLINE) {
do_transition = 0; /* in right state */
do_wait = 0; /* ...no need to wait */
- } else if ((pstate & 0xff) == PLS_OFFLINE) {
+ } else if ((pstate & 0xf0) == PLS_OFFLINE) {
do_transition = 0; /* in an offline transient state */
do_wait = 1; /* ...wait for it to settle */
} else {
@@ -10133,15 +10297,18 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
return ret;
}
- /* make sure the logical state is also down */
- wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
-
/*
* Now in charge of LCB - must be after the physical state is
* offline.quiet and before host_link_state is changed.
*/
set_host_lcb_access(dd);
write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
+
+ /* make sure the logical state is also down */
+ ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
+ if (ret)
+ force_logical_link_state_down(ppd);
+
ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
if (ppd->port_type == PORT_TYPE_QSFP &&
@@ -10378,11 +10545,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
goto unexpected;
}
- ppd->host_link_state = HLS_UP_INIT;
ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000);
if (ret) {
- /* logical state didn't change, stay at going_up */
- ppd->host_link_state = HLS_GOING_UP;
dd_dev_err(dd,
"%s: logical state did not change to INIT\n",
__func__);
@@ -10396,6 +10560,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
handle_linkup_change(dd, 1);
+ ppd->host_link_state = HLS_UP_INIT;
}
break;
case HLS_UP_ARMED:
@@ -10508,16 +10673,18 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
ppd->remote_link_down_reason = 0;
}
- ret1 = set_physical_link_state(dd, PLS_DISABLED);
- if (ret1 != HCMD_SUCCESS) {
- dd_dev_err(dd,
- "Failed to transition to Disabled link state, return 0x%x\n",
- ret1);
- ret = -EINVAL;
- break;
+ if (!dd->dc_shutdown) {
+ ret1 = set_physical_link_state(dd, PLS_DISABLED);
+ if (ret1 != HCMD_SUCCESS) {
+ dd_dev_err(dd,
+ "Failed to transition to Disabled link state, return 0x%x\n",
+ ret1);
+ ret = -EINVAL;
+ break;
+ }
+ dc_shutdown(dd);
}
ppd->host_link_state = HLS_DN_DISABLE;
- dc_shutdown(dd);
break;
case HLS_DN_OFFLINE:
if (ppd->host_link_state == HLS_DN_DISABLE)
@@ -11849,6 +12016,10 @@ static void free_cntrs(struct hfi1_devdata *dd)
dd->scntrs = NULL;
kfree(dd->cntrnames);
dd->cntrnames = NULL;
+ if (dd->update_cntr_wq) {
+ destroy_workqueue(dd->update_cntr_wq);
+ dd->update_cntr_wq = NULL;
+ }
}
static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry,
@@ -12004,7 +12175,7 @@ u64 write_port_cntr(struct hfi1_pportdata *ppd, int index, int vl, u64 data)
return write_dev_port_cntr(ppd->dd, entry, sval, ppd, vl, data);
}
-static void update_synth_timer(unsigned long opaque)
+static void do_update_synth_timer(struct work_struct *work)
{
u64 cur_tx;
u64 cur_rx;
@@ -12013,8 +12184,8 @@ static void update_synth_timer(unsigned long opaque)
int i, j, vl;
struct hfi1_pportdata *ppd;
struct cntr_entry *entry;
-
- struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
+ struct hfi1_devdata *dd = container_of(work, struct hfi1_devdata,
+ update_cntr_work);
/*
* Rather than keep beating on the CSRs pick a minimal set that we can
@@ -12097,7 +12268,13 @@ static void update_synth_timer(unsigned long opaque)
} else {
hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit);
}
+}
+
+static void update_synth_timer(unsigned long opaque)
+{
+ struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
+ queue_work(dd->update_cntr_wq, &dd->update_cntr_work);
mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
}
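
The synthesized-counter update now runs in process context: the timer callback only queues update_cntr_work on the ordered workqueue and re-arms itself, while do_update_synth_timer() performs the heavy CSR reads from the work item. A compressed sketch of that defer-to-workqueue pattern with hypothetical names, assuming the timer and workqueue APIs of this kernel era:

#include <linux/timer.h>
#include <linux/workqueue.h>

/* Sketch only; my_dev, my_work_fn and my_timer_fn are invented names. */
struct my_dev {
	struct timer_list timer;	/* armed with setup_timer()/mod_timer() */
	struct workqueue_struct *wq;	/* from alloc_ordered_workqueue() */
	struct work_struct work;	/* initialized with INIT_WORK(..., my_work_fn) */
};

static void my_work_fn(struct work_struct *work)
{
	struct my_dev *dev = container_of(work, struct my_dev, work);

	/* Slow, sleepable updates belong here, not in the timer callback. */
	(void)dev;
}

static void my_timer_fn(unsigned long opaque)
{
	struct my_dev *dev = (struct my_dev *)opaque;

	queue_work(dev->wq, &dev->work);	/* hand off the real work */
	mod_timer(&dev->timer, jiffies + HZ);	/* re-arm for the next period */
}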
@@ -12333,6 +12510,13 @@ static int init_cntrs(struct hfi1_devdata *dd)
if (init_cpu_counters(dd))
goto bail;
+ dd->update_cntr_wq = alloc_ordered_workqueue("hfi1_update_cntr_%d",
+ WQ_MEM_RECLAIM, dd->unit);
+ if (!dd->update_cntr_wq)
+ goto bail;
+
+ INIT_WORK(&dd->update_cntr_work, do_update_synth_timer);
+
mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
return 0;
bail:
@@ -12511,7 +12695,7 @@ u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
#define SET_STATIC_RATE_CONTROL_SMASK(r) \
(r |= SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
-int hfi1_init_ctxt(struct send_context *sc)
+void hfi1_init_ctxt(struct send_context *sc)
{
if (sc) {
struct hfi1_devdata *dd = sc->dd;
@@ -12528,7 +12712,6 @@ int hfi1_init_ctxt(struct send_context *sc)
write_kctxt_csr(dd, sc->hw_context,
SEND_CTXT_CHECK_ENABLE, reg);
}
- return 0;
}
int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp)
@@ -12664,7 +12847,12 @@ static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
/* clear from the handled mask of the general interrupt */
m = isrc / 64;
n = isrc % 64;
- dd->gi_mask[m] &= ~((u64)1 << n);
+ if (likely(m < CCE_NUM_INT_CSRS)) {
+ dd->gi_mask[m] &= ~((u64)1 << n);
+ } else {
+ dd_dev_err(dd, "remap interrupt err\n");
+ return;
+ }
/* direct the chip source to the given MSI-X interrupt */
m = isrc / 8;
@@ -12722,7 +12910,10 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
first_sdma = last_general;
last_sdma = first_sdma + dd->num_sdma;
first_rx = last_sdma;
- last_rx = first_rx + dd->n_krcv_queues;
+ last_rx = first_rx + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT;
+
+ /* VNIC MSIx interrupts get mapped when VNIC contexts are created */
+ dd->first_dyn_msix_idx = first_rx + dd->n_krcv_queues;
/*
* Sanity check - the code expects all SDMA chip source
@@ -12736,7 +12927,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
const char *err_info;
irq_handler_t handler;
irq_handler_t thread = NULL;
- void *arg;
+ void *arg = NULL;
int idx;
struct hfi1_ctxtdata *rcd = NULL;
struct sdma_engine *sde = NULL;
@@ -12763,24 +12954,25 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
} else if (first_rx <= i && i < last_rx) {
idx = i - first_rx;
rcd = dd->rcd[idx];
- /* no interrupt if no rcd */
- if (!rcd)
- continue;
- /*
- * Set the interrupt register and mask for this
- * context's interrupt.
- */
- rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
- rcd->imask = ((u64)1) <<
- ((IS_RCVAVAIL_START + idx) % 64);
- handler = receive_context_interrupt;
- thread = receive_context_thread;
- arg = rcd;
- snprintf(me->name, sizeof(me->name),
- DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
- err_info = "receive context";
- remap_intr(dd, IS_RCVAVAIL_START + idx, i);
- me->type = IRQ_RCVCTXT;
+ if (rcd) {
+ /*
+ * Set the interrupt register and mask for this
+ * context's interrupt.
+ */
+ rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
+ rcd->imask = ((u64)1) <<
+ ((IS_RCVAVAIL_START + idx) % 64);
+ handler = receive_context_interrupt;
+ thread = receive_context_thread;
+ arg = rcd;
+ snprintf(me->name, sizeof(me->name),
+ DRIVER_NAME "_%d kctxt%d",
+ dd->unit, idx);
+ err_info = "receive context";
+ remap_intr(dd, IS_RCVAVAIL_START + idx, i);
+ me->type = IRQ_RCVCTXT;
+ rcd->msix_intr = i;
+ }
} else {
/* not in our expected range - complain, then
* ignore it
@@ -12818,6 +13010,84 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
return ret;
}
+void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd)
+{
+ int i;
+
+ if (!dd->num_msix_entries) {
+ synchronize_irq(dd->pcidev->irq);
+ return;
+ }
+
+ for (i = 0; i < dd->vnic.num_ctxt; i++) {
+ struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
+ struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
+
+ synchronize_irq(me->msix.vector);
+ }
+}
+
+void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd)
+{
+ struct hfi1_devdata *dd = rcd->dd;
+ struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
+
+ if (!me->arg) /* => no irq, no affinity */
+ return;
+
+ hfi1_put_irq_affinity(dd, me);
+ free_irq(me->msix.vector, me->arg);
+
+ me->arg = NULL;
+}
+
+void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd)
+{
+ struct hfi1_devdata *dd = rcd->dd;
+ struct hfi1_msix_entry *me;
+ int idx = rcd->ctxt;
+ void *arg = rcd;
+ int ret;
+
+ rcd->msix_intr = dd->vnic.msix_idx++;
+ me = &dd->msix_entries[rcd->msix_intr];
+
+ /*
+ * Set the interrupt register and mask for this
+ * context's interrupt.
+ */
+ rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
+ rcd->imask = ((u64)1) <<
+ ((IS_RCVAVAIL_START + idx) % 64);
+
+ snprintf(me->name, sizeof(me->name),
+ DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
+ me->name[sizeof(me->name) - 1] = 0;
+ me->type = IRQ_RCVCTXT;
+
+ remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr);
+
+ ret = request_threaded_irq(me->msix.vector, receive_context_interrupt,
+ receive_context_thread, 0, me->name, arg);
+ if (ret) {
+ dd_dev_err(dd, "vnic irq request (vector %d, idx %d) fail %d\n",
+ me->msix.vector, idx, ret);
+ return;
+ }
+ /*
+ * Assign arg only after request_irq succeeds, so cleanup is done
+ * only for an interrupt that was actually requested
+ */
+ me->arg = arg;
+
+ ret = hfi1_get_irq_affinity(dd, me);
+ if (ret) {
+ dd_dev_err(dd,
+ "unable to pin IRQ %d\n", ret);
+ free_irq(me->msix.vector, me->arg);
+ }
+}
+
/*
* Set the general handler to accept all interrupts, remap all
* chip interrupts back to MSI-X 0.
@@ -12849,7 +13119,7 @@ static int set_up_interrupts(struct hfi1_devdata *dd)
* N interrupts - one per used SDMA engine
* M interrupt - one per kernel receive context
*/
- total = 1 + dd->num_sdma + dd->n_krcv_queues;
+ total = 1 + dd->num_sdma + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT;
entries = kcalloc(total, sizeof(*entries), GFP_KERNEL);
if (!entries) {
@@ -12914,7 +13184,8 @@ fail:
*
* num_rcv_contexts - number of contexts being used
* n_krcv_queues - number of kernel contexts
- * first_user_ctxt - first non-kernel context in array of contexts
+ * first_dyn_alloc_ctxt - first dynamically allocated context
+ * in array of contexts
* freectxts - number of free user contexts
* num_send_contexts - number of PIO send contexts being used
*/
@@ -12991,10 +13262,14 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
total_contexts = num_kernel_contexts + num_user_contexts;
}
- /* the first N are kernel contexts, the rest are user contexts */
+ /* Accommodate VNIC contexts */
+ if ((total_contexts + HFI1_NUM_VNIC_CTXT) <= dd->chip_rcv_contexts)
+ total_contexts += HFI1_NUM_VNIC_CTXT;
+
+ /* the first N are kernel contexts, the rest are user/vnic contexts */
dd->num_rcv_contexts = total_contexts;
dd->n_krcv_queues = num_kernel_contexts;
- dd->first_user_ctxt = num_kernel_contexts;
+ dd->first_dyn_alloc_ctxt = num_kernel_contexts;
dd->num_user_contexts = num_user_contexts;
dd->freectxts = num_user_contexts;
dd_dev_info(dd,
@@ -13450,11 +13725,8 @@ static void reset_rxe_csrs(struct hfi1_devdata *dd)
write_csr(dd, RCV_COUNTER_ARRAY32 + (8 * i), 0);
for (i = 0; i < RXE_NUM_64_BIT_COUNTERS; i++)
write_csr(dd, RCV_COUNTER_ARRAY64 + (8 * i), 0);
- for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++) {
- write_csr(dd, RCV_RSM_CFG + (8 * i), 0);
- write_csr(dd, RCV_RSM_SELECT + (8 * i), 0);
- write_csr(dd, RCV_RSM_MATCH + (8 * i), 0);
- }
+ for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++)
+ clear_rsm_rule(dd, i);
for (i = 0; i < 32; i++)
write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), 0);
@@ -13606,14 +13878,14 @@ static void init_chip(struct hfi1_devdata *dd)
dd_dev_info(dd, "Resetting CSRs with FLR\n");
/* do the FLR, the DC reset will remain */
- hfi1_pcie_flr(dd);
+ pcie_flr(dd->pcidev);
/* restore command and BARs */
restore_pci_variables(dd);
if (is_ax(dd)) {
dd_dev_info(dd, "Resetting CSRs with FLR\n");
- hfi1_pcie_flr(dd);
+ pcie_flr(dd->pcidev);
restore_pci_variables(dd);
}
} else {
@@ -13813,6 +14085,16 @@ static void add_rsm_rule(struct hfi1_devdata *dd, u8 rule_index,
(u64)rrd->value2 << RCV_RSM_MATCH_VALUE2_SHIFT);
}
+/*
+ * Clear a receive side mapping rule.
+ */
+static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index)
+{
+ write_csr(dd, RCV_RSM_CFG + (8 * rule_index), 0);
+ write_csr(dd, RCV_RSM_SELECT + (8 * rule_index), 0);
+ write_csr(dd, RCV_RSM_MATCH + (8 * rule_index), 0);
+}
+
/* return the number of RSM map table entries that will be used for QOS */
static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
unsigned int *np)
@@ -13928,7 +14210,7 @@ static void init_qos(struct hfi1_devdata *dd, struct rsm_map_table *rmt)
rrd.value2 = LRH_SC_VALUE;
/* add rule 0 */
- add_rsm_rule(dd, 0, &rrd);
+ add_rsm_rule(dd, RSM_INS_VERBS, &rrd);
/* mark RSM map entries as used */
rmt->used += rmt_entries;
@@ -13958,7 +14240,7 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd,
/*
* RSM will extract the destination context as an index into the
* map table. The destination contexts are a sequential block
- * in the range first_user_ctxt...num_rcv_contexts-1 (inclusive).
+ * in the range first_dyn_alloc_ctxt...num_rcv_contexts-1 (inclusive).
* Map entries are accessed as offset + extracted value. Adjust
* the added offset so this sequence can be placed anywhere in
* the table - as long as the entries themselves do not wrap.
@@ -13966,9 +14248,9 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd,
* start with that to allow for a "negative" offset.
*/
offset = (u8)(NUM_MAP_ENTRIES + (int)rmt->used -
- (int)dd->first_user_ctxt);
+ (int)dd->first_dyn_alloc_ctxt);
- for (i = dd->first_user_ctxt, idx = rmt->used;
+ for (i = dd->first_dyn_alloc_ctxt, idx = rmt->used;
i < dd->num_rcv_contexts; i++, idx++) {
/* replace with identity mapping */
regoff = (idx % 8) * 8;
@@ -14002,11 +14284,84 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd,
rrd.value2 = 1;
/* add rule 1 */
- add_rsm_rule(dd, 1, &rrd);
+ add_rsm_rule(dd, RSM_INS_FECN, &rrd);
rmt->used += dd->num_user_contexts;
}
+/* Initialize RSM for VNIC */
+void hfi1_init_vnic_rsm(struct hfi1_devdata *dd)
+{
+ u8 i, j;
+ u8 ctx_id = 0;
+ u64 reg;
+ u32 regoff;
+ struct rsm_rule_data rrd;
+
+ if (hfi1_vnic_is_rsm_full(dd, NUM_VNIC_MAP_ENTRIES)) {
+ dd_dev_err(dd, "Vnic RSM disabled, rmt entries used = %d\n",
+ dd->vnic.rmt_start);
+ return;
+ }
+
+ dev_dbg(&(dd)->pcidev->dev, "Vnic rsm start = %d, end %d\n",
+ dd->vnic.rmt_start,
+ dd->vnic.rmt_start + NUM_VNIC_MAP_ENTRIES);
+
+ /* Update RSM mapping table, 32 regs, 256 entries - 1 ctx per byte */
+ regoff = RCV_RSM_MAP_TABLE + (dd->vnic.rmt_start / 8) * 8;
+ reg = read_csr(dd, regoff);
+ for (i = 0; i < NUM_VNIC_MAP_ENTRIES; i++) {
+ /* Update map register with vnic context */
+ j = (dd->vnic.rmt_start + i) % 8;
+ reg &= ~(0xffllu << (j * 8));
+ reg |= (u64)dd->vnic.ctxt[ctx_id++]->ctxt << (j * 8);
+ /* Wrap the vnic ctxt index around */
+ ctx_id %= dd->vnic.num_ctxt;
+ /* Write back map register */
+ if (j == 7 || ((i + 1) == NUM_VNIC_MAP_ENTRIES)) {
+ dev_dbg(&(dd)->pcidev->dev,
+ "Vnic rsm map reg[%d] =0x%llx\n",
+ regoff - RCV_RSM_MAP_TABLE, reg);
+
+ write_csr(dd, regoff, reg);
+ regoff += 8;
+ if (i < (NUM_VNIC_MAP_ENTRIES - 1))
+ reg = read_csr(dd, regoff);
+ }
+ }
+
+ /* Add rule for vnic */
+ rrd.offset = dd->vnic.rmt_start;
+ rrd.pkt_type = 4;
+ /* Match 16B packets */
+ rrd.field1_off = L2_TYPE_MATCH_OFFSET;
+ rrd.mask1 = L2_TYPE_MASK;
+ rrd.value1 = L2_16B_VALUE;
+ /* Match ETH L4 packets */
+ rrd.field2_off = L4_TYPE_MATCH_OFFSET;
+ rrd.mask2 = L4_16B_TYPE_MASK;
+ rrd.value2 = L4_16B_ETH_VALUE;
+ /* Calc context from veswid and entropy */
+ rrd.index1_off = L4_16B_HDR_VESWID_OFFSET;
+ rrd.index1_width = ilog2(NUM_VNIC_MAP_ENTRIES);
+ rrd.index2_off = L2_16B_ENTROPY_OFFSET;
+ rrd.index2_width = ilog2(NUM_VNIC_MAP_ENTRIES);
+ add_rsm_rule(dd, RSM_INS_VNIC, &rrd);
+
+ /* Enable RSM if not already enabled */
+ add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
+}
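
Each RSM map register packs eight one-byte context entries, which is why the loop above clears byte j with an 0xff mask, ORs in the new context, and writes the register back only when byte 7 is reached or the entries run out. A self-contained sketch of that byte-packing step, independent of the driver:

#include <stdint.h>
#include <stdio.h>

/* Replace byte 'slot' (0..7) of a 64-bit map register with 'ctxt'. */
static uint64_t set_map_byte(uint64_t reg, unsigned int slot, uint8_t ctxt)
{
	reg &= ~(0xffULL << (slot * 8));	/* clear the old entry */
	reg |= (uint64_t)ctxt << (slot * 8);	/* insert the new one */
	return reg;
}

int main(void)
{
	uint64_t reg = 0;
	unsigned int slot;

	for (slot = 0; slot < 8; slot++)
		reg = set_map_byte(reg, slot, 0x10 + slot);

	/* prints 0x1716151413121110: one context id per byte */
	printf("map register 0x%016llx\n", (unsigned long long)reg);
	return 0;
}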
+
+void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd)
+{
+ clear_rsm_rule(dd, RSM_INS_VNIC);
+
+ /* Disable RSM if used only by vnic */
+ if (dd->vnic.rmt_start == 0)
+ clear_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
+}
+
static void init_rxe(struct hfi1_devdata *dd)
{
struct rsm_map_table *rmt;
@@ -14019,6 +14374,8 @@ static void init_rxe(struct hfi1_devdata *dd)
init_qos(dd, rmt);
init_user_fecn_handling(dd, rmt);
complete_rsm_map_table(dd, rmt);
+ /* record number of used rsm map entries for vnic */
+ dd->vnic.rmt_start = rmt->used;
kfree(rmt);
/*
@@ -14208,30 +14565,24 @@ done:
return ret;
}
-int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt)
+int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt)
{
- struct hfi1_ctxtdata *rcd;
- unsigned sctxt;
- int ret = 0;
+ u8 hw_ctxt;
u64 reg;
- if (ctxt < dd->num_rcv_contexts) {
- rcd = dd->rcd[ctxt];
- } else {
- ret = -EINVAL;
- goto done;
- }
- if (!rcd || !rcd->sc) {
- ret = -EINVAL;
- goto done;
- }
- sctxt = rcd->sc->hw_context;
- reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
+ if (!ctxt || !ctxt->sc)
+ return -EINVAL;
+
+ if (ctxt->ctxt >= dd->num_rcv_contexts)
+ return -EINVAL;
+
+ hw_ctxt = ctxt->sc->hw_context;
+ reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE);
reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
- write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
- write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0);
-done:
- return ret;
+ write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg);
+ write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0);
+
+ return 0;
}
/*
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 043fd21dc5f3..cbe455d9ab8b 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -1,7 +1,7 @@
#ifndef _CHIP_H
#define _CHIP_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -394,7 +394,8 @@
#define LAST_REMOTE_STATE_COMPLETE 0x13
#define LINK_QUALITY_INFO 0x14
#define REMOTE_DEVICE_ID 0x15
-#define LINK_DOWN_REASON 0x16
+#define LINK_DOWN_REASON 0x16 /* first byte of offset 0x16 */
+#define VERSION_PATCH 0x16 /* last byte of offset 0x16 */
/* 8051 lane specific register field IDs */
#define TX_EQ_SETTINGS 0x00
@@ -524,10 +525,12 @@ enum {
#define SUPPORTED_CRCS (CAP_CRC_14B | CAP_CRC_48B)
/* misc status version fields */
-#define STS_FM_VERSION_A_SHIFT 16
-#define STS_FM_VERSION_A_MASK 0xff
-#define STS_FM_VERSION_B_SHIFT 24
-#define STS_FM_VERSION_B_MASK 0xff
+#define STS_FM_VERSION_MINOR_SHIFT 16
+#define STS_FM_VERSION_MINOR_MASK 0xff
+#define STS_FM_VERSION_MAJOR_SHIFT 24
+#define STS_FM_VERSION_MAJOR_MASK 0xff
+#define STS_FM_VERSION_PATCH_SHIFT 24
+#define STS_FM_VERSION_PATCH_MASK 0xff
/* LCB_CFG_CRC_MODE TX_VAL and RX_VAL CRC mode values */
#define LCB_CRC_16B 0x0 /* 16b CRC */
@@ -633,7 +636,8 @@ static inline void write_uctxt_csr(struct hfi1_devdata *dd, int ctxt,
write_csr(dd, offset0 + (0x1000 * ctxt), value);
}
-u64 create_pbc(struct hfi1_pportdata *ppd, u64, int, u32, u32);
+u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl,
+ u32 dw_len);
/* firmware.c */
#define SBUS_MASTER_BROADCAST 0xfd
@@ -698,7 +702,8 @@ void fabric_serdes_reset(struct hfi1_devdata *dd);
int read_8051_data(struct hfi1_devdata *dd, u32 addr, u32 len, u64 *result);
/* chip.c */
-void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b);
+void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
+ u8 *ver_patch);
void read_guid(struct hfi1_devdata *dd);
int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout);
void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
@@ -724,7 +729,8 @@ int bringup_serdes(struct hfi1_pportdata *ppd);
void set_intr_state(struct hfi1_devdata *dd, u32 enable);
void apply_link_downgrade_policy(struct hfi1_pportdata *ppd,
int refresh_widths);
-void update_usrhead(struct hfi1_ctxtdata *, u32, u32, u32, u32, u32);
+void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd,
+ u32 intr_adjust, u32 npkts);
int stop_drain_data_vls(struct hfi1_devdata *dd);
int open_fill_data_vls(struct hfi1_devdata *dd);
u32 ns_to_cclock(struct hfi1_devdata *dd, u32 ns);
@@ -1343,7 +1349,7 @@ void hfi1_start_cleanup(struct hfi1_devdata *dd);
void hfi1_clear_tids(struct hfi1_ctxtdata *rcd);
struct ib_header *hfi1_get_msgheader(
struct hfi1_devdata *dd, __le32 *rhf_addr);
-int hfi1_init_ctxt(struct send_context *sc);
+void hfi1_init_ctxt(struct send_context *sc);
void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
u32 type, unsigned long pa, u16 order);
void hfi1_quiet_serdes(struct hfi1_pportdata *ppd);
@@ -1356,8 +1362,10 @@ int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val);
int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey);
int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt);
int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey);
-int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt);
+int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt);
void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality);
+void hfi1_init_vnic_rsm(struct hfi1_devdata *dd);
+void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd);
/*
* Interrupt source table.
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index 5bfa839d1c48..793514f1d15f 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -839,7 +839,9 @@
#define SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK 0x8ull
#define SEND_CM_CTRL_RESETCSR 0x0000000000000020ull
#define SEND_CM_GLOBAL_CREDIT (TXE + 0x000000000508)
+#define SEND_CM_GLOBAL_CREDIT_AU_MASK 0x7ull
#define SEND_CM_GLOBAL_CREDIT_AU_SHIFT 16
+#define SEND_CM_GLOBAL_CREDIT_AU_SMASK 0x70000ull
#define SEND_CM_GLOBAL_CREDIT_RESETCSR 0x0000094000030000ull
#define SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_MASK 0xFFFFull
#define SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT 0
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index da7be21bedb4..995d62c7f9a7 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -331,16 +331,15 @@ struct diag_pkt {
#define FULL_MGMT_P_KEY 0xFFFF
#define DEFAULT_P_KEY LIM_MGMT_P_KEY
-#define HFI1_AETH_CREDIT_SHIFT 24
-#define HFI1_AETH_CREDIT_MASK 0x1F
-#define HFI1_AETH_CREDIT_INVAL 0x1F
-#define HFI1_MSN_MASK 0xFFFFFF
-#define HFI1_FECN_SHIFT 31
-#define HFI1_FECN_MASK 1
-#define HFI1_FECN_SMASK BIT(HFI1_FECN_SHIFT)
-#define HFI1_BECN_SHIFT 30
-#define HFI1_BECN_MASK 1
-#define HFI1_BECN_SMASK BIT(HFI1_BECN_SHIFT)
+
+/*
+ * 0xF8 - the top 4 bits mark the multicast range and 1 more bit marks
+ * the collective range
+ * Example: For 24 bit LID space,
+ * Multicast range: 0xF00000 to 0xF7FFFF
+ * Collective range: 0xF80000 to 0xFFFFFE
+ */
+#define HFI1_MCAST_NR 0x4 /* Number of top bits set */
+#define HFI1_COLLECTIVE_NR 0x1 /* Number of bits after MCAST_NR */
#define HFI1_PSM_IOC_BASE_SEQ 0x0
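
With HFI1_MCAST_NR top bits marking multicast and HFI1_COLLECTIVE_NR further bits carving out the collective range, the example ranges in the comment above fall out directly. A short sketch that recomputes them for a 24-bit LID space (the upper bound stops at 0xFFFFFE, presumably because the all-ones LID is reserved); the macro names here are local to the sketch:

#include <stdint.h>
#include <stdio.h>

#define LID_BITS	24
#define MCAST_NR	4	/* top bits that mark the multicast range */
#define COLL_NR		1	/* extra bit that marks the collective range */

int main(void)
{
	uint32_t mcast_start = ((1u << MCAST_NR) - 1) << (LID_BITS - MCAST_NR);
	uint32_t coll_start = mcast_start |
			      (((1u << COLL_NR) - 1) << (LID_BITS - MCAST_NR - COLL_NR));
	uint32_t lid_max = (1u << LID_BITS) - 1;	/* all-ones LID excluded */

	printf("multicast:  0x%06X - 0x%06X\n",
	       (unsigned)mcast_start, (unsigned)(coll_start - 1));
	printf("collective: 0x%06X - 0x%06X\n",
	       (unsigned)coll_start, (unsigned)(lid_max - 1));
	return 0;
}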
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index 8725f4c086cf..e9fa3c293e42 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -1,6 +1,6 @@
#ifdef CONFIG_DEBUG_FS
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -50,8 +50,13 @@
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ratelimit.h>
+#include <linux/fault-inject.h>
#include "hfi.h"
+#include "trace.h"
#include "debugfs.h"
#include "device.h"
#include "qp.h"
@@ -169,7 +174,7 @@ static int _opcode_stats_seq_show(struct seq_file *s, void *v)
struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
struct hfi1_devdata *dd = dd_from_dev(ibd);
- for (j = 0; j < dd->first_user_ctxt; j++) {
+ for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) {
if (!dd->rcd[j])
continue;
n_packets += dd->rcd[j]->opstats->stats[i].n_packets;
@@ -195,7 +200,7 @@ static void *_ctx_stats_seq_start(struct seq_file *s, loff_t *pos)
if (!*pos)
return SEQ_START_TOKEN;
- if (*pos >= dd->first_user_ctxt)
+ if (*pos >= dd->first_dyn_alloc_ctxt)
return NULL;
return pos;
}
@@ -209,7 +214,7 @@ static void *_ctx_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
return pos;
++*pos;
- if (*pos >= dd->first_user_ctxt)
+ if (*pos >= dd->first_dyn_alloc_ctxt)
return NULL;
return pos;
}
@@ -503,18 +508,11 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf,
ppd = private2ppd(file);
dd = ppd->dd;
- buff = kmalloc(count + 1, GFP_KERNEL);
- if (!buff)
- return -ENOMEM;
-
- ret = copy_from_user(buff, buf, count);
- if (ret > 0) {
- ret = -EFAULT;
- goto do_free;
- }
-
/* zero terminate and read the expected integer */
- buff[count] = 0;
+ buff = memdup_user_nul(buf, count);
+ if (IS_ERR(buff))
+ return PTR_ERR(buff);
+
ret = kstrtoull(buff, 0, &value);
if (ret)
goto do_free;
@@ -692,15 +690,9 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
if (i2c_addr == 0)
return -EINVAL;
- buff = kmalloc(count, GFP_KERNEL);
- if (!buff)
- return -ENOMEM;
-
- ret = copy_from_user(buff, buf, count);
- if (ret > 0) {
- ret = -EFAULT;
- goto _free;
- }
+ buff = memdup_user(buf, count);
+ if (IS_ERR(buff))
+ return PTR_ERR(buff);
total_written = i2c_write(ppd, target, i2c_addr, offset, buff, count);
if (total_written < 0) {
@@ -805,15 +797,10 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf,
ppd = private2ppd(file);
- buff = kmalloc(count, GFP_KERNEL);
- if (!buff)
- return -ENOMEM;
+ buff = memdup_user(buf, count);
+ if (IS_ERR(buff))
+ return PTR_ERR(buff);
- ret = copy_from_user(buff, buf, count);
- if (ret > 0) {
- ret = -EFAULT;
- goto _free;
- }
total_written = qsfp_write(ppd, target, *ppos, buff, count);
if (total_written < 0) {
ret = total_written;
@@ -1080,6 +1067,222 @@ DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list);
DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list)
DEBUGFS_FILE_OPS(sdma_cpu_list);
+#ifdef CONFIG_FAULT_INJECTION
+static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct hfi1_opcode_stats_perctx *opstats;
+
+ if (*pos >= ARRAY_SIZE(opstats->stats))
+ return NULL;
+ return pos;
+}
+
+static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct hfi1_opcode_stats_perctx *opstats;
+
+ ++*pos;
+ if (*pos >= ARRAY_SIZE(opstats->stats))
+ return NULL;
+ return pos;
+}
+
+static void _fault_stats_seq_stop(struct seq_file *s, void *v)
+{
+}
+
+static int _fault_stats_seq_show(struct seq_file *s, void *v)
+{
+ loff_t *spos = v;
+ loff_t i = *spos, j;
+ u64 n_packets = 0, n_bytes = 0;
+ struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
+ struct hfi1_devdata *dd = dd_from_dev(ibd);
+
+ for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) {
+ if (!dd->rcd[j])
+ continue;
+ n_packets += dd->rcd[j]->opstats->stats[i].n_packets;
+ n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes;
+ }
+ if (!n_packets && !n_bytes)
+ return SEQ_SKIP;
+ if (!ibd->fault_opcode->n_rxfaults[i] &&
+ !ibd->fault_opcode->n_txfaults[i])
+ return SEQ_SKIP;
+ seq_printf(s, "%02llx %llu/%llu (faults rx:%llu tx:%llu)\n", i,
+ (unsigned long long)n_packets,
+ (unsigned long long)n_bytes,
+ (unsigned long long)ibd->fault_opcode->n_rxfaults[i],
+ (unsigned long long)ibd->fault_opcode->n_txfaults[i]);
+ return 0;
+}
+
+DEBUGFS_SEQ_FILE_OPS(fault_stats);
+DEBUGFS_SEQ_FILE_OPEN(fault_stats);
+DEBUGFS_FILE_OPS(fault_stats);
+
+static void fault_exit_opcode_debugfs(struct hfi1_ibdev *ibd)
+{
+ debugfs_remove_recursive(ibd->fault_opcode->dir);
+ kfree(ibd->fault_opcode);
+ ibd->fault_opcode = NULL;
+}
+
+static int fault_init_opcode_debugfs(struct hfi1_ibdev *ibd)
+{
+ struct dentry *parent = ibd->hfi1_ibdev_dbg;
+
+ ibd->fault_opcode = kzalloc(sizeof(*ibd->fault_opcode), GFP_KERNEL);
+ if (!ibd->fault_opcode)
+ return -ENOMEM;
+
+ ibd->fault_opcode->attr.interval = 1;
+ ibd->fault_opcode->attr.require_end = ULONG_MAX;
+ ibd->fault_opcode->attr.stacktrace_depth = 32;
+ ibd->fault_opcode->attr.dname = NULL;
+ ibd->fault_opcode->attr.verbose = 0;
+ ibd->fault_opcode->fault_by_opcode = false;
+ ibd->fault_opcode->opcode = 0;
+ ibd->fault_opcode->mask = 0xff;
+
+ ibd->fault_opcode->dir =
+ fault_create_debugfs_attr("fault_opcode",
+ parent,
+ &ibd->fault_opcode->attr);
+ if (IS_ERR(ibd->fault_opcode->dir)) {
+ kfree(ibd->fault_opcode);
+ return -ENOENT;
+ }
+
+ DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault_opcode->dir, ibd);
+ if (!debugfs_create_bool("fault_by_opcode", 0600,
+ ibd->fault_opcode->dir,
+ &ibd->fault_opcode->fault_by_opcode))
+ goto fail;
+ if (!debugfs_create_x8("opcode", 0600, ibd->fault_opcode->dir,
+ &ibd->fault_opcode->opcode))
+ goto fail;
+ if (!debugfs_create_x8("mask", 0600, ibd->fault_opcode->dir,
+ &ibd->fault_opcode->mask))
+ goto fail;
+
+ return 0;
+fail:
+ fault_exit_opcode_debugfs(ibd);
+ return -ENOMEM;
+}
+
+static void fault_exit_packet_debugfs(struct hfi1_ibdev *ibd)
+{
+ debugfs_remove_recursive(ibd->fault_packet->dir);
+ kfree(ibd->fault_packet);
+ ibd->fault_packet = NULL;
+}
+
+static int fault_init_packet_debugfs(struct hfi1_ibdev *ibd)
+{
+ struct dentry *parent = ibd->hfi1_ibdev_dbg;
+
+ ibd->fault_packet = kzalloc(sizeof(*ibd->fault_packet), GFP_KERNEL);
+ if (!ibd->fault_packet)
+ return -ENOMEM;
+
+ ibd->fault_packet->attr.interval = 1;
+ ibd->fault_packet->attr.require_end = ULONG_MAX;
+ ibd->fault_packet->attr.stacktrace_depth = 32;
+ ibd->fault_packet->attr.dname = NULL;
+ ibd->fault_packet->attr.verbose = 0;
+ ibd->fault_packet->fault_by_packet = false;
+
+ ibd->fault_packet->dir =
+ fault_create_debugfs_attr("fault_packet",
+ parent,
+ &ibd->fault_packet->attr);
+ if (IS_ERR(ibd->fault_packet->dir)) {
+ kfree(ibd->fault_packet);
+ return -ENOENT;
+ }
+
+ if (!debugfs_create_bool("fault_by_packet", 0600,
+ ibd->fault_packet->dir,
+ &ibd->fault_packet->fault_by_packet))
+ goto fail;
+ if (!debugfs_create_u64("fault_stats", 0400,
+ ibd->fault_packet->dir,
+ &ibd->fault_packet->n_faults))
+ goto fail;
+
+ return 0;
+fail:
+ fault_exit_packet_debugfs(ibd);
+ return -ENOMEM;
+}
+
+static void fault_exit_debugfs(struct hfi1_ibdev *ibd)
+{
+ fault_exit_opcode_debugfs(ibd);
+ fault_exit_packet_debugfs(ibd);
+}
+
+static int fault_init_debugfs(struct hfi1_ibdev *ibd)
+{
+ int ret = 0;
+
+ ret = fault_init_opcode_debugfs(ibd);
+ if (ret)
+ return ret;
+
+ ret = fault_init_packet_debugfs(ibd);
+ if (ret)
+ fault_exit_opcode_debugfs(ibd);
+
+ return ret;
+}
+
+bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+ return ibd->fault_suppress_err;
+}
+
+bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx)
+{
+ bool ret = false;
+ struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device);
+
+ if (!ibd->fault_opcode || !ibd->fault_opcode->fault_by_opcode)
+ return false;
+ if (ibd->fault_opcode->opcode != (opcode & ibd->fault_opcode->mask))
+ return false;
+ ret = should_fail(&ibd->fault_opcode->attr, 1);
+ if (ret) {
+ trace_hfi1_fault_opcode(qp, opcode);
+ if (rx)
+ ibd->fault_opcode->n_rxfaults[opcode]++;
+ else
+ ibd->fault_opcode->n_txfaults[opcode]++;
+ }
+ return ret;
+}
+
+bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
+{
+ struct rvt_dev_info *rdi = &packet->rcd->ppd->dd->verbs_dev.rdi;
+ struct hfi1_ibdev *ibd = dev_from_rdi(rdi);
+ bool ret = false;
+
+ if (!ibd->fault_packet || !ibd->fault_packet->fault_by_packet)
+ return false;
+
+ ret = should_fail(&ibd->fault_packet->attr, 1);
+ if (ret) {
+ ++ibd->fault_packet->n_faults;
+ trace_hfi1_fault_packet(packet);
+ }
+ return ret;
+}
+#endif
+
void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{
char name[sizeof("port0counters") + 1];
@@ -1129,12 +1332,22 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
!port_cntr_ops[i].ops.write ?
S_IRUGO : S_IRUGO | S_IWUSR);
}
+
+#ifdef CONFIG_FAULT_INJECTION
+ debugfs_create_bool("fault_suppress_err", 0600,
+ ibd->hfi1_ibdev_dbg,
+ &ibd->fault_suppress_err);
+ fault_init_debugfs(ibd);
+#endif
}
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
{
if (!hfi1_dbg_root)
goto out;
+#ifdef CONFIG_FAULT_INJECTION
+ fault_exit_debugfs(ibd);
+#endif
debugfs_remove(ibd->hfi1_ibdev_link);
debugfs_remove_recursive(ibd->hfi1_ibdev_dbg);
out:
diff --git a/drivers/infiniband/hw/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h
index b6fb6814f1b8..38c38a98156d 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.h
+++ b/drivers/infiniband/hw/hfi1/debugfs.h
@@ -53,23 +53,79 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd);
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd);
void hfi1_dbg_init(void);
void hfi1_dbg_exit(void);
+
+#ifdef CONFIG_FAULT_INJECTION
+#include <linux/fault-inject.h>
+struct fault_opcode {
+ struct fault_attr attr;
+ struct dentry *dir;
+ bool fault_by_opcode;
+ u64 n_rxfaults[256];
+ u64 n_txfaults[256];
+ u8 opcode;
+ u8 mask;
+};
+
+struct fault_packet {
+ struct fault_attr attr;
+ struct dentry *dir;
+ bool fault_by_packet;
+ u64 n_faults;
+};
+
+bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx);
+bool hfi1_dbg_fault_packet(struct hfi1_packet *packet);
+bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd);
+#else
+static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
+{
+ return false;
+}
+
+static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
+ u32 opcode, bool rx)
+{
+ return false;
+}
+
+static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+ return false;
+}
+#endif
+
#else
static inline void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{
}
-void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
+static inline void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
+{
+}
+
+static inline void hfi1_dbg_init(void)
{
}
-void hfi1_dbg_init(void)
+static inline void hfi1_dbg_exit(void)
{
}
-void hfi1_dbg_exit(void)
+static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
{
+ return false;
}
+static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
+ u32 opcode, bool rx)
+{
+ return false;
+}
+
+static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+ return false;
+}
#endif
#endif /* _HFI1_DEBUGFS_H */
diff --git a/drivers/infiniband/hw/hfi1/device.c b/drivers/infiniband/hw/hfi1/device.c
index bf64b5a7bfd7..bbb6069dec2a 100644
--- a/drivers/infiniband/hw/hfi1/device.c
+++ b/drivers/infiniband/hw/hfi1/device.c
@@ -69,7 +69,7 @@ int hfi1_cdev_init(int minor, const char *name,
cdev_init(cdev, fops);
cdev->owner = THIS_MODULE;
- cdev->kobj.parent = parent;
+ cdev_set_parent(cdev, parent);
kobject_set_name(&cdev->kobj, name);
ret = cdev_add(cdev, dev, 1);
diff --git a/drivers/infiniband/hw/hfi1/dma.c b/drivers/infiniband/hw/hfi1/dma.c
deleted file mode 100644
index 7e8dab892848..000000000000
--- a/drivers/infiniband/hw/hfi1/dma.c
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-#include <linux/types.h>
-#include <linux/scatterlist.h>
-
-#include "verbs.h"
-
-#define BAD_DMA_ADDRESS ((u64)0)
-
-/*
- * The following functions implement driver specific replacements
- * for the ib_dma_*() functions.
- *
- * These functions return kernel virtual addresses instead of
- * device bus addresses since the driver uses the CPU to copy
- * data instead of using hardware DMA.
- */
-
-static int hfi1_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
- return dma_addr == BAD_DMA_ADDRESS;
-}
-
-static u64 hfi1_dma_map_single(struct ib_device *dev, void *cpu_addr,
- size_t size, enum dma_data_direction direction)
-{
- if (WARN_ON(!valid_dma_direction(direction)))
- return BAD_DMA_ADDRESS;
-
- return (u64)cpu_addr;
-}
-
-static void hfi1_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
- enum dma_data_direction direction)
-{
- /* This is a stub, nothing to be done here */
-}
-
-static u64 hfi1_dma_map_page(struct ib_device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction direction)
-{
- u64 addr;
-
- if (WARN_ON(!valid_dma_direction(direction)))
- return BAD_DMA_ADDRESS;
-
- if (offset + size > PAGE_SIZE)
- return BAD_DMA_ADDRESS;
-
- addr = (u64)page_address(page);
- if (addr)
- addr += offset;
-
- return addr;
-}
-
-static void hfi1_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
- enum dma_data_direction direction)
-{
- /* This is a stub, nothing to be done here */
-}
-
-static int hfi1_map_sg(struct ib_device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction direction)
-{
- struct scatterlist *sg;
- u64 addr;
- int i;
- int ret = nents;
-
- if (WARN_ON(!valid_dma_direction(direction)))
- return BAD_DMA_ADDRESS;
-
- for_each_sg(sgl, sg, nents, i) {
- addr = (u64)page_address(sg_page(sg));
- if (!addr) {
- ret = 0;
- break;
- }
- sg->dma_address = addr + sg->offset;
-#ifdef CONFIG_NEED_SG_DMA_LENGTH
- sg->dma_length = sg->length;
-#endif
- }
- return ret;
-}
-
-static void hfi1_unmap_sg(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction)
-{
- /* This is a stub, nothing to be done here */
-}
-
-static void hfi1_sync_single_for_cpu(struct ib_device *dev, u64 addr,
- size_t size, enum dma_data_direction dir)
-{
-}
-
-static void hfi1_sync_single_for_device(struct ib_device *dev, u64 addr,
- size_t size,
- enum dma_data_direction dir)
-{
-}
-
-static void *hfi1_dma_alloc_coherent(struct ib_device *dev, size_t size,
- u64 *dma_handle, gfp_t flag)
-{
- struct page *p;
- void *addr = NULL;
-
- p = alloc_pages(flag, get_order(size));
- if (p)
- addr = page_address(p);
- if (dma_handle)
- *dma_handle = (u64)addr;
- return addr;
-}
-
-static void hfi1_dma_free_coherent(struct ib_device *dev, size_t size,
- void *cpu_addr, u64 dma_handle)
-{
- free_pages((unsigned long)cpu_addr, get_order(size));
-}
-
-struct ib_dma_mapping_ops hfi1_dma_mapping_ops = {
- .mapping_error = hfi1_mapping_error,
- .map_single = hfi1_dma_map_single,
- .unmap_single = hfi1_dma_unmap_single,
- .map_page = hfi1_dma_map_page,
- .unmap_page = hfi1_dma_unmap_page,
- .map_sg = hfi1_map_sg,
- .unmap_sg = hfi1_unmap_sg,
- .sync_single_for_cpu = hfi1_sync_single_for_cpu,
- .sync_single_for_device = hfi1_sync_single_for_device,
- .alloc_coherent = hfi1_dma_alloc_coherent,
- .free_coherent = hfi1_dma_free_coherent
-};
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 4fbaee68012b..a50870e455a3 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -59,6 +59,8 @@
#include "trace.h"
#include "qp.h"
#include "sdma.h"
+#include "debugfs.h"
+#include "vnic.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -83,8 +85,8 @@ module_param_named(cu, hfi1_cu, uint, S_IRUGO);
MODULE_PARM_DESC(cu, "Credit return units");
unsigned long hfi1_cap_mask = HFI1_CAP_MASK_DEFAULT;
-static int hfi1_caps_set(const char *, const struct kernel_param *);
-static int hfi1_caps_get(char *, const struct kernel_param *);
+static int hfi1_caps_set(const char *val, const struct kernel_param *kp);
+static int hfi1_caps_get(char *buffer, const struct kernel_param *kp);
static const struct kernel_param_ops cap_ops = {
.set = hfi1_caps_set,
.get = hfi1_caps_get
@@ -100,6 +102,11 @@ MODULE_VERSION(HFI1_DRIVER_VERSION);
* MAX_PKT_RECV is the max # of packets processed per receive interrupt.
*/
#define MAX_PKT_RECV 64
+/*
+ * MAX_PKT_RECV_THREAD is the max # of packets processed before
+ * the qp_wait_list queue is flushed.
+ */
+#define MAX_PKT_RECV_THREAD (MAX_PKT_RECV * 4)
#define EGR_HEAD_UPDATE_THRESHOLD 16
struct hfi1_ib_stats hfi1_stats;
@@ -204,42 +211,6 @@ int hfi1_count_active_units(void)
}
/*
- * Return count of all units, optionally return in arguments
- * the number of usable (present) units, and the number of
- * ports that are up.
- */
-int hfi1_count_units(int *npresentp, int *nupp)
-{
- int nunits = 0, npresent = 0, nup = 0;
- struct hfi1_devdata *dd;
- unsigned long flags;
- int pidx;
- struct hfi1_pportdata *ppd;
-
- spin_lock_irqsave(&hfi1_devs_lock, flags);
-
- list_for_each_entry(dd, &hfi1_dev_list, list) {
- nunits++;
- if ((dd->flags & HFI1_PRESENT) && dd->kregbase)
- npresent++;
- for (pidx = 0; pidx < dd->num_pports; ++pidx) {
- ppd = dd->pport + pidx;
- if (ppd->lid && ppd->linkup)
- nup++;
- }
- }
-
- spin_unlock_irqrestore(&hfi1_devs_lock, flags);
-
- if (npresentp)
- *npresentp = npresent;
- if (nupp)
- *nupp = nup;
-
- return nunits;
-}
-
-/*
* Get address of eager buffer from its index (allocated in chunks, not
* contiguous).
*/
@@ -259,7 +230,7 @@ static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf,
* allowed size ranges for the respective type and, optionally,
* return the proper encoding.
*/
-inline int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded)
+int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded)
{
if (unlikely(!PAGE_ALIGNED(size)))
return 0;
@@ -278,8 +249,8 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
{
struct ib_header *rhdr = packet->hdr;
u32 rte = rhf_rcv_type_err(packet->rhf);
- int lnh = be16_to_cpu(rhdr->lrh[0]) & 3;
- struct hfi1_ibport *ibp = &ppd->ibport_data;
+ int lnh = ib_get_lnh(rhdr);
+ struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct hfi1_devdata *dd = ppd->dd;
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
@@ -290,7 +261,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
/* For TIDERR and RC QPs preemptively schedule a NAK */
struct ib_other_headers *ohdr = NULL;
u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */
- u16 lid = be16_to_cpu(rhdr->lrh[1]);
+ u16 lid = ib_get_dlid(rhdr);
u32 qp_num;
u32 rcv_flags = 0;
@@ -391,7 +362,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
u16 rlid;
u8 svc_type, sl, sc5;
- sc5 = hdr2sc(rhdr, packet->rhf);
+ sc5 = hfi1_9B_get_sc5(rhdr, packet->rhf);
sl = ibp->sc_to_sl[sc5];
lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK;
@@ -409,7 +380,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
svc_type = IB_CC_SVCTYPE_UD;
break;
case IB_QPT_UC:
- rlid = be16_to_cpu(rhdr->lrh[3]);
+ rlid = ib_get_slid(rhdr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_UC;
break;
@@ -455,7 +426,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
struct ib_other_headers *ohdr = pkt->ohdr;
struct ib_grh *grh = NULL;
u32 rqpn = 0, bth1;
- u16 rlid, dlid = be16_to_cpu(hdr->lrh[1]);
+ u16 rlid, dlid = ib_get_dlid(hdr);
u8 sc, svc_type;
bool is_mcast = false;
@@ -466,19 +437,19 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
case IB_QPT_SMI:
case IB_QPT_GSI:
case IB_QPT_UD:
- rlid = be16_to_cpu(hdr->lrh[3]);
+ rlid = ib_get_slid(hdr);
rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
svc_type = IB_CC_SVCTYPE_UD;
is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
(dlid != be16_to_cpu(IB_LID_PERMISSIVE));
break;
case IB_QPT_UC:
- rlid = qp->remote_ah_attr.dlid;
+ rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_UC;
break;
case IB_QPT_RC:
- rlid = qp->remote_ah_attr.dlid;
+ rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_RC;
break;
@@ -486,16 +457,16 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
return;
}
- sc = hdr2sc(hdr, pkt->rhf);
+ sc = hfi1_9B_get_sc5(hdr, pkt->rhf);
bth1 = be32_to_cpu(ohdr->bth[1]);
- if (do_cnp && (bth1 & HFI1_FECN_SMASK)) {
+ if (do_cnp && (bth1 & IB_FECN_SMASK)) {
u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]);
return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh);
}
- if (!is_mcast && (bth1 & HFI1_BECN_SMASK)) {
+ if (!is_mcast && (bth1 & IB_BECN_SMASK)) {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
u32 lqpn = bth1 & RVT_QPN_MASK;
u8 sl = ibp->sc_to_sl[sc];
@@ -594,7 +565,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
while (1) {
struct hfi1_devdata *dd = rcd->dd;
- struct hfi1_ibport *ibp = &rcd->ppd->ibport_data;
+ struct hfi1_ibport *ibp = rcd_to_iport(rcd);
__le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
dd->rhf_offset;
struct rvt_qp *qp;
@@ -616,8 +587,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
packet->hdr = hfi1_get_msgheader(dd, rhf_addr);
hdr = packet->hdr;
-
- lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ lnh = ib_get_lnh(hdr);
if (lnh == HFI1_LRH_BTH) {
packet->ohdr = &hdr->u.oth;
@@ -629,7 +599,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
}
bth1 = be32_to_cpu(packet->ohdr->bth[1]);
- is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK));
+ is_ecn = !!(bth1 & (IB_FECN_SMASK | IB_BECN_SMASK));
if (!is_ecn)
goto next;
@@ -647,31 +617,75 @@ static void __prescan_rxq(struct hfi1_packet *packet)
rcu_read_unlock();
/* turn off BECN, FECN */
- bth1 &= ~(HFI1_FECN_SMASK | HFI1_BECN_SMASK);
+ bth1 &= ~(IB_FECN_SMASK | IB_BECN_SMASK);
packet->ohdr->bth[1] = cpu_to_be32(bth1);
next:
update_ps_mdata(&mdata, rcd);
}
}
-static inline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
+static void process_rcv_qp_work(struct hfi1_ctxtdata *rcd)
+{
+ struct rvt_qp *qp, *nqp;
+
+ /*
+ * Iterate over all QPs waiting to respond.
+ * The list won't change since the IRQ is only run on one CPU.
+ */
+ list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
+ list_del_init(&qp->rspwait);
+ if (qp->r_flags & RVT_R_RSP_NAK) {
+ qp->r_flags &= ~RVT_R_RSP_NAK;
+ hfi1_send_rc_ack(rcd, qp, 0);
+ }
+ if (qp->r_flags & RVT_R_RSP_SEND) {
+ unsigned long flags;
+
+ qp->r_flags &= ~RVT_R_RSP_SEND;
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (ib_rvt_state_ops[qp->state] &
+ RVT_PROCESS_OR_FLUSH_SEND)
+ hfi1_schedule_send(qp);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ }
+ rvt_put_qp(qp);
+ }
+}
+
+static noinline int max_packet_exceeded(struct hfi1_packet *packet, int thread)
+{
+ if (thread) {
+ if ((packet->numpkt & (MAX_PKT_RECV_THREAD - 1)) == 0)
+ /* allow deferred processing */
+ process_rcv_qp_work(packet->rcd);
+ cond_resched();
+ return RCV_PKT_OK;
+ } else {
+ this_cpu_inc(*packet->rcd->dd->rcv_limit);
+ return RCV_PKT_LIMIT;
+ }
+}
+
+static inline int check_max_packet(struct hfi1_packet *packet, int thread)
{
int ret = RCV_PKT_OK;
+ if (unlikely((packet->numpkt & (MAX_PKT_RECV - 1)) == 0))
+ ret = max_packet_exceeded(packet, thread);
+ return ret;
+}
+
+static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
+{
+ int ret;
+
/* Set up for the next packet */
packet->rhqoff += packet->rsize;
if (packet->rhqoff >= packet->maxcnt)
packet->rhqoff = 0;
packet->numpkt++;
- if (unlikely((packet->numpkt & (MAX_PKT_RECV - 1)) == 0)) {
- if (thread) {
- cond_resched();
- } else {
- ret = RCV_PKT_LIMIT;
- this_cpu_inc(*packet->rcd->dd->rcv_limit);
- }
- }
+ ret = check_max_packet(packet, thread);
packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
packet->rcd->dd->rhf_offset;
@@ -682,7 +696,7 @@ static inline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
{
- int ret = RCV_PKT_OK;
+ int ret;
packet->hdr = hfi1_get_msgheader(packet->rcd->dd,
packet->rhf_addr);
@@ -723,14 +737,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
if (packet->rhqoff >= packet->maxcnt)
packet->rhqoff = 0;
- if (unlikely((packet->numpkt & (MAX_PKT_RECV - 1)) == 0)) {
- if (thread) {
- cond_resched();
- } else {
- ret = RCV_PKT_LIMIT;
- this_cpu_inc(*packet->rcd->dd->rcv_limit);
- }
- }
+ ret = check_max_packet(packet, thread);
packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
packet->rcd->dd->rhf_offset;
@@ -767,38 +774,6 @@ static inline void finish_packet(struct hfi1_packet *packet)
packet->etail, rcv_intr_dynamic, packet->numpkt);
}
-static inline void process_rcv_qp_work(struct hfi1_packet *packet)
-{
- struct hfi1_ctxtdata *rcd;
- struct rvt_qp *qp, *nqp;
-
- rcd = packet->rcd;
- rcd->head = packet->rhqoff;
-
- /*
- * Iterate over all QPs waiting to respond.
- * The list won't change since the IRQ is only run on one CPU.
- */
- list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
- list_del_init(&qp->rspwait);
- if (qp->r_flags & RVT_R_RSP_NAK) {
- qp->r_flags &= ~RVT_R_RSP_NAK;
- hfi1_send_rc_ack(rcd, qp, 0);
- }
- if (qp->r_flags & RVT_R_RSP_SEND) {
- unsigned long flags;
-
- qp->r_flags &= ~RVT_R_RSP_SEND;
- spin_lock_irqsave(&qp->s_lock, flags);
- if (ib_rvt_state_ops[qp->state] &
- RVT_PROCESS_OR_FLUSH_SEND)
- hfi1_schedule_send(qp);
- spin_unlock_irqrestore(&qp->s_lock, flags);
- }
- rvt_put_qp(qp);
- }
-}
-
/*
* Handle receive interrupts when using the no dma rtail option.
*/
@@ -826,7 +801,8 @@ int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
last = RCV_PKT_DONE;
process_rcv_update(last, &packet);
}
- process_rcv_qp_work(&packet);
+ process_rcv_qp_work(rcd);
+ rcd->head = packet.rhqoff;
bail:
finish_packet(&packet);
return last;
@@ -854,26 +830,49 @@ int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread)
last = RCV_PKT_DONE;
process_rcv_update(last, &packet);
}
- process_rcv_qp_work(&packet);
+ process_rcv_qp_work(rcd);
+ rcd->head = packet.rhqoff;
bail:
finish_packet(&packet);
return last;
}
-static inline void set_all_nodma_rtail(struct hfi1_devdata *dd)
+static inline void set_nodma_rtail(struct hfi1_devdata *dd, u8 ctxt)
{
int i;
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
+ /*
+ * For dynamically allocated kernel contexts (like vnic) switch
+ * interrupt handler only for that context. Otherwise, switch
+ * interrupt handler for all statically allocated kernel contexts.
+ */
+ if (ctxt >= dd->first_dyn_alloc_ctxt) {
+ dd->rcd[ctxt]->do_interrupt =
+ &handle_receive_interrupt_nodma_rtail;
+ return;
+ }
+
+ for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++)
dd->rcd[i]->do_interrupt =
&handle_receive_interrupt_nodma_rtail;
}
-static inline void set_all_dma_rtail(struct hfi1_devdata *dd)
+static inline void set_dma_rtail(struct hfi1_devdata *dd, u8 ctxt)
{
int i;
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
+ /*
+ * For dynamically allocated kernel contexts (like vnic) switch
+ * interrupt handler only for that context. Otherwise, switch
+ * interrupt handler for all statically allocated kernel contexts.
+ */
+ if (ctxt >= dd->first_dyn_alloc_ctxt) {
+ dd->rcd[ctxt]->do_interrupt =
+ &handle_receive_interrupt_dma_rtail;
+ return;
+ }
+
+ for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++)
dd->rcd[i]->do_interrupt =
&handle_receive_interrupt_dma_rtail;
}
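
set_nodma_rtail()/set_dma_rtail() now retarget the stored do_interrupt function pointer for a single dynamically allocated (vnic) context, or for every statically allocated kernel context otherwise. A small userspace model of that dispatch-through-a-stored-handler pattern; the struct, context count and split point are hypothetical stand-ins for hfi1_ctxtdata, num_rcv_contexts and first_dyn_alloc_ctxt:

/* Userspace model of the per-context handler switch; all names are
 * illustrative, not the driver's types. */
#include <stdio.h>

#define CTRL_CTXT            0
#define FIRST_DYN_ALLOC_CTXT 3
#define NUM_CTXTS            5

struct ctxt {
        int id;
        int (*do_interrupt)(struct ctxt *rcd, int thread);
};

static int slowpath(struct ctxt *rcd, int thread)
{
        printf("ctxt %d: slowpath\n", rcd->id);
        return 0;
}

static int dma_rtail(struct ctxt *rcd, int thread)
{
        printf("ctxt %d: dma_rtail\n", rcd->id);
        return 0;
}

static void set_dma_rtail(struct ctxt *rcd, unsigned int ctxt)
{
        unsigned int i;

        if (ctxt >= FIRST_DYN_ALLOC_CTXT) {        /* e.g. a vnic context */
                rcd[ctxt].do_interrupt = dma_rtail;
                return;
        }
        for (i = CTRL_CTXT + 1; i < FIRST_DYN_ALLOC_CTXT; i++)
                rcd[i].do_interrupt = dma_rtail;   /* all static kernel ctxts */
}

int main(void)
{
        struct ctxt rcd[NUM_CTXTS];
        unsigned int i;

        for (i = 0; i < NUM_CTXTS; i++) {
                rcd[i].id = i;
                rcd[i].do_interrupt = slowpath;
        }
        set_dma_rtail(rcd, 4);                     /* only ctxt 4 is switched */
        for (i = 0; i < NUM_CTXTS; i++)
                rcd[i].do_interrupt(&rcd[i], 0);
        return 0;
}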
@@ -883,8 +882,13 @@ void set_all_slowpath(struct hfi1_devdata *dd)
int i;
/* HFI1_CTRL_CTXT must always use the slow path interrupt handler */
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
- dd->rcd[i]->do_interrupt = &handle_receive_interrupt;
+ for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) {
+ struct hfi1_ctxtdata *rcd = dd->rcd[i];
+
+ if ((i < dd->first_dyn_alloc_ctxt) ||
+ (rcd && rcd->sc && (rcd->sc->type == SC_KERNEL)))
+ rcd->do_interrupt = &handle_receive_interrupt;
+ }
}
static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
@@ -896,7 +900,8 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
packet->rhf_addr);
u8 etype = rhf_rcv_type(packet->rhf);
- if (etype == RHF_RCV_TYPE_IB && hdr2sc(hdr, packet->rhf) != 0xf) {
+ if (etype == RHF_RCV_TYPE_IB &&
+ hfi1_9B_get_sc5(hdr, packet->rhf) != 0xf) {
int hwstate = read_logical_state(dd);
if (hwstate != LSTATE_ACTIVE) {
@@ -994,7 +999,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
last = RCV_PKT_DONE;
if (needset) {
dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n");
- set_all_nodma_rtail(dd);
+ set_nodma_rtail(dd, rcd->ctxt);
needset = 0;
}
} else {
@@ -1016,7 +1021,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
if (needset) {
dd_dev_info(dd,
"Switching to DMA_RTAIL\n");
- set_all_dma_rtail(dd);
+ set_dma_rtail(dd, rcd->ctxt);
needset = 0;
}
}
@@ -1024,7 +1029,8 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
process_rcv_update(last, &packet);
}
- process_rcv_qp_work(&packet);
+ process_rcv_qp_work(rcd);
+ rcd->head = packet.rhqoff;
bail:
/*
@@ -1064,10 +1070,10 @@ void receive_interrupt_work(struct work_struct *work)
set_link_state(ppd, HLS_UP_ACTIVE);
/*
- * Interrupt all kernel contexts that could have had an
- * interrupt during auto activation.
+ * Interrupt all statically allocated kernel contexts that could
+ * have had an interrupt during auto activation.
*/
- for (i = HFI1_CTRL_CTXT; i < dd->first_user_ctxt; i++)
+ for (i = HFI1_CTRL_CTXT; i < dd->first_dyn_alloc_ctxt; i++)
force_recv_intr(dd->rcd[i]);
}
@@ -1281,8 +1287,9 @@ int hfi1_reset_device(int unit)
spin_lock_irqsave(&dd->uctxt_lock, flags);
if (dd->rcd)
- for (i = dd->first_user_ctxt; i < dd->num_rcv_contexts; i++) {
- if (!dd->rcd[i] || !dd->rcd[i]->cnt)
+ for (i = dd->first_dyn_alloc_ctxt;
+ i < dd->num_rcv_contexts; i++) {
+ if (!dd->rcd[i])
continue;
spin_unlock_irqrestore(&dd->uctxt_lock, flags);
ret = -EBUSY;
@@ -1341,6 +1348,9 @@ void handle_eflags(struct hfi1_packet *packet)
*/
int process_receive_ib(struct hfi1_packet *packet)
{
+ if (unlikely(hfi1_dbg_fault_packet(packet)))
+ return RHF_RCV_CONTINUE;
+
trace_hfi1_rcvhdr(packet->rcd->ppd->dd,
packet->rcd->ctxt,
rhf_err_flags(packet->rhf),
@@ -1350,6 +1360,11 @@ int process_receive_ib(struct hfi1_packet *packet)
packet->updegr,
rhf_egr_index(packet->rhf));
+ if (unlikely(
+ (hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
+ (packet->rhf & RHF_DC_ERR))))
+ return RHF_RCV_CONTINUE;
+
if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
return RHF_RCV_CONTINUE;
@@ -1359,15 +1374,31 @@ int process_receive_ib(struct hfi1_packet *packet)
return RHF_RCV_CONTINUE;
}
+static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet)
+{
+ /* Packet received in VNIC context via RSM */
+ if (packet->rcd->is_vnic)
+ return true;
+
+ if ((HFI1_GET_L2_TYPE(packet->ebuf) == OPA_VNIC_L2_TYPE) &&
+ (HFI1_GET_L4_TYPE(packet->ebuf) == OPA_VNIC_L4_ETHR))
+ return true;
+
+ return false;
+}
+
int process_receive_bypass(struct hfi1_packet *packet)
{
struct hfi1_devdata *dd = packet->rcd->dd;
- if (unlikely(rhf_err_flags(packet->rhf)))
+ if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
+ } else if (hfi1_is_vnic_packet(packet)) {
+ hfi1_vnic_bypass_rcv(packet);
+ return RHF_RCV_CONTINUE;
+ }
- dd_dev_err(dd,
- "Bypass packets are not supported in normal operation. Dropping\n");
+ dd_dev_err(dd, "Unsupported bypass packet. Dropping\n");
incr_cntr64(&dd->sw_rcv_bypass_packet_errors);
if (!(dd->err_info_rcvport.status_and_code & OPA_EI_STATUS_SMASK)) {
u64 *flits = packet->ebuf;
@@ -1385,6 +1416,12 @@ int process_receive_bypass(struct hfi1_packet *packet)
int process_receive_error(struct hfi1_packet *packet)
{
+ /* KHdrHCRCErr -- KDETH packet with a bad HCRC */
+ if (unlikely(
+ hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
+ rhf_rcv_type_err(packet->rhf) == 3))
+ return RHF_RCV_CONTINUE;
+
handle_eflags(packet);
if (unlikely(rhf_err_flags(packet->rhf)))
@@ -1396,6 +1433,8 @@ int process_receive_error(struct hfi1_packet *packet)
int kdeth_process_expected(struct hfi1_packet *packet)
{
+ if (unlikely(hfi1_dbg_fault_packet(packet)))
+ return RHF_RCV_CONTINUE;
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
@@ -1408,6 +1447,8 @@ int kdeth_process_eager(struct hfi1_packet *packet)
{
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
+ if (unlikely(hfi1_dbg_fault_packet(packet)))
+ return RHF_RCV_CONTINUE;
dd_dev_err(packet->rcd->dd,
"Unhandled eager packet received. Dropping.\n");
diff --git a/drivers/infiniband/hw/hfi1/efivar.c b/drivers/infiniband/hw/hfi1/efivar.c
index 106349fc1fb9..d106d23016ba 100644
--- a/drivers/infiniband/hw/hfi1/efivar.c
+++ b/drivers/infiniband/hw/hfi1/efivar.c
@@ -45,6 +45,7 @@
*
*/
+#include <linux/ctype.h>
#include "efivar.h"
/* GUID for HFI1 variables in EFI */
@@ -150,15 +151,32 @@ fail:
int read_hfi1_efi_var(struct hfi1_devdata *dd, const char *kind,
unsigned long *size, void **return_data)
{
+ char prefix_name[64];
char name[64];
+ int result;
+ int i;
/* create a common prefix */
- snprintf(name, sizeof(name), "%04x:%02x:%02x.%x-%s",
+ snprintf(prefix_name, sizeof(prefix_name), "%04x:%02x:%02x.%x",
pci_domain_nr(dd->pcidev->bus),
dd->pcidev->bus->number,
PCI_SLOT(dd->pcidev->devfn),
- PCI_FUNC(dd->pcidev->devfn),
- kind);
+ PCI_FUNC(dd->pcidev->devfn));
+ snprintf(name, sizeof(name), "%s-%s", prefix_name, kind);
+ result = read_efi_var(name, size, return_data);
+
+ /*
+ * If reading the lowercase EFI variable fails, read the uppercase
+ * variable.
+ */
+ if (result) {
+ /* Converting to uppercase */
+ for (i = 0; prefix_name[i]; i++)
+ if (isalpha(prefix_name[i]))
+ prefix_name[i] = toupper(prefix_name[i]);
+ snprintf(name, sizeof(name), "%s-%s", prefix_name, kind);
+ result = read_efi_var(name, size, return_data);
+ }
- return read_efi_var(name, size, return_data);
+ return result;
}
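
read_hfi1_efi_var() now builds the lower-case PCI prefix first and, if that variable is missing, retries after upper-casing the prefix. A userspace sketch of the same two-step lookup is below; lookup() and the hard-coded names stand in for read_efi_var() and are assumptions for illustration:

/* Userspace sketch of the lower-/upper-case EFI name fallback; lookup()
 * and its single hard-coded entry stand in for read_efi_var(). */
#include <ctype.h>
#include <stdio.h>
#include <string.h>

static int lookup(const char *name)
{
        /* pretend only the upper-case variant exists in the EFI store */
        return strcmp(name, "0000:AF:00.0-serial") == 0 ? 0 : -1;
}

static int read_var(const char *prefix, const char *kind)
{
        char pfx[64], name[128];
        int result, i;

        snprintf(pfx, sizeof(pfx), "%s", prefix);
        snprintf(name, sizeof(name), "%s-%s", pfx, kind);
        result = lookup(name);
        if (!result)
                return 0;

        for (i = 0; pfx[i]; i++)        /* retry with an upper-case prefix */
                if (isalpha((unsigned char)pfx[i]))
                        pfx[i] = toupper((unsigned char)pfx[i]);
        snprintf(name, sizeof(name), "%s-%s", pfx, kind);
        return lookup(name);
}

int main(void)
{
        printf("lower-case miss, upper-case hit: %d\n",
               read_var("0000:af:00.0", "serial"));
        return 0;
}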
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index bd786b7bd30b..3158128d57e8 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -48,6 +48,8 @@
#include <linux/cdev.h>
#include <linux/vmalloc.h>
#include <linux/io.h>
+#include <linux/sched/mm.h>
+#include <linux/bitmap.h>
#include <rdma/ib.h>
@@ -69,30 +71,37 @@
/*
* File operation functions
*/
-static int hfi1_file_open(struct inode *, struct file *);
-static int hfi1_file_close(struct inode *, struct file *);
-static ssize_t hfi1_write_iter(struct kiocb *, struct iov_iter *);
-static unsigned int hfi1_poll(struct file *, struct poll_table_struct *);
-static int hfi1_file_mmap(struct file *, struct vm_area_struct *);
-
-static u64 kvirt_to_phys(void *);
-static int assign_ctxt(struct file *, struct hfi1_user_info *);
-static int init_subctxts(struct hfi1_ctxtdata *, const struct hfi1_user_info *);
-static int user_init(struct file *);
-static int get_ctxt_info(struct file *, void __user *, __u32);
-static int get_base_info(struct file *, void __user *, __u32);
-static int setup_ctxt(struct file *);
-static int setup_subctxt(struct hfi1_ctxtdata *);
-static int get_user_context(struct file *, struct hfi1_user_info *, int);
-static int find_shared_ctxt(struct file *, const struct hfi1_user_info *);
-static int allocate_ctxt(struct file *, struct hfi1_devdata *,
- struct hfi1_user_info *);
-static unsigned int poll_urgent(struct file *, struct poll_table_struct *);
-static unsigned int poll_next(struct file *, struct poll_table_struct *);
-static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long);
-static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16);
-static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int);
-static int vma_fault(struct vm_area_struct *, struct vm_fault *);
+static int hfi1_file_open(struct inode *inode, struct file *fp);
+static int hfi1_file_close(struct inode *inode, struct file *fp);
+static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from);
+static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct *pt);
+static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma);
+
+static u64 kvirt_to_phys(void *addr);
+static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo);
+static int init_subctxts(struct hfi1_ctxtdata *uctxt,
+ const struct hfi1_user_info *uinfo);
+static int init_user_ctxt(struct hfi1_filedata *fd);
+static void user_init(struct hfi1_ctxtdata *uctxt);
+static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
+ __u32 len);
+static int get_base_info(struct hfi1_filedata *fd, void __user *ubase,
+ __u32 len);
+static int setup_base_ctxt(struct hfi1_filedata *fd);
+static int setup_subctxt(struct hfi1_ctxtdata *uctxt);
+
+static int find_sub_ctxt(struct hfi1_filedata *fd,
+ const struct hfi1_user_info *uinfo);
+static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
+ struct hfi1_user_info *uinfo);
+static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt);
+static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt);
+static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
+ unsigned long events);
+static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey);
+static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
+ int start_stop);
+static int vma_fault(struct vm_fault *vmf);
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
unsigned long arg);
@@ -172,6 +181,9 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
struct hfi1_devdata,
user_cdev);
+ if (!((dd->flags & HFI1_PRESENT) && dd->kregbase))
+ return -EINVAL;
+
if (!atomic_inc_not_zero(&dd->user_refcount))
return -ENXIO;
@@ -185,7 +197,8 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
if (fd) {
fd->rec_cpu_num = -1; /* no cpu affinity by default */
fd->mm = current->mm;
- atomic_inc(&fd->mm->mm_count);
+ mmgrab(fd->mm);
+ fd->dd = dd;
fp->private_data = fd;
} else {
fp->private_data = NULL;
@@ -228,20 +241,14 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
sizeof(uinfo)))
return -EFAULT;
- ret = assign_ctxt(fp, &uinfo);
- if (ret < 0)
- return ret;
- ret = setup_ctxt(fp);
- if (ret)
- return ret;
- ret = user_init(fp);
+ ret = assign_ctxt(fd, &uinfo);
break;
case HFI1_IOCTL_CTXT_INFO:
- ret = get_ctxt_info(fp, (void __user *)(unsigned long)arg,
+ ret = get_ctxt_info(fd, (void __user *)(unsigned long)arg,
sizeof(struct hfi1_ctxt_info));
break;
case HFI1_IOCTL_USER_INFO:
- ret = get_base_info(fp, (void __user *)(unsigned long)arg,
+ ret = get_base_info(fd, (void __user *)(unsigned long)arg,
sizeof(struct hfi1_base_info));
break;
case HFI1_IOCTL_CREDIT_UPD:
@@ -255,7 +262,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
sizeof(tinfo)))
return -EFAULT;
- ret = hfi1_user_exp_rcv_setup(fp, &tinfo);
+ ret = hfi1_user_exp_rcv_setup(fd, &tinfo);
if (!ret) {
/*
* Copy the number of tidlist entries we used
@@ -277,7 +284,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
sizeof(tinfo)))
return -EFAULT;
- ret = hfi1_user_exp_rcv_clear(fp, &tinfo);
+ ret = hfi1_user_exp_rcv_clear(fd, &tinfo);
if (ret)
break;
addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
@@ -292,7 +299,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
sizeof(tinfo)))
return -EFAULT;
- ret = hfi1_user_exp_rcv_invalid(fp, &tinfo);
+ ret = hfi1_user_exp_rcv_invalid(fd, &tinfo);
if (ret)
break;
addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
@@ -429,7 +436,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
unsigned long count = 0;
ret = hfi1_user_sdma_process_request(
- kiocb->ki_filp, (struct iovec *)(from->iov + done),
+ fd, (struct iovec *)(from->iov + done),
dim, &count);
if (ret) {
reqs = ret;
@@ -585,8 +592,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
* knows where its own bitmap is within the page.
*/
memaddr = (unsigned long)(dd->events +
- ((uctxt->ctxt - dd->first_user_ctxt) *
- HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK;
+ ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
+ HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK;
memlen = PAGE_SIZE;
/*
* v3.7 removes VM_RESERVED but the effect is kept by
@@ -596,6 +603,10 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
vmf = 1;
break;
case STATUS:
+ if (flags & (unsigned long)(VM_WRITE | VM_EXEC)) {
+ ret = -EPERM;
+ goto done;
+ }
memaddr = kvirt_to_phys((void *)dd->status);
memlen = PAGE_SIZE;
flags |= VM_IO | VM_DONTEXPAND;
@@ -695,7 +706,7 @@ done:
* Local (non-chip) user memory is not mapped right away but as it is
* accessed by the user-level code.
*/
-static int vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int vma_fault(struct vm_fault *vmf)
{
struct page *page;
@@ -751,16 +762,19 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
/* release the cpu */
hfi1_put_proc_affinity(fdata->rec_cpu_num);
+ /* clean up rcv side */
+ hfi1_user_exp_rcv_free(fdata);
+
/*
* Clear any left over, unhandled events so the next process that
* gets this context doesn't get confused.
*/
- ev = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) *
+ ev = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
HFI1_MAX_SHARED_CTXTS) + fdata->subctxt;
*ev = 0;
- if (--uctxt->cnt) {
- uctxt->active_slaves &= ~(1 << fdata->subctxt);
+ __clear_bit(fdata->subctxt, uctxt->in_use_ctxts);
+ if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) {
mutex_unlock(&hfi1_mutex);
goto done;
}
@@ -790,8 +804,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
dd->rcd[uctxt->ctxt] = NULL;
- hfi1_user_exp_rcv_free(fdata);
- hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
+ hfi1_user_exp_rcv_grp_free(uctxt);
+ hfi1_clear_ctxt_pkey(dd, uctxt);
uctxt->rcvwait_to = 0;
uctxt->piowait_to = 0;
@@ -831,121 +845,135 @@ static u64 kvirt_to_phys(void *addr)
return paddr;
}
-static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
+static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
{
- int i_minor, ret = 0;
+ int ret;
unsigned int swmajor, swminor;
swmajor = uinfo->userversion >> 16;
- if (swmajor != HFI1_USER_SWMAJOR) {
- ret = -ENODEV;
- goto done;
- }
+ if (swmajor != HFI1_USER_SWMAJOR)
+ return -ENODEV;
swminor = uinfo->userversion & 0xffff;
mutex_lock(&hfi1_mutex);
- /* First, lets check if we need to setup a shared context? */
+ /*
+ * Get a sub context if necessary.
+ * ret < 0 error, 0 no context, 1 sub-context found
+ */
+ ret = 0;
if (uinfo->subctxt_cnt) {
- struct hfi1_filedata *fd = fp->private_data;
-
- ret = find_shared_ctxt(fp, uinfo);
- if (ret < 0)
- goto done_unlock;
- if (ret) {
+ ret = find_sub_ctxt(fd, uinfo);
+ if (ret > 0)
fd->rec_cpu_num =
hfi1_get_proc_affinity(fd->uctxt->numa_id);
- }
}
/*
- * We execute the following block if we couldn't find a
- * shared context or if context sharing is not required.
+ * Allocate a base context if context sharing is not required or we
+ * couldn't find a sub context.
*/
- if (!ret) {
- i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE;
- ret = get_user_context(fp, uinfo, i_minor);
- }
-done_unlock:
+ if (!ret)
+ ret = allocate_ctxt(fd, fd->dd, uinfo);
+
mutex_unlock(&hfi1_mutex);
-done:
+
+ /* Depending on the context type, do the appropriate init */
+ if (ret > 0) {
+ /*
+ * sub-context info can only be set up after the base
+ * context has been completed.
+ */
+ ret = wait_event_interruptible(fd->uctxt->wait, !test_bit(
+ HFI1_CTXT_BASE_UNINIT,
+ &fd->uctxt->event_flags));
+ if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) {
+ clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
+ return -ENOMEM;
+ }
+ /* The only thing a sub context needs is the user_xxx stuff */
+ if (!ret)
+ ret = init_user_ctxt(fd);
+
+ if (ret)
+ clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
+ } else if (!ret) {
+ ret = setup_base_ctxt(fd);
+ if (fd->uctxt->subctxt_cnt) {
+ /* If there is an error, set the failed bit. */
+ if (ret)
+ set_bit(HFI1_CTXT_BASE_FAILED,
+ &fd->uctxt->event_flags);
+ /*
+ * Base context is done, notify anybody using a
+ * sub-context that is waiting for this completion
+ */
+ clear_bit(HFI1_CTXT_BASE_UNINIT,
+ &fd->uctxt->event_flags);
+ wake_up(&fd->uctxt->wait);
+ }
+ }
+
return ret;
}
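
assign_ctxt() now allocates the context under hfi1_mutex but runs the heavier initialization outside it, using the HFI1_CTXT_BASE_UNINIT/HFI1_CTXT_BASE_FAILED event flags and the context wait queue so a sub-context opener blocks until the base context finishes or fails. A condition-variable sketch of that handshake, with illustrative names only:

/* pthread sketch of the base/sub-context handshake; the condition variable
 * stands in for the kernel wait queue and event_flags bits. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  done = PTHREAD_COND_INITIALIZER;
static bool base_uninit = true;    /* mirrors HFI1_CTXT_BASE_UNINIT */
static bool base_failed;           /* mirrors HFI1_CTXT_BASE_FAILED */

static void *sub_ctxt(void *arg)
{
        pthread_mutex_lock(&lock);
        while (base_uninit)            /* wait_event_interruptible(...) */
                pthread_cond_wait(&done, &lock);
        pthread_mutex_unlock(&lock);
        printf("sub-context: base %s\n", base_failed ? "failed" : "ready");
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, sub_ctxt, NULL);

        /* base context: run setup, record the result, then wake waiters */
        pthread_mutex_lock(&lock);
        base_failed = false;           /* setup_base_ctxt() succeeded */
        base_uninit = false;           /* clear_bit(HFI1_CTXT_BASE_UNINIT, ...) */
        pthread_cond_broadcast(&done); /* wake_up(&uctxt->wait) */
        pthread_mutex_unlock(&lock);

        pthread_join(t, NULL);
        return 0;
}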
-static int get_user_context(struct file *fp, struct hfi1_user_info *uinfo,
- int devno)
+/*
+ * The hfi1_mutex must be held when this function is called. It is
+ * necessary to ensure serialized access to the bitmask in_use_ctxts.
+ */
+static int find_sub_ctxt(struct hfi1_filedata *fd,
+ const struct hfi1_user_info *uinfo)
{
- struct hfi1_devdata *dd = NULL;
- int devmax, npresent, nup;
+ int i;
+ struct hfi1_devdata *dd = fd->dd;
+ u16 subctxt;
- devmax = hfi1_count_units(&npresent, &nup);
- if (!npresent)
- return -ENXIO;
+ for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) {
+ struct hfi1_ctxtdata *uctxt = dd->rcd[i];
- if (!nup)
- return -ENETDOWN;
+ /* Skip ctxts which are not yet open */
+ if (!uctxt ||
+ bitmap_empty(uctxt->in_use_ctxts,
+ HFI1_MAX_SHARED_CTXTS))
+ continue;
- dd = hfi1_lookup(devno);
- if (!dd)
- return -ENODEV;
- else if (!dd->freectxts)
- return -EBUSY;
+ /* Skip dynamically allocated kernel contexts */
+ if (uctxt->sc && (uctxt->sc->type == SC_KERNEL))
+ continue;
- return allocate_ctxt(fp, dd, uinfo);
-}
+ /* Skip ctxt if it doesn't match the requested one */
+ if (memcmp(uctxt->uuid, uinfo->uuid,
+ sizeof(uctxt->uuid)) ||
+ uctxt->jkey != generate_jkey(current_uid()) ||
+ uctxt->subctxt_id != uinfo->subctxt_id ||
+ uctxt->subctxt_cnt != uinfo->subctxt_cnt)
+ continue;
-static int find_shared_ctxt(struct file *fp,
- const struct hfi1_user_info *uinfo)
-{
- int devmax, ndev, i;
- int ret = 0;
- struct hfi1_filedata *fd = fp->private_data;
+ /* Verify the sharing process matches the master */
+ if (uctxt->userversion != uinfo->userversion)
+ return -EINVAL;
- devmax = hfi1_count_units(NULL, NULL);
+ /* Find an unused context */
+ subctxt = find_first_zero_bit(uctxt->in_use_ctxts,
+ HFI1_MAX_SHARED_CTXTS);
+ if (subctxt >= uctxt->subctxt_cnt)
+ return -EBUSY;
- for (ndev = 0; ndev < devmax; ndev++) {
- struct hfi1_devdata *dd = hfi1_lookup(ndev);
+ fd->uctxt = uctxt;
+ fd->subctxt = subctxt;
+ __set_bit(fd->subctxt, uctxt->in_use_ctxts);
- if (!(dd && (dd->flags & HFI1_PRESENT) && dd->kregbase))
- continue;
- for (i = dd->first_user_ctxt; i < dd->num_rcv_contexts; i++) {
- struct hfi1_ctxtdata *uctxt = dd->rcd[i];
-
- /* Skip ctxts which are not yet open */
- if (!uctxt || !uctxt->cnt)
- continue;
- /* Skip ctxt if it doesn't match the requested one */
- if (memcmp(uctxt->uuid, uinfo->uuid,
- sizeof(uctxt->uuid)) ||
- uctxt->jkey != generate_jkey(current_uid()) ||
- uctxt->subctxt_id != uinfo->subctxt_id ||
- uctxt->subctxt_cnt != uinfo->subctxt_cnt)
- continue;
-
- /* Verify the sharing process matches the master */
- if (uctxt->userversion != uinfo->userversion ||
- uctxt->cnt >= uctxt->subctxt_cnt) {
- ret = -EINVAL;
- goto done;
- }
- fd->uctxt = uctxt;
- fd->subctxt = uctxt->cnt++;
- uctxt->active_slaves |= 1 << fd->subctxt;
- ret = 1;
- goto done;
- }
+ return 1;
}
-done:
- return ret;
+ return 0;
}
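
find_sub_ctxt() replaces the old cnt counter with a bitmap claim: find_first_zero_bit() picks the first free slot in in_use_ctxts and __set_bit() marks it taken, serialized by hfi1_mutex. A userspace sketch of that slot claim, using a plain unsigned long in place of the kernel bitmap helpers:

/* Userspace sketch of the in_use_ctxts slot claim; claim_slot() is an
 * illustrative helper, not a driver function, and assumes at most
 * 8 * sizeof(unsigned long) slots. */
#include <stdio.h>

#define MAX_SHARED_CTXTS 8                        /* assumed limit */

static int claim_slot(unsigned long *map, unsigned int nslots)
{
        unsigned int bit;

        for (bit = 0; bit < nslots; bit++) {      /* find_first_zero_bit() */
                if (!(*map & (1UL << bit))) {
                        *map |= 1UL << bit;       /* __set_bit() */
                        return bit;
                }
        }
        return -1;                                /* the driver returns -EBUSY */
}

int main(void)
{
        unsigned long in_use = 0;
        int i;

        for (i = 0; i < MAX_SHARED_CTXTS + 1; i++)
                printf("claimed sub-context %d\n",
                       claim_slot(&in_use, MAX_SHARED_CTXTS));
        return 0;
}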
-static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
+static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
struct hfi1_user_info *uinfo)
{
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt;
- unsigned ctxt;
+ unsigned int ctxt;
int ret, numa;
if (dd->flags & HFI1_FROZEN) {
@@ -959,7 +987,16 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
return -EIO;
}
- for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts; ctxt++)
+ /*
+ * This check is sort of redundant to the next EBUSY error. It would
+ * also indicate an inconsistency in the driver if this value was
+ * zero, but there were still contexts available.
+ */
+ if (!dd->freectxts)
+ return -EBUSY;
+
+ for (ctxt = dd->first_dyn_alloc_ctxt;
+ ctxt < dd->num_rcv_contexts; ctxt++)
if (!dd->rcd[ctxt])
break;
@@ -1001,12 +1038,12 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
goto ctxdata_free;
/*
- * Setup shared context resources if the user-level has requested
- * shared contexts and this is the 'master' process.
+ * Setup sub context resources if the user-level has requested
+ * sub contexts.
* This has to be done here so the rest of the sub-contexts find the
* proper master.
*/
- if (uinfo->subctxt_cnt && !fd->subctxt) {
+ if (uinfo->subctxt_cnt) {
ret = init_subctxts(uctxt, uinfo);
/*
* On error, we don't need to disable and de-allocate the
@@ -1043,7 +1080,7 @@ ctxdata_free:
static int init_subctxts(struct hfi1_ctxtdata *uctxt,
const struct hfi1_user_info *uinfo)
{
- unsigned num_subctxts;
+ u16 num_subctxts;
num_subctxts = uinfo->subctxt_cnt;
if (num_subctxts > HFI1_MAX_SHARED_CTXTS)
@@ -1051,9 +1088,8 @@ static int init_subctxts(struct hfi1_ctxtdata *uctxt,
uctxt->subctxt_cnt = uinfo->subctxt_cnt;
uctxt->subctxt_id = uinfo->subctxt_id;
- uctxt->active_slaves = 1;
uctxt->redirect_seq_cnt = 1;
- set_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
+ set_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
return 0;
}
@@ -1061,13 +1097,12 @@ static int init_subctxts(struct hfi1_ctxtdata *uctxt,
static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
{
int ret = 0;
- unsigned num_subctxts = uctxt->subctxt_cnt;
+ u16 num_subctxts = uctxt->subctxt_cnt;
uctxt->subctxt_uregbase = vmalloc_user(PAGE_SIZE);
- if (!uctxt->subctxt_uregbase) {
- ret = -ENOMEM;
- goto bail;
- }
+ if (!uctxt->subctxt_uregbase)
+ return -ENOMEM;
+
/* We can take the size of the RcvHdr Queue from the master */
uctxt->subctxt_rcvhdr_base = vmalloc_user(uctxt->rcvhdrq_size *
num_subctxts);
@@ -1082,25 +1117,22 @@ static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
ret = -ENOMEM;
goto bail_rhdr;
}
- goto bail;
+
+ return 0;
+
bail_rhdr:
vfree(uctxt->subctxt_rcvhdr_base);
+ uctxt->subctxt_rcvhdr_base = NULL;
bail_ureg:
vfree(uctxt->subctxt_uregbase);
uctxt->subctxt_uregbase = NULL;
-bail:
+
return ret;
}
-static int user_init(struct file *fp)
+static void user_init(struct hfi1_ctxtdata *uctxt)
{
unsigned int rcvctrl_ops = 0;
- struct hfi1_filedata *fd = fp->private_data;
- struct hfi1_ctxtdata *uctxt = fd->uctxt;
-
- /* make sure that the context has already been setup */
- if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags))
- return -EFAULT;
/* initialize poll variables... */
uctxt->urgent = 0;
@@ -1148,20 +1180,12 @@ static int user_init(struct file *fp)
else
rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);
-
- /* Notify any waiting slaves */
- if (uctxt->subctxt_cnt) {
- clear_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
- wake_up(&uctxt->wait);
- }
-
- return 0;
}
-static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len)
+static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
+ __u32 len)
{
struct hfi1_ctxt_info cinfo;
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
int ret = 0;
@@ -1199,75 +1223,71 @@ static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len)
return ret;
}
-static int setup_ctxt(struct file *fp)
+static int init_user_ctxt(struct hfi1_filedata *fd)
+{
+ struct hfi1_ctxtdata *uctxt = fd->uctxt;
+ int ret;
+
+ ret = hfi1_user_sdma_alloc_queues(uctxt, fd);
+ if (ret)
+ return ret;
+
+ ret = hfi1_user_exp_rcv_init(fd);
+
+ return ret;
+}
+
+static int setup_base_ctxt(struct hfi1_filedata *fd)
{
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
int ret = 0;
- /*
- * Context should be set up only once, including allocation and
- * programming of eager buffers. This is done if context sharing
- * is not requested or by the master process.
- */
- if (!uctxt->subctxt_cnt || !fd->subctxt) {
- ret = hfi1_init_ctxt(uctxt->sc);
- if (ret)
- goto done;
+ hfi1_init_ctxt(uctxt->sc);
- /* Now allocate the RcvHdr queue and eager buffers. */
- ret = hfi1_create_rcvhdrq(dd, uctxt);
- if (ret)
- goto done;
- ret = hfi1_setup_eagerbufs(uctxt);
- if (ret)
- goto done;
- if (uctxt->subctxt_cnt && !fd->subctxt) {
- ret = setup_subctxt(uctxt);
- if (ret)
- goto done;
- }
- } else {
- ret = wait_event_interruptible(uctxt->wait, !test_bit(
- HFI1_CTXT_MASTER_UNINIT,
- &uctxt->event_flags));
- if (ret)
- goto done;
- }
+ /* Now allocate the RcvHdr queue and eager buffers. */
+ ret = hfi1_create_rcvhdrq(dd, uctxt);
+ if (ret)
+ return ret;
- ret = hfi1_user_sdma_alloc_queues(uctxt, fp);
+ ret = hfi1_setup_eagerbufs(uctxt);
if (ret)
- goto done;
- /*
- * Expected receive has to be setup for all processes (including
- * shared contexts). However, it has to be done after the master
- * context has been fully configured as it depends on the
- * eager/expected split of the RcvArray entries.
- * Setting it up here ensures that the subcontexts will be waiting
- * (due to the above wait_event_interruptible() until the master
- * is setup.
- */
- ret = hfi1_user_exp_rcv_init(fp);
+ goto setup_failed;
+
+ /* If sub-contexts are enabled, do the appropriate setup */
+ if (uctxt->subctxt_cnt)
+ ret = setup_subctxt(uctxt);
if (ret)
- goto done;
+ goto setup_failed;
- set_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags);
-done:
+ ret = hfi1_user_exp_rcv_grp_init(fd);
+ if (ret)
+ goto setup_failed;
+
+ ret = init_user_ctxt(fd);
+ if (ret)
+ goto setup_failed;
+
+ user_init(uctxt);
+
+ return 0;
+
+setup_failed:
+ hfi1_free_ctxtdata(dd, uctxt);
return ret;
}
-static int get_base_info(struct file *fp, void __user *ubase, __u32 len)
+static int get_base_info(struct hfi1_filedata *fd, void __user *ubase,
+ __u32 len)
{
struct hfi1_base_info binfo;
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
ssize_t sz;
unsigned offset;
int ret = 0;
- trace_hfi1_uctxtdata(uctxt->dd, uctxt);
+ trace_hfi1_uctxtdata(uctxt->dd, uctxt, fd->subctxt);
memset(&binfo, 0, sizeof(binfo));
binfo.hw_version = dd->revision;
@@ -1305,7 +1325,7 @@ static int get_base_info(struct file *fp, void __user *ubase, __u32 len)
*/
binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt,
fd->subctxt, 0);
- offset = offset_in_page((((uctxt->ctxt - dd->first_user_ctxt) *
+ offset = offset_in_page((((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
HFI1_MAX_SHARED_CTXTS) + fd->subctxt) *
sizeof(*dd->events));
binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt,
@@ -1399,12 +1419,12 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit)
}
spin_lock_irqsave(&dd->uctxt_lock, flags);
- for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts;
+ for (ctxt = dd->first_dyn_alloc_ctxt; ctxt < dd->num_rcv_contexts;
ctxt++) {
uctxt = dd->rcd[ctxt];
if (uctxt) {
unsigned long *evs = dd->events +
- (uctxt->ctxt - dd->first_user_ctxt) *
+ (uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
HFI1_MAX_SHARED_CTXTS;
int i;
/*
@@ -1431,7 +1451,7 @@ done:
* overflow conditions. start_stop==1 re-enables, to be used to
* re-init the software copy of the head register
*/
-static int manage_rcvq(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
+static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
int start_stop)
{
struct hfi1_devdata *dd = uctxt->dd;
@@ -1466,7 +1486,7 @@ bail:
* User process then performs actions appropriate to bit having been
* set, if desired, and checks again in future.
*/
-static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt,
+static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
unsigned long events)
{
int i;
@@ -1476,7 +1496,7 @@ static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt,
if (!dd->events)
return 0;
- evs = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) *
+ evs = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
HFI1_MAX_SHARED_CTXTS) + subctxt;
for (i = 0; i <= _HFI1_MAX_EVENT_BIT; i++) {
@@ -1487,8 +1507,7 @@ static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt,
return 0;
}
-static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
- u16 pkey)
+static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey)
{
int ret = -ENOENT, i, intable = 0;
struct hfi1_pportdata *ppd = uctxt->ppd;
diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index 0dd50cdb039a..4042c11b2742 100644
--- a/drivers/infiniband/hw/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -1004,7 +1004,9 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
{
u64 reg;
int ret;
- u8 ver_a, ver_b;
+ u8 ver_major;
+ u8 ver_minor;
+ u8 ver_patch;
/*
* DC Reset sequence
@@ -1073,10 +1075,10 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
return -ETIMEDOUT;
}
- read_misc_status(dd, &ver_a, &ver_b);
- dd_dev_info(dd, "8051 firmware version %d.%d\n",
- (int)ver_b, (int)ver_a);
- dd->dc8051_ver = dc8051_ver(ver_b, ver_a);
+ read_misc_status(dd, &ver_major, &ver_minor, &ver_patch);
+ dd_dev_info(dd, "8051 firmware version %d.%d.%d\n",
+ (int)ver_major, (int)ver_minor, (int)ver_patch);
+ dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch);
return 0;
}
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 751a0fb29fa5..414a04a481c2 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1,7 +1,7 @@
#ifndef _HFI1_KERNEL_H
#define _HFI1_KERNEL_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -54,6 +54,7 @@
#include <linux/list.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
+#include <linux/idr.h>
#include <linux/io.h>
#include <linux/fs.h>
#include <linux/completion.h>
@@ -66,6 +67,7 @@
#include <linux/i2c-algo-bit.h>
#include <rdma/ib_hdrs.h>
#include <linux/rhashtable.h>
+#include <linux/netdevice.h>
#include <rdma/rdma_vt.h>
#include "chip_registers.h"
@@ -194,12 +196,6 @@ struct hfi1_ctxtdata {
void *rcvhdrq;
/* kernel virtual address where hdrqtail is updated */
volatile __le64 *rcvhdrtail_kvaddr;
- /*
- * Shared page for kernel to signal user processes that send buffers
- * need disarming. The process should call HFI1_CMD_DISARM_BUFS
- * or HFI1_CMD_ACK_EVENT with IPATH_EVENT_DISARM_BUFS set.
- */
- unsigned long *user_event_mask;
/* when waiting for rcv or pioavail */
wait_queue_head_t wait;
/* rcvhdrq size (for freeing) */
@@ -222,13 +218,12 @@ struct hfi1_ctxtdata {
* (ignoring forks, dup, etc. for now)
*/
int cnt;
+ /* Device context index */
+ unsigned ctxt;
/*
- * how much space to leave at start of eager TID entries for
- * protocol use, on each TID
+ * non-zero if ctxt can be shared, and defines the maximum number of
+ * sub-contexts for this device context.
*/
- /* instead of calculating it */
- unsigned ctxt;
- /* non-zero if ctxt is being shared. */
u16 subctxt_cnt;
/* non-zero if ctxt is being shared. */
u16 subctxt_id;
@@ -278,16 +273,18 @@ struct hfi1_ctxtdata {
struct hfi1_devdata *dd;
/* so functions that need physical port can get it easily */
struct hfi1_pportdata *ppd;
+ /* associated msix interrupt */
+ u32 msix_intr;
/* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
void *subctxt_uregbase;
/* An array of pages for the eager receive buffers * N */
void *subctxt_rcvegrbuf;
/* An array of pages for the eager header queue entries * N */
void *subctxt_rcvhdr_base;
+ /* Bitmask of in use context(s) */
+ DECLARE_BITMAP(in_use_ctxts, HFI1_MAX_SHARED_CTXTS);
/* The version of the library which opened this ctxt */
u32 userversion;
- /* Bitmask of active slaves */
- u32 active_slaves;
/* Type of packets or conditions we want to poll for */
u16 poll_type;
/* receive packet sequence counter */
@@ -337,6 +334,12 @@ struct hfi1_ctxtdata {
* packets with the wrong interrupt handler.
*/
int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded);
+
+ /* Indicates that this is vnic context */
+ bool is_vnic;
+
+ /* vnic queue index this context is mapped to */
+ u8 vnic_q_idx;
};
/*
@@ -356,12 +359,11 @@ struct hfi1_packet {
u64 rhf;
u32 maxcnt;
u32 rhqoff;
- u32 hdrqtail;
- int numpkt;
u16 tlen;
- u16 hlen;
s16 etail;
- u16 rsize;
+ u8 hlen;
+ u8 numpkt;
+ u8 rsize;
u8 updegr;
u8 rcv_flags;
u8 etype;
@@ -475,7 +477,7 @@ struct rvt_sge_state;
#define HFI1_PART_ENFORCE_OUT 0x2
/* how often we check for synthetic counter wrap around */
-#define SYNTH_CNT_TIME 2
+#define SYNTH_CNT_TIME 3
/* Counter flags */
#define CNTR_NORMAL 0x0 /* Normal counters, just read register */
@@ -809,6 +811,32 @@ struct hfi1_asic_data {
struct hfi1_i2c_bus *i2c_bus1;
};
+/* sizes for both the QP and RSM map tables */
+#define NUM_MAP_ENTRIES 256
+#define NUM_MAP_REGS 32
+
+/*
+ * Number of VNIC contexts used. Ensure it is less than or equal to
+ * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE).
+ */
+#define HFI1_NUM_VNIC_CTXT 8
+
+/* Number of VNIC RSM entries */
+#define NUM_VNIC_MAP_ENTRIES 8
+
+/* Virtual NIC information */
+struct hfi1_vnic_data {
+ struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT];
+ struct kmem_cache *txreq_cache;
+ u8 num_vports;
+ struct idr vesw_idr;
+ u8 rmt_start;
+ u8 num_ctxt;
+ u32 msix_idx;
+};
+
+struct hfi1_vnic_vport_info;
+
/* device data struct now contains only "general per-device" info.
* fields related to a physical IB port are in a hfi1_pportdata struct.
*/
@@ -927,8 +955,9 @@ struct hfi1_devdata {
spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */
/* around rcd and (user ctxts) ctxt_cnt use (intr vs free) */
spinlock_t uctxt_lock; /* rcd and user context changes */
- /* exclusive access to 8051 */
- spinlock_t dc8051_lock;
+ struct mutex dc8051_lock; /* exclusive access to 8051 */
+ struct workqueue_struct *update_cntr_wq;
+ struct work_struct update_cntr_work;
/* exclusive access to 8051 memory */
spinlock_t dc8051_memlock;
int dc8051_timed_out; /* remember if the 8051 timed out */
@@ -1016,12 +1045,20 @@ struct hfi1_devdata {
/* initial vl15 credits to use */
u16 vl15_init;
+ /*
+ * Cached value for vl15buf, read during verify cap interrupt. VL15
+ * credits are to be kept at 0 and set when handling the link-up
+ * interrupt. This removes the possibility of receiving VL15 MAD
+ * packets before this HFI is ready.
+ */
+ u16 vl15buf_cached;
+
/* Misc small ints */
u8 n_krcv_queues;
u8 qos_shift;
u16 irev; /* implementation revision */
- u16 dc8051_ver; /* 8051 firmware version */
+ u32 dc8051_ver; /* 8051 firmware version */
spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
struct platform_config platform_config;
@@ -1032,6 +1069,7 @@ struct hfi1_devdata {
/* MSI-X information */
struct hfi1_msix_entry *msix_entries;
u32 num_msix_entries;
+ u32 first_dyn_msix_idx;
/* INTx information */
u32 requested_intx_irq; /* did we request one? */
@@ -1116,6 +1154,9 @@ struct hfi1_devdata {
send_routine process_dma_send;
void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
u64 pbc, const void *from, size_t count);
+ int (*process_vnic_dma_send)(struct hfi1_devdata *dd, u8 q_idx,
+ struct hfi1_vnic_vport_info *vinfo,
+ struct sk_buff *skb, u64 pbc, u8 plen);
/* hfi1_pportdata, points to array of (physical) port-specific
* data structs, indexed by pidx (0..n-1)
*/
@@ -1127,8 +1168,8 @@ struct hfi1_devdata {
u16 flags;
/* Number of physical ports available */
u8 num_pports;
- /* Lowest context number which can be used by user processes */
- u8 first_user_ctxt;
+ /* Lowest context number which can be used by user processes or VNIC */
+ u8 first_dyn_alloc_ctxt;
/* adding a new field here would make it part of this cacheline */
/* seqlock for sc2vl */
@@ -1168,15 +1209,24 @@ struct hfi1_devdata {
bool eprom_available; /* true if EPROM is available for this device */
bool aspm_supported; /* Does HW support ASPM */
bool aspm_enabled; /* ASPM state: enabled/disabled */
- struct rhashtable sdma_rht;
+ struct rhashtable *sdma_rht;
struct kobject kobj;
+
+ /* vnic data */
+ struct hfi1_vnic_data vnic;
};
+static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare)
+{
+ return (dd->vnic.rmt_start + spare) > NUM_MAP_ENTRIES;
+}
+
/* 8051 firmware version helper */
-#define dc8051_ver(a, b) ((a) << 8 | (b))
-#define dc8051_ver_maj(a) ((a & 0xff00) >> 8)
-#define dc8051_ver_min(a) (a & 0x00ff)
+#define dc8051_ver(a, b, c) ((a) << 16 | (b) << 8 | (c))
+#define dc8051_ver_maj(a) (((a) & 0xff0000) >> 16)
+#define dc8051_ver_min(a) (((a) & 0x00ff00) >> 8)
+#define dc8051_ver_patch(a) ((a) & 0x0000ff)
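
dc8051_ver() now packs major, minor and patch into one u32, eight bits each, and the _maj/_min/_patch macros unpack them. A quick worked example; version 1.27.5 is an arbitrary illustration, not a real firmware release:

/* Worked example of the 3-field 8051 version packing. */
#include <stdio.h>

#define dc8051_ver(a, b, c)  ((a) << 16 | (b) << 8 | (c))
#define dc8051_ver_maj(a)    (((a) & 0xff0000) >> 16)
#define dc8051_ver_min(a)    (((a) & 0x00ff00) >> 8)
#define dc8051_ver_patch(a)  ((a) & 0x0000ff)

int main(void)
{
        unsigned int v = dc8051_ver(1, 27, 5);   /* 0x011b05 */

        printf("packed 0x%06x -> %u.%u.%u\n", v,
               dc8051_ver_maj(v), dc8051_ver_min(v), dc8051_ver_patch(v));
        return 0;
}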
/* f_put_tid types */
#define PT_EXPECTED 0
@@ -1189,10 +1239,11 @@ struct mmu_rb_handler;
/* Private data for file operations */
struct hfi1_filedata {
+ struct hfi1_devdata *dd;
struct hfi1_ctxtdata *uctxt;
- unsigned subctxt;
struct hfi1_user_sdma_comp_q *cq;
struct hfi1_user_sdma_pkt_q *pq;
+ u16 subctxt;
/* for cpu affinity; -1 if none */
int rec_cpu_num;
u32 tid_n_pinned;
@@ -1214,28 +1265,31 @@ struct hfi1_devdata *hfi1_lookup(int unit);
extern u32 hfi1_cpulist_count;
extern unsigned long *hfi1_cpulist;
-int hfi1_init(struct hfi1_devdata *, int);
-int hfi1_count_units(int *npresentp, int *nupp);
+int hfi1_init(struct hfi1_devdata *dd, int reinit);
int hfi1_count_active_units(void);
-int hfi1_diag_add(struct hfi1_devdata *);
-void hfi1_diag_remove(struct hfi1_devdata *);
+int hfi1_diag_add(struct hfi1_devdata *dd);
+void hfi1_diag_remove(struct hfi1_devdata *dd);
void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup);
void handle_user_interrupt(struct hfi1_ctxtdata *rcd);
-int hfi1_create_rcvhdrq(struct hfi1_devdata *, struct hfi1_ctxtdata *);
-int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *);
+int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd);
+int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd);
int hfi1_create_ctxts(struct hfi1_devdata *dd);
-struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *, u32, int);
-void hfi1_init_pportdata(struct pci_dev *, struct hfi1_pportdata *,
- struct hfi1_devdata *, u8, u8);
-void hfi1_free_ctxtdata(struct hfi1_devdata *, struct hfi1_ctxtdata *);
-
-int handle_receive_interrupt(struct hfi1_ctxtdata *, int);
-int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int);
-int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int);
+struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
+ int numa);
+void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
+ struct hfi1_devdata *dd, u8 hw_pidx, u8 port);
+void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd);
+
+int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread);
+int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread);
+int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread);
void set_all_slowpath(struct hfi1_devdata *dd);
+void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd);
+void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd);
+void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd);
extern const struct pci_device_id hfi1_pci_tbl[];
@@ -1255,16 +1309,24 @@ int hfi1_reset_device(int);
/* return the driver's idea of the logical OPA port state */
static inline u32 driver_lstate(struct hfi1_pportdata *ppd)
{
- return ppd->lstate; /* use the cached value */
+ /*
+ * The driver does some processing from the time the logical
+ * link state is at INIT to the time the SM can be notified
+ * as such. Return IB_PORT_DOWN until the software state
+ * is ready.
+ */
+ if (ppd->lstate == IB_PORT_INIT && !(ppd->host_link_state & HLS_UP))
+ return IB_PORT_DOWN;
+ else
+ return ppd->lstate;
}
void receive_interrupt_work(struct work_struct *work);
/* extract service channel from header and rhf */
-static inline int hdr2sc(struct ib_header *hdr, u64 rhf)
+static inline int hfi1_9B_get_sc5(struct ib_header *hdr, u64 rhf)
{
- return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) |
- ((!!(rhf_dc_info(rhf))) << 4);
+ return ib_get_sc(hdr) | ((!!(rhf_dc_info(rhf))) << 4);
}
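
hfi1_9B_get_sc5() assembles the 5-bit service class from the 4-bit SC carried in bits 15:12 of lrh[0] plus a fifth bit taken from the RHF DC info. A small worked example of that bit assembly, with made-up sample values:

/* Worked example of the sc5 assembly done above; lrh0 and dc_info are
 * sample values only. */
#include <stdio.h>

int main(void)
{
        unsigned short lrh0 = 0xB123;            /* SC[3:0] live in bits 15:12 */
        unsigned int   dc_info = 1;              /* RHF bit carrying SC[4] */
        unsigned int   sc5 = ((lrh0 >> 12) & 0xf) | ((!!dc_info) << 4);

        printf("sc5 = 0x%x\n", sc5);             /* 0xb | 0x10 = 0x1b */
        return 0;
}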
#define HFI1_JKEY_WIDTH 16
@@ -1520,7 +1582,7 @@ bad:
u32 lrh_max_header_bytes(struct hfi1_devdata *dd);
int mtu_to_enum(u32 mtu, int default_if_bad);
-u16 enum_to_mtu(int);
+u16 enum_to_mtu(int mtu);
static inline int valid_ib_mtu(unsigned int mtu)
{
return mtu == 256 || mtu == 512 ||
@@ -1534,17 +1596,18 @@ static inline int valid_opa_max_mtu(unsigned int mtu)
(valid_ib_mtu(mtu) || mtu == 8192 || mtu == 10240);
}
-int set_mtu(struct hfi1_pportdata *);
+int set_mtu(struct hfi1_pportdata *ppd);
-int hfi1_set_lid(struct hfi1_pportdata *, u32, u8);
-void hfi1_disable_after_error(struct hfi1_devdata *);
-int hfi1_set_uevent_bits(struct hfi1_pportdata *, const int);
-int hfi1_rcvbuf_validate(u32, u8, u16 *);
+int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc);
+void hfi1_disable_after_error(struct hfi1_devdata *dd);
+int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit);
+int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encode);
-int fm_get_table(struct hfi1_pportdata *, int, void *);
-int fm_set_table(struct hfi1_pportdata *, int, void *);
+int fm_get_table(struct hfi1_pportdata *ppd, int which, void *t);
+int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t);
-void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf);
+void set_up_vau(struct hfi1_devdata *dd, u8 vau);
+void set_up_vl15(struct hfi1_devdata *dd, u16 vl15buf);
void reset_link_credits(struct hfi1_devdata *dd);
void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu);
@@ -1584,6 +1647,11 @@ static inline struct hfi1_ibport *to_iport(struct ib_device *ibdev, u8 port)
return &dd->pport[pidx].ibport_data;
}
+static inline struct hfi1_ibport *rcd_to_iport(struct hfi1_ctxtdata *rcd)
+{
+ return &rcd->ppd->ibport_data;
+}
+
void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
bool do_cnp);
static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt,
@@ -1593,9 +1661,9 @@ static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt,
u32 bth1;
bth1 = be32_to_cpu(ohdr->bth[1]);
- if (unlikely(bth1 & (HFI1_BECN_SMASK | HFI1_FECN_SMASK))) {
+ if (unlikely(bth1 & (IB_BECN_SMASK | IB_FECN_SMASK))) {
hfi1_process_ecn_slowpath(qp, pkt, do_cnp);
- return bth1 & HFI1_FECN_SMASK;
+ return !!(bth1 & IB_FECN_SMASK);
}
return false;
}
@@ -1659,19 +1727,19 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd)
#define HFI1_PBC_LENGTH_MASK ((1 << 11) - 1)
/* ctxt_flag bit offsets */
- /* context has been setup */
-#define HFI1_CTXT_SETUP_DONE 1
+ /* base context has not finished initializing */
+#define HFI1_CTXT_BASE_UNINIT 1
+ /* base context initialization failed */
+#define HFI1_CTXT_BASE_FAILED 2
/* waiting for a packet to arrive */
-#define HFI1_CTXT_WAITING_RCV 2
- /* master has not finished initializing */
-#define HFI1_CTXT_MASTER_UNINIT 4
+#define HFI1_CTXT_WAITING_RCV 3
/* waiting for an urgent packet to arrive */
-#define HFI1_CTXT_WAITING_URG 5
+#define HFI1_CTXT_WAITING_URG 4
/* free up any allocated data at closes */
-struct hfi1_devdata *hfi1_init_dd(struct pci_dev *,
- const struct pci_device_id *);
-void hfi1_free_devdata(struct hfi1_devdata *);
+struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
+ const struct pci_device_id *ent);
+void hfi1_free_devdata(struct hfi1_devdata *dd);
struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra);
/* LED beaconing functions */
@@ -1746,24 +1814,24 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
extern const char ib_hfi1_version[];
-int hfi1_device_create(struct hfi1_devdata *);
-void hfi1_device_remove(struct hfi1_devdata *);
+int hfi1_device_create(struct hfi1_devdata *dd);
+void hfi1_device_remove(struct hfi1_devdata *dd);
int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
struct kobject *kobj);
-int hfi1_verbs_register_sysfs(struct hfi1_devdata *);
-void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *);
+int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd);
+void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd);
/* Hook for sysfs read of QSFP */
int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len);
-int hfi1_pcie_init(struct pci_dev *, const struct pci_device_id *);
-void hfi1_pcie_cleanup(struct pci_dev *);
-int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *);
+int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent);
+void hfi1_pcie_cleanup(struct pci_dev *pdev);
+int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev);
void hfi1_pcie_ddcleanup(struct hfi1_devdata *);
-void hfi1_pcie_flr(struct hfi1_devdata *);
-int pcie_speeds(struct hfi1_devdata *);
-void request_msix(struct hfi1_devdata *, u32 *, struct hfi1_msix_entry *);
-void hfi1_enable_intx(struct pci_dev *);
+int pcie_speeds(struct hfi1_devdata *dd);
+void request_msix(struct hfi1_devdata *dd, u32 *nent,
+ struct hfi1_msix_entry *entry);
+void hfi1_enable_intx(struct pci_dev *pdev);
void restore_pci_variables(struct hfi1_devdata *dd);
int do_pcie_gen3_transition(struct hfi1_devdata *dd);
int parse_platform_config(struct hfi1_devdata *dd);
@@ -1793,8 +1861,6 @@ int kdeth_process_expected(struct hfi1_packet *packet);
int kdeth_process_eager(struct hfi1_packet *packet);
int process_receive_invalid(struct hfi1_packet *packet);
-void update_sge(struct rvt_sge_state *ss, u32 length);
-
/* global module parameter variables */
extern unsigned int hfi1_max_mtu;
extern unsigned int hfi1_cu;
@@ -1940,6 +2006,10 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
dev_info(&(dd)->pcidev->dev, "%s: " fmt, \
get_unit_name((dd)->unit), ##__VA_ARGS__)
+#define dd_dev_info_ratelimited(dd, fmt, ...) \
+ dev_info_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \
+ get_unit_name((dd)->unit), ##__VA_ARGS__)
+
#define dd_dev_dbg(dd, fmt, ...) \
dev_dbg(&(dd)->pcidev->dev, "%s: " fmt, \
get_unit_name((dd)->unit), ##__VA_ARGS__)
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index e3b5bc93bc70..4a11d4da4c92 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -53,6 +53,7 @@
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/hrtimer.h>
+#include <linux/bitmap.h>
#include <rdma/rdma_vt.h>
#include "hfi.h"
@@ -65,10 +66,12 @@
#include "verbs.h"
#include "aspm.h"
#include "affinity.h"
+#include "vnic.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
+#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5
/*
* min buffers we want to have per context, after driver
*/
@@ -100,9 +103,9 @@ static unsigned hfi1_rcvarr_split = 25;
module_param_named(rcvarr_split, hfi1_rcvarr_split, uint, S_IRUGO);
MODULE_PARM_DESC(rcvarr_split, "Percent of context's RcvArray entries used for Eager buffers");
-static uint eager_buffer_size = (2 << 20); /* 2MB */
+static uint eager_buffer_size = (8 << 20); /* 8MB */
module_param(eager_buffer_size, uint, S_IRUGO);
-MODULE_PARM_DESC(eager_buffer_size, "Size of the eager buffers, default: 2MB");
+MODULE_PARM_DESC(eager_buffer_size, "Size of the eager buffers, default: 8MB");
static uint rcvhdrcnt = 2048; /* 2x the max eager buffer count */
module_param_named(rcvhdrcnt, rcvhdrcnt, uint, S_IRUGO);
@@ -116,7 +119,7 @@ unsigned int user_credit_return_threshold = 33; /* default is 33% */
module_param(user_credit_return_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user send contexts, return when unreturned credits passes this many blocks (in percent of allocated blocks, 0 is off)");
-static inline u64 encode_rcv_header_entry_size(u16);
+static inline u64 encode_rcv_header_entry_size(u16 size);
static struct idr hfi1_unit_table;
u32 hfi1_cpulist_count;
@@ -139,7 +142,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
goto nomem;
/* create one or more kernel contexts */
- for (i = 0; i < dd->first_user_ctxt; ++i) {
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) {
struct hfi1_pportdata *ppd;
struct hfi1_ctxtdata *rcd;
@@ -174,13 +177,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
goto nomem;
}
- ret = hfi1_init_ctxt(rcd->sc);
- if (ret < 0) {
- dd_dev_err(dd,
- "Failed to setup kernel receive context, failing\n");
- ret = -EFAULT;
- goto bail;
- }
+ hfi1_init_ctxt(rcd->sc);
}
/*
@@ -192,7 +189,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
return 0;
nomem:
ret = -ENOMEM;
-bail:
+
if (dd->rcd) {
for (i = 0; i < dd->num_rcv_contexts; ++i)
hfi1_free_ctxtdata(dd, dd->rcd[i]);
@@ -214,9 +211,9 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
u32 base;
if (dd->rcv_entries.nctxt_extra >
- dd->num_rcv_contexts - dd->first_user_ctxt)
+ dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt)
kctxt_ngroups = (dd->rcv_entries.nctxt_extra -
- (dd->num_rcv_contexts - dd->first_user_ctxt));
+ (dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt));
rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, numa);
if (rcd) {
u32 rcvtids, max_entries;
@@ -226,7 +223,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
INIT_LIST_HEAD(&rcd->qp_wait_list);
rcd->ppd = ppd;
rcd->dd = dd;
- rcd->cnt = 1;
+ __set_bit(0, rcd->in_use_ctxts);
rcd->ctxt = ctxt;
dd->rcd[ctxt] = rcd;
rcd->numa_id = numa;
@@ -238,27 +235,29 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
* Calculate the context's RcvArray entry starting point.
* We do this here because we have to take into account all
* the RcvArray entries that previous context would have
- * taken and we have to account for any extra groups
- * assigned to the kernel or user contexts.
+ * taken and we have to account for any extra groups assigned
+ * to the static (kernel) or dynamic (vnic/user) contexts.
*/
- if (ctxt < dd->first_user_ctxt) {
+ if (ctxt < dd->first_dyn_alloc_ctxt) {
if (ctxt < kctxt_ngroups) {
base = ctxt * (dd->rcv_entries.ngroups + 1);
rcd->rcv_array_groups++;
- } else
+ } else {
base = kctxt_ngroups +
(ctxt * dd->rcv_entries.ngroups);
+ }
} else {
- u16 ct = ctxt - dd->first_user_ctxt;
+ u16 ct = ctxt - dd->first_dyn_alloc_ctxt;
base = ((dd->n_krcv_queues * dd->rcv_entries.ngroups) +
kctxt_ngroups);
if (ct < dd->rcv_entries.nctxt_extra) {
base += ct * (dd->rcv_entries.ngroups + 1);
rcd->rcv_array_groups++;
- } else
+ } else {
base += dd->rcv_entries.nctxt_extra +
(ct * dd->rcv_entries.ngroups);
+ }
}
rcd->eager_base = base * dd->rcv_entries.group_size;
@@ -297,14 +296,15 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
* The resulting value will be rounded down to the closest
* multiple of dd->rcv_entries.group_size.
*/
- rcd->egrbufs.buffers = kcalloc(rcd->egrbufs.count,
- sizeof(*rcd->egrbufs.buffers),
- GFP_KERNEL);
+ rcd->egrbufs.buffers = kzalloc_node(
+ rcd->egrbufs.count * sizeof(*rcd->egrbufs.buffers),
+ GFP_KERNEL, numa);
if (!rcd->egrbufs.buffers)
goto bail;
- rcd->egrbufs.rcvtids = kcalloc(rcd->egrbufs.count,
- sizeof(*rcd->egrbufs.rcvtids),
- GFP_KERNEL);
+ rcd->egrbufs.rcvtids = kzalloc_node(
+ rcd->egrbufs.count *
+ sizeof(*rcd->egrbufs.rcvtids),
+ GFP_KERNEL, numa);
if (!rcd->egrbufs.rcvtids)
goto bail;
rcd->egrbufs.size = eager_buffer_size;
@@ -321,9 +321,10 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
}
rcd->egrbufs.rcvtid_size = HFI1_MAX_EAGER_BUFFER_SIZE;
- if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */
- rcd->opstats = kzalloc(sizeof(*rcd->opstats),
- GFP_KERNEL);
+ /* Applicable only for statically created kernel contexts */
+ if (ctxt < dd->first_dyn_alloc_ctxt) {
+ rcd->opstats = kzalloc_node(sizeof(*rcd->opstats),
+ GFP_KERNEL, numa);
if (!rcd->opstats)
goto bail;
}
@@ -481,6 +482,9 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
default_pkey_idx = 1;
ppd->pkeys[default_pkey_idx] = DEFAULT_P_KEY;
+ ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
+ ppd->part_enforce |= HFI1_PART_ENFORCE_OUT;
+
if (loopback) {
hfi1_early_err(&pdev->dev,
"Faking data partition 0x8001 in idx %u\n",
@@ -584,7 +588,7 @@ static void enable_chip(struct hfi1_devdata *dd)
* Enable kernel ctxts' receive and receive interrupt.
* Other ctxts done as user opens and initializes them.
*/
- for (i = 0; i < dd->first_user_ctxt; ++i) {
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) {
rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB;
rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
@@ -615,7 +619,7 @@ static int create_workqueues(struct hfi1_devdata *dd)
alloc_workqueue(
"hfi%d_%d",
WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE,
- dd->num_sdma,
+ HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES,
dd->unit, pidx);
if (!ppd->hfi1_wq)
goto wq_error;
@@ -678,6 +682,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
dd->process_pio_send = hfi1_verbs_send_pio;
dd->process_dma_send = hfi1_verbs_send_dma;
dd->pio_inline_send = pio_copy;
+ dd->process_vnic_dma_send = hfi1_vnic_send_dma;
if (is_ax(dd)) {
atomic_set(&dd->drop_packet, DROP_PACKET_ON);
@@ -713,7 +718,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
}
/* dd->rcd can be NULL if early initialization failed */
- for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) {
+ for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) {
/*
* Set up the (kernel) rcvhdr queue and egr TIDs. If doing
* re-init, the simplest way to handle this is to free
@@ -959,7 +964,6 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
kfree(rcd->egrbufs.buffers);
sc_free(rcd->sc);
- vfree(rcd->user_event_mask);
vfree(rcd->subctxt_uregbase);
vfree(rcd->subctxt_rcvegrbuf);
vfree(rcd->subctxt_rcvhdr_base);
@@ -1077,11 +1081,11 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
spin_lock_init(&dd->uctxt_lock);
spin_lock_init(&dd->hfi1_diag_trans_lock);
spin_lock_init(&dd->sc_init_lock);
- spin_lock_init(&dd->dc8051_lock);
spin_lock_init(&dd->dc8051_memlock);
seqlock_init(&dd->sc2vl_lock);
spin_lock_init(&dd->sde_map_lock);
spin_lock_init(&dd->pio_map_lock);
+ mutex_init(&dd->dc8051_lock);
init_waitqueue_head(&dd->event_queue);
dd->int_counter = alloc_percpu(u64);
@@ -1424,6 +1428,16 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* First, lock the non-writable module parameters */
HFI1_CAP_LOCK();
+ /* Validate dev ids */
+ if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
+ ent->device == PCI_DEVICE_ID_INTEL1)) {
+ hfi1_early_err(&pdev->dev,
+ "Failing on unknown Intel deviceid 0x%x\n",
+ ent->device);
+ ret = -ENODEV;
+ goto bail;
+ }
+
/* Validate some global module parameters */
ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt);
if (ret)
@@ -1469,15 +1483,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (ret)
goto bail;
- if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
- ent->device == PCI_DEVICE_ID_INTEL1)) {
- hfi1_early_err(&pdev->dev,
- "Failing on unknown Intel deviceid 0x%x\n",
- ent->device);
- ret = -ENODEV;
- goto clean_bail;
- }
-
/*
* Do device-specific initialization, function table setup, dd
* allocation, etc.
@@ -1496,6 +1501,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* do the generic initialization */
initfail = hfi1_init(dd, 0);
+ /* setup vnic */
+ hfi1_vnic_setup(dd);
+
ret = hfi1_register_ib_device(dd);
/*
@@ -1529,6 +1537,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
hfi1_device_remove(dd);
if (!ret)
hfi1_unregister_ib_device(dd);
+ hfi1_vnic_cleanup(dd);
postinit_cleanup(dd);
if (initfail)
ret = initfail;
@@ -1573,6 +1582,9 @@ static void remove_one(struct pci_dev *pdev)
/* unregister from IB core */
hfi1_unregister_ib_device(dd);
+ /* cleanup vnic */
+ hfi1_vnic_cleanup(dd);
+
/*
* Disable the IB link, disable interrupts on the device,
* clear dma engines, etc.
@@ -1612,8 +1624,11 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
amt = PAGE_ALIGN(rcd->rcvhdrq_cnt * rcd->rcvhdrqentsize *
sizeof(u32));
- gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
- GFP_USER : GFP_KERNEL;
+ if ((rcd->ctxt < dd->first_dyn_alloc_ctxt) ||
+ (rcd->sc && (rcd->sc->type == SC_KERNEL)))
+ gfp_flags = GFP_KERNEL;
+ else
+ gfp_flags = GFP_USER;
rcd->rcvhdrq = dma_zalloc_coherent(
&dd->pcidev->dev, amt, &rcd->rcvhdrq_dma,
gfp_flags | __GFP_COMP);
@@ -1667,8 +1682,6 @@ bail_free:
dd_dev_err(dd,
"attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n",
rcd->ctxt);
- vfree(rcd->user_event_mask);
- rcd->user_event_mask = NULL;
dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq,
rcd->rcvhdrq_dma);
rcd->rcvhdrq = NULL;
@@ -1757,6 +1770,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
!HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR)) {
dd_dev_err(dd, "ctxt%u: Failed to allocate eager buffers\n",
rcd->ctxt);
+ ret = -ENOMEM;
goto bail_rcvegrbuf_phys;
}
@@ -1834,7 +1848,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
"ctxt%u: current Eager buffer size is invalid %u\n",
rcd->ctxt, rcd->egrbufs.rcvtid_size);
ret = -EINVAL;
- goto bail;
+ goto bail_rcvegrbuf_phys;
}
for (idx = 0; idx < rcd->egrbufs.alloced; idx++) {
@@ -1842,7 +1856,8 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
rcd->egrbufs.rcvtids[idx].dma, order);
cond_resched();
}
- goto bail;
+
+ return 0;
bail_rcvegrbuf_phys:
for (idx = 0; idx < rcd->egrbufs.alloced &&
@@ -1856,6 +1871,6 @@ bail_rcvegrbuf_phys:
rcd->egrbufs.buffers[idx].dma = 0;
rcd->egrbufs.buffers[idx].len = 0;
}
-bail:
+
return ret;
}
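
For reference, a minimal stand-alone sketch (not part of the patch) of the rcvhdrq allocation-flag selection introduced in the hunk above: once contexts can be allocated dynamically, kernel contexts and dynamically allocated contexts whose send context is SC_KERNEL keep GFP_KERNEL, everything else keeps GFP_USER. The types and the helper name rcvhdrq_gfp are illustrative stand-ins, not driver symbols.

#include <stdbool.h>
#include <stdio.h>

enum sc_type { SC_KERNEL, SC_USER };
enum gfp { GFP_KERNEL, GFP_USER };

struct ctxt {
        unsigned int ctxt;        /* context index (rcd->ctxt) */
        bool has_sc;              /* rcd->sc != NULL */
        enum sc_type sc_type;     /* rcd->sc->type */
};

/* Kernel contexts, and dynamically allocated contexts backed by an
 * SC_KERNEL send context (e.g. VNIC), use GFP_KERNEL; the rest keep
 * GFP_USER as before. */
static enum gfp rcvhdrq_gfp(const struct ctxt *rcd,
                            unsigned int first_dyn_alloc_ctxt)
{
        if (rcd->ctxt < first_dyn_alloc_ctxt ||
            (rcd->has_sc && rcd->sc_type == SC_KERNEL))
                return GFP_KERNEL;
        return GFP_USER;
}

int main(void)
{
        struct ctxt vnic = { .ctxt = 9, .has_sc = true, .sc_type = SC_KERNEL };

        printf("%s\n", rcvhdrq_gfp(&vnic, 8) == GFP_KERNEL ? "GFP_KERNEL"
                                                           : "GFP_USER");
        return 0;
}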
diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c
index 65348d16ab2f..04a5082d5ac5 100644
--- a/drivers/infiniband/hw/hfi1/intr.c
+++ b/drivers/infiniband/hw/hfi1/intr.c
@@ -47,6 +47,7 @@
#include <linux/pci.h>
#include <linux/delay.h>
+#include <linux/bitmap.h>
#include "hfi.h"
#include "common.h"
@@ -129,21 +130,27 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup)
* the remote values. Both sides must be using the values.
*/
if (quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
- set_up_vl15(dd, dd->vau, dd->vl15_init);
+ set_up_vau(dd, dd->vau);
+ set_up_vl15(dd, dd->vl15_init);
assign_remote_cm_au_table(dd, dd->vcu);
- ppd->neighbor_guid =
- read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
- ppd->neighbor_type =
- read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
- DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
- ppd->neighbor_port_number =
- read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
- DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
- dd_dev_info(dd, "Neighbor GUID: %llx Neighbor type %d\n",
- ppd->neighbor_guid,
- ppd->neighbor_type);
}
+ ppd->neighbor_guid =
+ read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
+ ppd->neighbor_type =
+ read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
+ DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
+ ppd->neighbor_port_number =
+ read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
+ DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
+ ppd->neighbor_fm_security =
+ read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
+ DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
+ dd_dev_info(dd,
+ "Neighbor Guid %llx, Type %d, Port Num %d\n",
+ ppd->neighbor_guid, ppd->neighbor_type,
+ ppd->neighbor_port_number);
+
/* physical link went up */
ppd->linkup = 1;
ppd->offline_disabled_reason =
@@ -184,7 +191,7 @@ void handle_user_interrupt(struct hfi1_ctxtdata *rcd)
unsigned long flags;
spin_lock_irqsave(&dd->uctxt_lock, flags);
- if (!rcd->cnt)
+ if (bitmap_empty(rcd->in_use_ctxts, HFI1_MAX_SHARED_CTXTS))
goto done;
if (test_and_clear_bit(HFI1_CTXT_WAITING_RCV, &rcd->event_flags)) {
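
The intr.c hunk above replaces the rcd->cnt reference count with a per-sub-context bitmap. A small sketch of the idea (not part of the patch), assuming the bitmap fits in one word; the real code uses __set_bit()/bitmap_empty() from the kernel bitmap API and HFI1_MAX_SHARED_CTXTS as the width.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct rcd_sketch {
        uint32_t in_use_ctxts;  /* bit i set => sub-context i in use */
};

static void claim_subctxt(struct rcd_sketch *rcd, unsigned int i)
{
        rcd->in_use_ctxts |= 1u << i;   /* __set_bit(i, rcd->in_use_ctxts) */
}

static bool ctxt_in_use(const struct rcd_sketch *rcd)
{
        /* !bitmap_empty(rcd->in_use_ctxts, HFI1_MAX_SHARED_CTXTS) */
        return rcd->in_use_ctxts != 0;
}

int main(void)
{
        struct rcd_sketch rcd = { 0 };

        claim_subctxt(&rcd, 0);         /* master claims bit 0 at create time */
        printf("in use: %d\n", ctxt_in_use(&rcd));
        return 0;
}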
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 6e595afca24c..5977673a52d4 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -53,6 +53,7 @@
#include "mad.h"
#include "trace.h"
#include "qp.h"
+#include "vnic.h"
/* the reset value from the FM is supposed to be 0xffff, handle both */
#define OPA_LINK_WIDTH_RESET_OLD 0x0fff
@@ -650,9 +651,11 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE : 0);
pi->port_packet_format.supported =
- cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B);
+ cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
+ OPA_PORT_PACKET_FORMAT_16B);
pi->port_packet_format.enabled =
- cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B);
+ cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
+ OPA_PORT_PACKET_FORMAT_16B);
/* flit_control.interleave is (OPA V1, version .76):
* bits use
@@ -701,7 +704,13 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT;
pi->buffer_units = cpu_to_be32(buffer_units);
- pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported);
+ pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported |
+ OPA_CAP_MASK3_IsEthOnFabricSupported);
+ /* Driver does not support mcast/collective configuration */
+ pi->opa_cap_mask &=
+ cpu_to_be16(~OPA_CAP_MASK3_IsAddrRangeConfigSupported);
+ pi->collectivemask_multicastmask = ((HFI1_COLLECTIVE_NR & 0x7)
+ << 3 | (HFI1_MCAST_NR & 0x7));
/* HFI supports a replay buffer 128 LTPs in size */
pi->replay_depth.buffer = 0x80;
@@ -1146,16 +1155,6 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
ppd->linkinit_reason =
(pi->partenforce_filterraw &
OPA_PI_MASK_LINKINIT_REASON);
- /* enable/disable SW pkey checking as per FM control */
- if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_IN)
- ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
- else
- ppd->part_enforce &= ~HFI1_PART_ENFORCE_IN;
-
- if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_OUT)
- ppd->part_enforce |= HFI1_PART_ENFORCE_OUT;
- else
- ppd->part_enforce &= ~HFI1_PART_ENFORCE_OUT;
/* Must be a valid unicast LID address. */
if ((smlid == 0 && ls_old > IB_PORT_INIT) ||
@@ -1167,9 +1166,9 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
spin_lock_irqsave(&ibp->rvp.lock, flags);
if (ibp->rvp.sm_ah) {
if (smlid != ibp->rvp.sm_lid)
- ibp->rvp.sm_ah->attr.dlid = smlid;
+ rdma_ah_set_dlid(&ibp->rvp.sm_ah->attr, smlid);
if (msl != ibp->rvp.sm_sl)
- ibp->rvp.sm_ah->attr.sl = msl;
+ rdma_ah_set_sl(&ibp->rvp.sm_ah->attr, msl);
}
spin_unlock_irqrestore(&ibp->rvp.lock, flags);
if (smlid != ibp->rvp.sm_lid)
@@ -1465,25 +1464,15 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len);
}
-static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
-{
- u64 *val = data;
-
- *val++ = read_csr(dd, SEND_SC2VLT0);
- *val++ = read_csr(dd, SEND_SC2VLT1);
- *val++ = read_csr(dd, SEND_SC2VLT2);
- *val++ = read_csr(dd, SEND_SC2VLT3);
- return 0;
-}
-
#define ILLEGAL_VL 12
/*
* filter_sc2vlt changes mappings to VL15 to ILLEGAL_VL (except
* for SC15, which must map to VL15). If we don't remap things this
* way it is possible for VL15 counters to increment when we try to
* send on a SC which is mapped to an invalid VL.
+ * When getting the table convert ILLEGAL_VL back to VL15.
*/
-static void filter_sc2vlt(void *data)
+static void filter_sc2vlt(void *data, bool set)
{
int i;
u8 *pd = data;
@@ -1491,8 +1480,14 @@ static void filter_sc2vlt(void *data)
for (i = 0; i < OPA_MAX_SCS; i++) {
if (i == 15)
continue;
- if ((pd[i] & 0x1f) == 0xf)
- pd[i] = ILLEGAL_VL;
+
+ if (set) {
+ if ((pd[i] & 0x1f) == 0xf)
+ pd[i] = ILLEGAL_VL;
+ } else {
+ if ((pd[i] & 0x1f) == ILLEGAL_VL)
+ pd[i] = 0xf;
+ }
}
}
@@ -1500,7 +1495,7 @@ static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
{
u64 *val = data;
- filter_sc2vlt(data);
+ filter_sc2vlt(data, true);
write_csr(dd, SEND_SC2VLT0, *val++);
write_csr(dd, SEND_SC2VLT1, *val++);
@@ -1512,6 +1507,19 @@ static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
return 0;
}
+static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
+{
+ u64 *val = (u64 *)data;
+
+ *val++ = read_csr(dd, SEND_SC2VLT0);
+ *val++ = read_csr(dd, SEND_SC2VLT1);
+ *val++ = read_csr(dd, SEND_SC2VLT2);
+ *val++ = read_csr(dd, SEND_SC2VLT3);
+
+ filter_sc2vlt((u64 *)data, false);
+ return 0;
+}
+
static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
u32 *resp_len)
@@ -1986,31 +1994,6 @@ struct opa_pma_mad {
u8 data[2024];
} __packed;
-struct opa_class_port_info {
- u8 base_version;
- u8 class_version;
- __be16 cap_mask;
- __be32 cap_mask2_resp_time;
-
- u8 redirect_gid[16];
- __be32 redirect_tc_fl;
- __be32 redirect_lid;
- __be32 redirect_sl_qp;
- __be32 redirect_qkey;
-
- u8 trap_gid[16];
- __be32 trap_tc_fl;
- __be32 trap_lid;
- __be32 trap_hl_qp;
- __be32 trap_qkey;
-
- __be16 trap_pkey;
- __be16 redirect_pkey;
-
- u8 trap_sl_rsvd;
- u8 reserved[3];
-} __packed;
-
struct opa_port_status_req {
__u8 port_num;
__u8 reserved[3];
@@ -4406,7 +4389,7 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
switch (in_mad->base_version) {
case OPA_MGMT_BASE_VERSION:
if (unlikely(in_mad_size != sizeof(struct opa_mad))) {
- dev_err(ibdev->dma_device, "invalid in_mad_size\n");
+ dev_err(ibdev->dev.parent, "invalid in_mad_size\n");
return IB_MAD_RESULT_FAILURE;
}
return hfi1_process_opa_mad(ibdev, mad_flags, port,
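
With the change above, get_sc2vlt_tables() reuses filter_sc2vlt() in the reverse direction, so ILLEGAL_VL entries written on set are reported back as VL15 on get. A stand-alone sketch of that round trip (not part of the patch), assuming OPA_MAX_SCS is 32, since the value is not shown in the hunk:

#include <stdbool.h>
#include <stdio.h>

#define OPA_MAX_SCS 32  /* assumed; not shown in this hunk */
#define ILLEGAL_VL  12

static void filter_sc2vlt(unsigned char *pd, bool set)
{
        for (int i = 0; i < OPA_MAX_SCS; i++) {
                if (i == 15)                    /* SC15 must stay mapped to VL15 */
                        continue;
                if (set) {
                        if ((pd[i] & 0x1f) == 0xf)
                                pd[i] = ILLEGAL_VL;     /* hide VL15 on write */
                } else {
                        if ((pd[i] & 0x1f) == ILLEGAL_VL)
                                pd[i] = 0xf;            /* restore VL15 on read */
                }
        }
}

int main(void)
{
        unsigned char tbl[OPA_MAX_SCS] = { [3] = 0xf, [15] = 0xf };

        filter_sc2vlt(tbl, true);       /* SC3 becomes ILLEGAL_VL */
        filter_sc2vlt(tbl, false);      /* and reads back as 0xf  */
        printf("SC3=%d SC15=%d\n", tbl[3], tbl[15]);
        return 0;
}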
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 4ac8f330c5cb..6a9f6f9819e1 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -207,8 +207,8 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
/*
* Save BARs and command to rewrite after device reset.
*/
- dd->pcibar0 = addr;
- dd->pcibar1 = addr >> 32;
+ pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, &dd->pcibar0);
+ pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, &dd->pcibar1);
pci_read_config_dword(dd->pcidev, PCI_ROM_ADDRESS, &dd->pci_rom);
pci_read_config_word(dd->pcidev, PCI_COMMAND, &dd->pci_command);
pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &dd->pcie_devctl);
@@ -240,36 +240,6 @@ void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd)
iounmap(dd->piobase);
}
-/*
- * Do a Function Level Reset (FLR) on the device.
- * Based on static function drivers/pci/pci.c:pcie_flr().
- */
-void hfi1_pcie_flr(struct hfi1_devdata *dd)
-{
- int i;
- u16 status;
-
- /* no need to check for the capability - we know the device has it */
-
- /* wait for Transaction Pending bit to clear, at most a few ms */
- for (i = 0; i < 4; i++) {
- if (i)
- msleep((1 << (i - 1)) * 100);
-
- pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVSTA, &status);
- if (!(status & PCI_EXP_DEVSTA_TRPND))
- goto clear;
- }
-
- dd_dev_err(dd, "Transaction Pending bit is not clearing, proceeding with reset anyway\n");
-
-clear:
- pcie_capability_set_word(dd->pcidev, PCI_EXP_DEVCTL,
- PCI_EXP_DEVCTL_BCR_FLR);
- /* PCIe spec requires the function to be back within 100ms */
- msleep(100);
-}
-
static void msix_setup(struct hfi1_devdata *dd, int pos, u32 *msixcnt,
struct hfi1_msix_entry *hfi1_msix_entry)
{
@@ -583,7 +553,7 @@ pci_mmio_enabled(struct pci_dev *pdev)
if (words == ~0ULL)
ret = PCI_ERS_RESULT_NEED_RESET;
dd_dev_info(dd,
- "HFI1 mmio_enabled function called, read wordscntr %Lx, returning %d\n",
+ "HFI1 mmio_enabled function called, read wordscntr %llx, returning %d\n",
words, ret);
}
return ret;
@@ -598,15 +568,6 @@ pci_slot_reset(struct pci_dev *pdev)
return PCI_ERS_RESULT_CAN_RECOVER;
}
-static pci_ers_result_t
-pci_link_reset(struct pci_dev *pdev)
-{
- struct hfi1_devdata *dd = pci_get_drvdata(pdev);
-
- dd_dev_info(dd, "HFI1 link_reset function called, ignored\n");
- return PCI_ERS_RESULT_CAN_RECOVER;
-}
-
static void
pci_resume(struct pci_dev *pdev)
{
@@ -625,7 +586,6 @@ pci_resume(struct pci_dev *pdev)
const struct pci_error_handlers hfi1_pci_err_handler = {
.error_detected = pci_error_detected,
.mmio_enabled = pci_mmio_enabled,
- .link_reset = pci_link_reset,
.slot_reset = pci_slot_reset,
.resume = pci_resume,
};
@@ -673,12 +633,12 @@ MODULE_PARM_DESC(pcie_retry, "Driver will try this many times to reach requested
#define UNSET_PSET 255
#define DEFAULT_DISCRETE_PSET 2 /* discrete HFI */
-#define DEFAULT_MCP_PSET 4 /* MCP HFI */
+#define DEFAULT_MCP_PSET 6 /* MCP HFI */
static uint pcie_pset = UNSET_PSET;
module_param(pcie_pset, uint, S_IRUGO);
MODULE_PARM_DESC(pcie_pset, "PCIe Eq Pset value to use, range is 0-10");
-static uint pcie_ctle = 1; /* discrete on, integrated off */
+static uint pcie_ctle = 3; /* discrete on, integrated on */
module_param(pcie_ctle, uint, S_IRUGO);
MODULE_PARM_DESC(pcie_ctle, "PCIe static CTLE mode, bit 0 - discrete on/off, bit 1 - integrated on/off");
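
The pcie_ctle default changes from 1 to 3, which, per the parameter description above, enables static CTLE for both discrete and integrated HFIs. A trivial decode of the bitmask for reference (not part of the patch):

#include <stdio.h>

int main(void)
{
        unsigned int pcie_ctle = 3;     /* new default: both bits set */

        printf("discrete CTLE: %s, integrated CTLE: %s\n",
               (pcie_ctle & 0x1) ? "on" : "off",
               (pcie_ctle & 0x2) ? "on" : "off");
        return 0;
}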
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 615be68e40b3..ed72b5aca139 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -703,6 +703,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
{
struct send_context_info *sci;
struct send_context *sc = NULL;
+ int req_type = type;
dma_addr_t dma;
unsigned long flags;
u64 reg;
@@ -729,6 +730,13 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
return NULL;
}
+ /*
+ * VNIC contexts are dynamically allocated.
+ * Hence, pick a user context for VNIC.
+ */
+ if (type == SC_VNIC)
+ type = SC_USER;
+
spin_lock_irqsave(&dd->sc_lock, flags);
ret = sc_hw_alloc(dd, type, &sw_index, &hw_context);
if (ret) {
@@ -738,6 +746,15 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
return NULL;
}
+ /*
+ * VNIC contexts are used by the kernel driver.
+ * VNIC contexts are used by the kernel driver.
+ * Hence, mark them as kernel contexts.
+ */
+ if (req_type == SC_VNIC) {
+ dd->send_contexts[sw_index].type = SC_KERNEL;
+ type = SC_KERNEL;
+ }
+
sci = &dd->send_contexts[sw_index];
sci->sc = sc;
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index 867e5ffc3595..99ca5edb0b43 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -1,7 +1,7 @@
#ifndef _PIO_H
#define _PIO_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -54,6 +54,12 @@
#define SC_USER 3 /* must be the last one: it may take all left */
#define SC_MAX 4 /* count of send context types */
+/*
+ * SC_VNIC types are allocated (dynamically) from the user context pool
+ * (SC_USER) and used by the kernel driver as kernel contexts (SC_KERNEL).
+ */
+#define SC_VNIC SC_MAX
+
/* invalid send context index */
#define INVALID_SCI 0xff
@@ -195,7 +201,7 @@ struct sc_config_sizes {
* | mask | --/ |--------------------|
* |--------------------------| -/ | * |
* | actual_vls (max 8) | -/ |--------------------|
- * |--------------------------| --/ | ksc[n] -> sc n |
+ * |--------------------------| --/ | ksc[n-1] -> sc n |
* | vls (max 8) | -/ +--------------------+
* |--------------------------| --/
* | map[0] |-/
@@ -208,21 +214,21 @@ struct sc_config_sizes {
* |--------------------------| |--------------------|
* | map[vls - 1] |- | * |
* +--------------------------+ \- |--------------------|
- * \- | ksc[m] -> sc m+n |
+ * \- | ksc[m-1] -> sc m+n |
* \ +--------------------+
* \-
* \
- * \- +--------------------+
- * \- | mask |
- * \ |--------------------|
- * \- | ksc[0] -> sc 1+m+n |
- * \- |--------------------|
- * >| ksc[1] -> sc 2+m+n |
- * |--------------------|
- * | * |
- * |--------------------|
- * | ksc[o] -> sc o+m+n |
- * +--------------------+
+ * \- +----------------------+
+ * \- | mask |
+ * \ |----------------------|
+ * \- | ksc[0] -> sc 1+m+n |
+ * \- |----------------------|
+ * >| ksc[1] -> sc 2+m+n |
+ * |----------------------|
+ * | * |
+ * |----------------------|
+ * | ksc[o-1] -> sc o+m+n |
+ * +----------------------+
*
*/
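
SC_VNIC is a request type only: sc_alloc() (see the pio.c hunk above) draws the hardware context from the SC_USER pool and then books it as SC_KERNEL. A small sketch of that remapping (not part of the patch); SC_KERNEL is assumed to be 0 since its value is not shown here.

#include <stdio.h>

#define SC_KERNEL 0             /* assumed value; not shown in the hunk */
#define SC_USER   3             /* from pio.h above */
#define SC_MAX    4
#define SC_VNIC   SC_MAX        /* dynamic request type, outside the SC_MAX pools */

struct sc_alloc_sketch {
        int pool;               /* pool the hw context is taken from */
        int recorded;           /* type recorded in dd->send_contexts[] */
};

static struct sc_alloc_sketch vnic_remap(int req_type)
{
        struct sc_alloc_sketch r = { req_type, req_type };

        if (req_type == SC_VNIC) {
                r.pool = SC_USER;       /* allocate from the user pool ...   */
                r.recorded = SC_KERNEL; /* ... but treat as a kernel context */
        }
        return r;
}

int main(void)
{
        struct sc_alloc_sketch r = vnic_remap(SC_VNIC);

        printf("pool=%d recorded=%d\n", r.pool, r.recorded);
        return 0;
}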
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index d752d6768a49..1a7af9f60c13 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -79,43 +79,6 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
}
-/*
- * Convert the AETH credit code into the number of credits.
- */
-static const u16 credit_table[31] = {
- 0, /* 0 */
- 1, /* 1 */
- 2, /* 2 */
- 3, /* 3 */
- 4, /* 4 */
- 6, /* 5 */
- 8, /* 6 */
- 12, /* 7 */
- 16, /* 8 */
- 24, /* 9 */
- 32, /* A */
- 48, /* B */
- 64, /* C */
- 96, /* D */
- 128, /* E */
- 192, /* F */
- 256, /* 10 */
- 384, /* 11 */
- 512, /* 12 */
- 768, /* 13 */
- 1024, /* 14 */
- 1536, /* 15 */
- 2048, /* 16 */
- 3072, /* 17 */
- 4096, /* 18 */
- 6144, /* 19 */
- 8192, /* 1A */
- 12288, /* 1B */
- 16384, /* 1C */
- 24576, /* 1D */
- 32768 /* 1E */
-};
-
const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
[IB_WR_RDMA_WRITE] = {
.length = sizeof(struct ib_rdma_wr),
@@ -331,7 +294,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
ah = ibah_to_rvtah(wqe->ud_wr.ah);
if (wqe->length > (1 << ah->log_pmtu))
return -EINVAL;
- if (ibp->sl_to_sc[ah->attr.sl] == 0xf)
+ if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf)
return -EINVAL;
default:
break;
@@ -340,68 +303,6 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
}
/**
- * hfi1_compute_aeth - compute the AETH (syndrome + MSN)
- * @qp: the queue pair to compute the AETH for
- *
- * Returns the AETH.
- */
-__be32 hfi1_compute_aeth(struct rvt_qp *qp)
-{
- u32 aeth = qp->r_msn & HFI1_MSN_MASK;
-
- if (qp->ibqp.srq) {
- /*
- * Shared receive queues don't generate credits.
- * Set the credit field to the invalid value.
- */
- aeth |= HFI1_AETH_CREDIT_INVAL << HFI1_AETH_CREDIT_SHIFT;
- } else {
- u32 min, max, x;
- u32 credits;
- struct rvt_rwq *wq = qp->r_rq.wq;
- u32 head;
- u32 tail;
-
- /* sanity check pointers before trusting them */
- head = wq->head;
- if (head >= qp->r_rq.size)
- head = 0;
- tail = wq->tail;
- if (tail >= qp->r_rq.size)
- tail = 0;
- /*
- * Compute the number of credits available (RWQEs).
- * There is a small chance that the pair of reads are
- * not atomic, which is OK, since the fuzziness is
- * resolved as further ACKs go out.
- */
- credits = head - tail;
- if ((int)credits < 0)
- credits += qp->r_rq.size;
- /*
- * Binary search the credit table to find the code to
- * use.
- */
- min = 0;
- max = 31;
- for (;;) {
- x = (min + max) / 2;
- if (credit_table[x] == credits)
- break;
- if (credit_table[x] > credits) {
- max = x;
- } else {
- if (min == x)
- break;
- min = x;
- }
- }
- aeth |= x << HFI1_AETH_CREDIT_SHIFT;
- }
- return cpu_to_be32(aeth);
-}
-
-/**
* _hfi1_schedule_send - schedule progress
* @qp: the QP
*
@@ -457,44 +358,6 @@ void hfi1_schedule_send(struct rvt_qp *qp)
_hfi1_schedule_send(qp);
}
-/**
- * hfi1_get_credit - handle credit in aeth
- * @qp: the qp
- * @aeth: the Acknowledge Extended Transport Header
- *
- * The QP s_lock should be held.
- */
-void hfi1_get_credit(struct rvt_qp *qp, u32 aeth)
-{
- u32 credit = (aeth >> HFI1_AETH_CREDIT_SHIFT) & HFI1_AETH_CREDIT_MASK;
-
- lockdep_assert_held(&qp->s_lock);
- /*
- * If the credit is invalid, we can send
- * as many packets as we like. Otherwise, we have to
- * honor the credit field.
- */
- if (credit == HFI1_AETH_CREDIT_INVAL) {
- if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
- qp->s_flags |= RVT_S_UNLIMITED_CREDIT;
- if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
- qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
- hfi1_schedule_send(qp);
- }
- }
- } else if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
- /* Compute new LSN (i.e., MSN + credit) */
- credit = (aeth + credit_table[credit]) & HFI1_MSN_MASK;
- if (cmp_msn(credit, qp->s_lsn) > 0) {
- qp->s_lsn = credit;
- if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
- qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
- hfi1_schedule_send(qp);
- }
- }
- }
-}
-
void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
{
unsigned long flags;
@@ -744,7 +607,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
send_context = qp_to_send_context(qp, priv->s_sc);
seq_printf(s,
- "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n",
+ "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x (%u %u %u %u %u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n",
iter->n,
qp_idle(qp) ? "I" : "B",
qp->ibqp.qp_num,
@@ -763,16 +626,18 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
qp->s_last_psn,
qp->s_psn, qp->s_next_psn,
qp->s_sending_psn, qp->s_sending_hpsn,
+ qp->r_psn,
qp->s_last, qp->s_acked, qp->s_cur,
qp->s_tail, qp->s_head, qp->s_size,
qp->s_avail,
qp->remote_qpn,
- qp->remote_ah_attr.dlid,
- qp->remote_ah_attr.sl,
+ rdma_ah_get_dlid(&qp->remote_ah_attr),
+ rdma_ah_get_sl(&qp->remote_ah_attr),
qp->pmtu,
qp->s_retry,
qp->s_retry_cnt,
qp->s_rnr_retry_cnt,
+ qp->s_rnr_retry,
sde,
sde ? sde->this_idx : 0,
send_context,
@@ -782,31 +647,17 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
qp->pid);
}
-void qp_comm_est(struct rvt_qp *qp)
-{
- qp->r_flags |= RVT_R_COMM_EST;
- if (qp->ibqp.event_handler) {
- struct ib_event ev;
-
- ev.device = qp->ibqp.device;
- ev.element.qp = &qp->ibqp;
- ev.event = IB_EVENT_COMM_EST;
- qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
- }
-}
-
-void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
- gfp_t gfp)
+void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv;
- priv = kzalloc_node(sizeof(*priv), gfp, rdi->dparms.node);
+ priv = kzalloc_node(sizeof(*priv), GFP_KERNEL, rdi->dparms.node);
if (!priv)
return ERR_PTR(-ENOMEM);
priv->owner = qp;
- priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), gfp,
+ priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), GFP_KERNEL,
rdi->dparms.node);
if (!priv->s_ahg) {
kfree(priv);
@@ -819,8 +670,6 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
iowait_sleep,
iowait_wakeup,
iowait_sdma_drained);
- setup_timer(&priv->s_rnr_timer, hfi1_rc_rnr_retry, (unsigned long)qp);
- qp->s_timer.function = hfi1_rc_timeout;
return priv;
}
@@ -861,7 +710,6 @@ void flush_qp_waiters(struct rvt_qp *qp)
{
lockdep_assert_held(&qp->s_lock);
flush_iowait(qp);
- hfi1_stop_rc_timers(qp);
}
void stop_send_queue(struct rvt_qp *qp)
@@ -869,7 +717,6 @@ void stop_send_queue(struct rvt_qp *qp)
struct hfi1_qp_priv *priv = qp->priv;
cancel_work_sync(&priv->s_iowait.iowork);
- hfi1_del_timers_sync(qp);
}
void quiesce_qp(struct rvt_qp *qp)
@@ -883,9 +730,7 @@ void quiesce_qp(struct rvt_qp *qp)
void notify_qp_reset(struct rvt_qp *qp)
{
- struct hfi1_qp_priv *priv = qp->priv;
-
- priv->r_adefered = 0;
+ qp->r_adefered = 0;
clear_ahg(qp);
}
@@ -900,7 +745,7 @@ void hfi1_migrate_qp(struct rvt_qp *qp)
qp->s_mig_state = IB_MIG_MIGRATED;
qp->remote_ah_attr = qp->alt_ah_attr;
- qp->port_num = qp->alt_ah_attr.port_num;
+ qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
qp->s_pkey_index = qp->s_alt_pkey_index;
qp->s_flags |= RVT_S_AHG_CLEAR;
priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
@@ -930,7 +775,7 @@ u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu)
u8 sc, vl;
ibp = &dd->pport[qp->port_num - 1].ibport_data;
- sc = ibp->sl_to_sc[qp->remote_ah_attr.sl];
+ sc = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
vl = sc_to_vlt(dd, sc);
mtu = verbs_mtu_enum_to_int(qp->ibqp.device, pmtu);
@@ -961,17 +806,20 @@ int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
void notify_error_qp(struct rvt_qp *qp)
{
- struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
struct hfi1_qp_priv *priv = qp->priv;
+ seqlock_t *lock = priv->s_iowait.lock;
- write_seqlock(&dev->iowait_lock);
- if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) {
- qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
- list_del_init(&priv->s_iowait.list);
- priv->s_iowait.lock = NULL;
- rvt_put_qp(qp);
+ if (lock) {
+ write_seqlock(lock);
+ if (!list_empty(&priv->s_iowait.list) &&
+ !(qp->s_flags & RVT_S_BUSY)) {
+ qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
+ list_del_init(&priv->s_iowait.list);
+ priv->s_iowait.lock = NULL;
+ rvt_put_qp(qp);
+ }
+ write_sequnlock(lock);
}
- write_sequnlock(&dev->iowait_lock);
if (!(qp->s_flags & RVT_S_BUSY)) {
qp->s_hdrwords = 0;
@@ -1010,7 +858,7 @@ void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl)
if (qp->port_num == ppd->port &&
(qp->ibqp.qp_type == IB_QPT_UC ||
qp->ibqp.qp_type == IB_QPT_RC) &&
- qp->remote_ah_attr.sl == sl &&
+ rdma_ah_get_sl(&qp->remote_ah_attr) == sl &&
(ib_rvt_state_ops[qp->state] &
RVT_POST_SEND_OK)) {
spin_lock_irq(&qp->r_lock);
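
The credit_table and hfi1_compute_aeth()/hfi1_get_credit() removed above now live in rdmavt as rvt_compute_aeth()/rvt_get_credit(). For reference, a stand-alone version (not part of the patch) of the binary search they perform over that table; the values are the ones removed above and aeth_credit_code is an illustrative name.

#include <stdio.h>

static const unsigned short credit_table[31] = {
        0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192,
        256, 384, 512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192,
        12288, 16384, 24576, 32768
};

/* Binary-search the largest credit code whose credit count fits `credits`. */
static unsigned int aeth_credit_code(unsigned int credits)
{
        unsigned int min = 0, max = 31, x;

        for (;;) {
                x = (min + max) / 2;
                if (credit_table[x] == credits)
                        break;
                if (credit_table[x] > credits) {
                        max = x;
                } else {
                        if (min == x)
                                break;
                        min = x;
                }
        }
        return x;
}

int main(void)
{
        /* 0 RWQEs -> code 0, 100 -> code 13 (96), 40000 -> code 30 (32768) */
        printf("%u %u %u\n", aeth_credit_code(0), aeth_credit_code(100),
               aeth_credit_code(40000));
        return 0;
}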
diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index 587d84d65bb8..6fe542b6a927 100644
--- a/drivers/infiniband/hw/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
@@ -71,14 +71,6 @@ static inline void clear_ahg(struct rvt_qp *qp)
}
/**
- * hfi1_compute_aeth - compute the AETH (syndrome + MSN)
- * @qp: the queue pair to compute the AETH for
- *
- * Returns the AETH.
- */
-__be32 hfi1_compute_aeth(struct rvt_qp *qp);
-
-/**
* hfi1_create_qp - create a queue pair for a device
* @ibpd: the protection domain who's device we create the queue pair for
* @init_attr: the attributes of the queue pair
@@ -91,14 +83,6 @@ __be32 hfi1_compute_aeth(struct rvt_qp *qp);
struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata);
-/**
- * hfi1_get_credit - flush the send work queue of a QP
- * @qp: the qp who's send work queue to flush
- * @aeth: the Acknowledge Extended Transport Header
- *
- * The QP s_lock should be held.
- */
-void hfi1_get_credit(struct rvt_qp *qp, u32 aeth);
/**
* hfi1_qp_wakeup - wake up on the indicated event
@@ -131,12 +115,6 @@ int qp_iter_next(struct qp_iter *iter);
*/
void qp_iter_print(struct seq_file *s, struct qp_iter *iter);
-/**
- * qp_comm_est - handle trap with QP established
- * @qp: the QP
- */
-void qp_comm_est(struct rvt_qp *qp);
-
void _hfi1_schedule_send(struct rvt_qp *qp);
void hfi1_schedule_send(struct rvt_qp *qp);
@@ -145,8 +123,7 @@ void hfi1_migrate_qp(struct rvt_qp *qp);
/*
* Functions provided by hfi1 driver for rdmavt to use
*/
-void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
- gfp_t gfp);
+void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp);
void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);
unsigned free_all_qps(struct rvt_dev_info *rdi);
void notify_qp_reset(struct rvt_qp *qp);
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 809b26eb6d3c..1080778a1f7c 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -57,133 +57,6 @@
/* cut down ridiculously long IB macro names */
#define OP(x) RC_OP(x)
-/**
- * hfi1_add_retry_timer - add/start a retry timer
- * @qp - the QP
- *
- * add a retry timer on the QP
- */
-static inline void hfi1_add_retry_timer(struct rvt_qp *qp)
-{
- struct ib_qp *ibqp = &qp->ibqp;
- struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
-
- lockdep_assert_held(&qp->s_lock);
- qp->s_flags |= RVT_S_TIMER;
- /* 4.096 usec. * (1 << qp->timeout) */
- qp->s_timer.expires = jiffies + qp->timeout_jiffies +
- rdi->busy_jiffies;
- add_timer(&qp->s_timer);
-}
-
-/**
- * hfi1_add_rnr_timer - add/start an rnr timer
- * @qp - the QP
- * @to - timeout in usecs
- *
- * add an rnr timer on the QP
- */
-void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to)
-{
- struct hfi1_qp_priv *priv = qp->priv;
-
- lockdep_assert_held(&qp->s_lock);
- qp->s_flags |= RVT_S_WAIT_RNR;
- priv->s_rnr_timer.expires = jiffies + usecs_to_jiffies(to);
- add_timer(&priv->s_rnr_timer);
-}
-
-/**
- * hfi1_mod_retry_timer - mod a retry timer
- * @qp - the QP
- *
- * Modify a potentially already running retry
- * timer
- */
-static inline void hfi1_mod_retry_timer(struct rvt_qp *qp)
-{
- struct ib_qp *ibqp = &qp->ibqp;
- struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
-
- lockdep_assert_held(&qp->s_lock);
- qp->s_flags |= RVT_S_TIMER;
- /* 4.096 usec. * (1 << qp->timeout) */
- mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies +
- rdi->busy_jiffies);
-}
-
-/**
- * hfi1_stop_retry_timer - stop a retry timer
- * @qp - the QP
- *
- * stop a retry timer and return if the timer
- * had been pending.
- */
-static inline int hfi1_stop_retry_timer(struct rvt_qp *qp)
-{
- int rval = 0;
-
- lockdep_assert_held(&qp->s_lock);
- /* Remove QP from retry */
- if (qp->s_flags & RVT_S_TIMER) {
- qp->s_flags &= ~RVT_S_TIMER;
- rval = del_timer(&qp->s_timer);
- }
- return rval;
-}
-
-/**
- * hfi1_stop_rc_timers - stop all timers
- * @qp - the QP
- *
- * stop any pending timers
- */
-void hfi1_stop_rc_timers(struct rvt_qp *qp)
-{
- struct hfi1_qp_priv *priv = qp->priv;
-
- lockdep_assert_held(&qp->s_lock);
- /* Remove QP from all timers */
- if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
- qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
- del_timer(&qp->s_timer);
- del_timer(&priv->s_rnr_timer);
- }
-}
-
-/**
- * hfi1_stop_rnr_timer - stop an rnr timer
- * @qp - the QP
- *
- * stop an rnr timer and return if the timer
- * had been pending.
- */
-static inline int hfi1_stop_rnr_timer(struct rvt_qp *qp)
-{
- int rval = 0;
- struct hfi1_qp_priv *priv = qp->priv;
-
- lockdep_assert_held(&qp->s_lock);
- /* Remove QP from rnr timer */
- if (qp->s_flags & RVT_S_WAIT_RNR) {
- qp->s_flags &= ~RVT_S_WAIT_RNR;
- rval = del_timer(&priv->s_rnr_timer);
- }
- return rval;
-}
-
-/**
- * hfi1_del_timers_sync - wait for any timeout routines to exit
- * @qp - the QP
- */
-void hfi1_del_timers_sync(struct rvt_qp *qp)
-{
- struct hfi1_qp_priv *priv = qp->priv;
-
- del_timer_sync(&qp->s_timer);
- del_timer_sync(&priv->s_rnr_timer);
-}
-
static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
u32 psn, u32 pmtu)
{
@@ -194,7 +67,7 @@ static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
ss->sg_list = wqe->sg_list + 1;
ss->num_sge = wqe->wr.num_sge;
ss->total_len = wqe->length;
- hfi1_skip_sge(ss, len, 0);
+ rvt_skip_sge(ss, len, false);
return wqe->length - len;
}
@@ -284,7 +157,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
e->sent = 1;
}
- ohdr->u.aeth = hfi1_compute_aeth(qp);
+ ohdr->u.aeth = rvt_compute_aeth(qp);
hwords++;
qp->s_ack_rdma_psn = e->psn;
bth2 = mask_psn(qp->s_ack_rdma_psn++);
@@ -293,7 +166,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
ps->s_txreq->ss = NULL;
len = 0;
qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
- ohdr->u.at.aeth = hfi1_compute_aeth(qp);
+ ohdr->u.at.aeth = rvt_compute_aeth(qp);
ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth);
hwords += sizeof(ohdr->u.at) / sizeof(u32);
bth2 = mask_psn(e->psn);
@@ -315,7 +188,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
len = pmtu;
middle = HFI1_CAP_IS_KSET(SDMA_AHG);
} else {
- ohdr->u.aeth = hfi1_compute_aeth(qp);
+ ohdr->u.aeth = rvt_compute_aeth(qp);
hwords++;
qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
@@ -338,11 +211,11 @@ normal:
ps->s_txreq->ss = NULL;
if (qp->s_nak_state)
ohdr->u.aeth =
- cpu_to_be32((qp->r_msn & HFI1_MSN_MASK) |
+ cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
(qp->s_nak_state <<
- HFI1_AETH_CREDIT_SHIFT));
+ IB_AETH_CREDIT_SHIFT));
else
- ohdr->u.aeth = hfi1_compute_aeth(qp);
+ ohdr->u.aeth = rvt_compute_aeth(qp);
hwords++;
len = 0;
bth0 = OP(ACKNOWLEDGE) << 24;
@@ -401,7 +274,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
goto bail_no_tx;
ohdr = &ps->s_txreq->phdr.hdr.u.oth;
- if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
/* Sending responses has higher priority over sending requests. */
@@ -414,7 +287,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
goto bail;
/* We are in the error state, flush the work request. */
smp_read_barrier_depends(); /* see post_one_send() */
- if (qp->s_last == ACCESS_ONCE(qp->s_head))
+ if (qp->s_last == READ_ONCE(qp->s_head))
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
if (iowait_sdma_pending(&priv->s_iowait)) {
@@ -457,7 +330,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
newreq = 0;
if (qp->s_cur == qp->s_tail) {
/* Check if send work queue is empty. */
- if (qp->s_tail == qp->s_head) {
+ smp_read_barrier_depends(); /* see post_one_send() */
+ if (qp->s_tail == READ_ONCE(qp->s_head)) {
clear_ahg(qp);
goto bail;
}
@@ -518,7 +392,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
case IB_WR_SEND_WITH_INV:
/* If no credit, return. */
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
- cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
+ rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
goto bail;
}
@@ -555,7 +429,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
case IB_WR_RDMA_WRITE_WITH_IMM:
/* If no credit, return. */
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
- cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
+ rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
goto bail;
}
@@ -840,7 +714,7 @@ bail_no_tx:
void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
int is_fecn)
{
- struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
u64 pbc, pbc_flags = 0;
u16 lrh0;
@@ -854,6 +728,9 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
struct ib_other_headers *ohdr;
unsigned long flags;
+ /* clear the defer count */
+ qp->r_adefered = 0;
+
/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
if (qp->s_flags & RVT_S_RESP_PENDING)
goto queue_ack;
@@ -866,9 +743,10 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
/* Construct the header */
/* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4 */
hwords = 6;
- if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
+ if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) {
hwords += hfi1_make_grh(ibp, &hdr.u.l.grh,
- &qp->remote_ah_attr.grh, hwords, 0);
+ rdma_ah_read_grh(&qp->remote_ah_attr),
+ hwords, 0);
ohdr = &hdr.u.l.oth;
lrh0 = HFI1_LRH_GRH;
} else {
@@ -880,22 +758,24 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
if (qp->s_mig_state == IB_MIG_MIGRATED)
bth0 |= IB_BTH_MIG_REQ;
if (qp->r_nak_state)
- ohdr->u.aeth = cpu_to_be32((qp->r_msn & HFI1_MSN_MASK) |
+ ohdr->u.aeth = cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
(qp->r_nak_state <<
- HFI1_AETH_CREDIT_SHIFT));
+ IB_AETH_CREDIT_SHIFT));
else
- ohdr->u.aeth = hfi1_compute_aeth(qp);
- sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
+ ohdr->u.aeth = rvt_compute_aeth(qp);
+ sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT);
- lrh0 |= (sc5 & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
+ lrh0 |= (sc5 & 0xf) << 12 | (rdma_ah_get_sl(&qp->remote_ah_attr)
+ & 0xf) << 4;
hdr.lrh[0] = cpu_to_be16(lrh0);
- hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+ hdr.lrh[1] = cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr));
hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
- hdr.lrh[3] = cpu_to_be16(ppd->lid | qp->remote_ah_attr.src_path_bits);
+ hdr.lrh[3] = cpu_to_be16(ppd->lid |
+ rdma_ah_get_path_bits(&qp->remote_ah_attr));
ohdr->bth[0] = cpu_to_be32(bth0);
ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
- ohdr->bth[1] |= cpu_to_be32((!!is_fecn) << HFI1_BECN_SHIFT);
+ ohdr->bth[1] |= cpu_to_be32((!!is_fecn) << IB_BECN_SHIFT);
ohdr->bth[2] = cpu_to_be32(mask_psn(qp->r_ack_psn));
/* Don't try to send ACKs if the link isn't ACTIVE */
@@ -1038,7 +918,7 @@ done:
* Back up requester to resend the last un-ACKed request.
* The QP r_lock and s_lock should be held and interrupts disabled.
*/
-static void restart_rc(struct rvt_qp *qp, u32 psn, int wait)
+void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
{
struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
struct hfi1_ibport *ibp;
@@ -1075,44 +955,6 @@ static void restart_rc(struct rvt_qp *qp, u32 psn, int wait)
}
/*
- * This is called from s_timer for missing responses.
- */
-void hfi1_rc_timeout(unsigned long arg)
-{
- struct rvt_qp *qp = (struct rvt_qp *)arg;
- struct hfi1_ibport *ibp;
- unsigned long flags;
-
- spin_lock_irqsave(&qp->r_lock, flags);
- spin_lock(&qp->s_lock);
- if (qp->s_flags & RVT_S_TIMER) {
- ibp = to_iport(qp->ibqp.device, qp->port_num);
- ibp->rvp.n_rc_timeouts++;
- qp->s_flags &= ~RVT_S_TIMER;
- del_timer(&qp->s_timer);
- trace_hfi1_timeout(qp, qp->s_last_psn + 1);
- restart_rc(qp, qp->s_last_psn + 1, 1);
- hfi1_schedule_send(qp);
- }
- spin_unlock(&qp->s_lock);
- spin_unlock_irqrestore(&qp->r_lock, flags);
-}
-
-/*
- * This is called from s_timer for RNR timeouts.
- */
-void hfi1_rc_rnr_retry(unsigned long arg)
-{
- struct rvt_qp *qp = (struct rvt_qp *)arg;
- unsigned long flags;
-
- spin_lock_irqsave(&qp->s_lock, flags);
- hfi1_stop_rnr_timer(qp);
- hfi1_schedule_send(qp);
- spin_unlock_irqrestore(&qp->s_lock, flags);
-}
-
-/*
* Set qp->s_sending_psn to the next PSN after the given one.
* This would be psn+1 except when RDMA reads are present.
*/
@@ -1150,16 +992,16 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
u32 psn;
lockdep_assert_held(&qp->s_lock);
- if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
+ if (!(ib_rvt_state_ops[qp->state] & RVT_SEND_OR_FLUSH_OR_RECV_OK))
return;
/* Find out where the BTH is */
- if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH)
+ if (ib_get_lnh(hdr) == HFI1_LRH_BTH)
ohdr = &hdr->u.oth;
else
ohdr = &hdr->u.l.oth;
- opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+ opcode = ib_bth_get_opcode(ohdr);
if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
WARN_ON(!qp->s_rdma_ack_cnt);
@@ -1178,7 +1020,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
!(qp->s_flags &
(RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
- hfi1_add_retry_timer(qp);
+ rvt_add_retry_timer(qp);
while (qp->s_last != qp->s_acked) {
u32 s_last;
@@ -1188,13 +1030,17 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
break;
s_last = qp->s_last;
+ trace_hfi1_qp_send_completion(qp, wqe, s_last);
if (++s_last >= qp->s_size)
s_last = 0;
qp->s_last = s_last;
/* see post_send() */
barrier();
rvt_put_swqe(wqe);
- rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ ib_hfi1_wc_opcode[wqe->wr.opcode],
+ IB_WC_SUCCESS);
}
/*
* If we were waiting for sends to complete before re-sending,
@@ -1236,12 +1082,16 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
rvt_put_swqe(wqe);
s_last = qp->s_last;
+ trace_hfi1_qp_send_completion(qp, wqe, s_last);
if (++s_last >= qp->s_size)
s_last = 0;
qp->s_last = s_last;
/* see post_send() */
barrier();
- rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ ib_hfi1_wc_opcode[wqe->wr.opcode],
+ IB_WC_SUCCESS);
} else {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
@@ -1252,10 +1102,11 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
*/
if (ppd->dd->flags & HFI1_HAS_SEND_DMA) {
struct sdma_engine *engine;
+ u8 sl = rdma_ah_get_sl(&qp->remote_ah_attr);
u8 sc5;
/* For now use sc to find engine */
- sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
+ sc5 = ibp->sl_to_sc[sl];
engine = qp_to_sdma_engine(qp, sc5);
sdma_engine_progress_schedule(engine);
}
@@ -1308,7 +1159,6 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
int ret = 0;
u32 ack_psn;
int diff;
- unsigned long to;
lockdep_assert_held(&qp->s_lock);
/*
@@ -1318,10 +1168,10 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
* request but will include an ACK'ed request(s).
*/
ack_psn = psn;
- if (aeth >> 29)
+ if (aeth >> IB_AETH_NAK_SHIFT)
ack_psn--;
wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
- ibp = to_iport(qp->ibqp.device, qp->port_num);
+ ibp = rcd_to_iport(rcd);
/*
* The MSN might be for a later WQE than the PSN indicates so
@@ -1357,7 +1207,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
/* Retry this request. */
if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
qp->r_flags |= RVT_R_RDMAR_SEQ;
- restart_rc(qp, qp->s_last_psn + 1, 0);
+ hfi1_restart_rc(qp, qp->s_last_psn + 1, 0);
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_SEND;
rvt_get_qp(qp);
@@ -1398,7 +1248,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
break;
}
- switch (aeth >> 29) {
+ switch (aeth >> IB_AETH_NAK_SHIFT) {
case 0: /* ACK */
this_cpu_inc(*ibp->rvp.rc_acks);
if (qp->s_acked != qp->s_tail) {
@@ -1406,7 +1256,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
* We are expecting more ACKs so
* mod the retry timer.
*/
- hfi1_mod_retry_timer(qp);
+ rvt_mod_retry_timer(qp);
/*
* We can stop re-sending the earlier packets and
* continue with the next packet the receiver wants.
@@ -1415,7 +1265,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
reset_psn(qp, psn + 1);
} else {
/* No more acks - kill all timers */
- hfi1_stop_rc_timers(qp);
+ rvt_stop_rc_timers(qp);
if (cmp_psn(qp->s_psn, psn) <= 0) {
qp->s_state = OP(SEND_LAST);
qp->s_psn = psn + 1;
@@ -1425,7 +1275,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
qp->s_flags &= ~RVT_S_WAIT_ACK;
hfi1_schedule_send(qp);
}
- hfi1_get_credit(qp, aeth);
+ rvt_get_credit(qp, aeth);
qp->s_rnr_retry = qp->s_rnr_retry_cnt;
qp->s_retry = qp->s_retry_cnt;
update_last_psn(qp, psn);
@@ -1452,11 +1302,8 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
reset_psn(qp, psn);
qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK);
- hfi1_stop_rc_timers(qp);
- to =
- ib_hfi1_rnr_table[(aeth >> HFI1_AETH_CREDIT_SHIFT) &
- HFI1_AETH_CREDIT_MASK];
- hfi1_add_rnr_timer(qp, to);
+ rvt_stop_rc_timers(qp);
+ rvt_add_rnr_timer(qp, aeth);
return 0;
case 3: /* NAK */
@@ -1464,8 +1311,8 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
goto bail_stop;
/* The last valid PSN is the previous PSN. */
update_last_psn(qp, psn - 1);
- switch ((aeth >> HFI1_AETH_CREDIT_SHIFT) &
- HFI1_AETH_CREDIT_MASK) {
+ switch ((aeth >> IB_AETH_CREDIT_SHIFT) &
+ IB_AETH_CREDIT_MASK) {
case 0: /* PSN sequence error */
ibp->rvp.n_seq_naks++;
/*
@@ -1474,7 +1321,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
* RDMA READ response which terminates the RDMA
* READ.
*/
- restart_rc(qp, psn, 0);
+ hfi1_restart_rc(qp, psn, 0);
hfi1_schedule_send(qp);
break;
@@ -1513,7 +1360,7 @@ reserved:
}
/* cannot be reached */
bail_stop:
- hfi1_stop_rc_timers(qp);
+ rvt_stop_rc_timers(qp);
return ret;
}
@@ -1528,7 +1375,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
lockdep_assert_held(&qp->s_lock);
/* Remove QP from retry timer */
- hfi1_stop_rc_timers(qp);
+ rvt_stop_rc_timers(qp);
wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
@@ -1542,7 +1389,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
ibp->rvp.n_rdma_seq++;
qp->r_flags |= RVT_R_RDMAR_SEQ;
- restart_rc(qp, qp->s_last_psn + 1, 0);
+ hfi1_restart_rc(qp, qp->s_last_psn + 1, 0);
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_SEND;
rvt_get_qp(qp);
@@ -1586,7 +1433,7 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp,
/* Ignore invalid responses. */
smp_read_barrier_depends(); /* see post_one_send */
- if (cmp_psn(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0)
+ if (cmp_psn(psn, READ_ONCE(qp->s_next_psn)) >= 0)
goto ack_done;
/* Ignore duplicate responses. */
@@ -1595,8 +1442,8 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp,
/* Update credits for "ghost" ACKs */
if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
aeth = be32_to_cpu(ohdr->u.aeth);
- if ((aeth >> 29) == 0)
- hfi1_get_credit(qp, aeth);
+ if ((aeth >> IB_AETH_NAK_SHIFT) == 0)
+ rvt_get_credit(qp, aeth);
}
goto ack_done;
}
@@ -1656,8 +1503,7 @@ read_middle:
* We got a response so update the timeout.
* 4.096 usec. * (1 << qp->timeout)
*/
- qp->s_flags |= RVT_S_TIMER;
- mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies);
+ rvt_mod_retry_timer(qp);
if (qp->s_flags & RVT_S_WAIT_ACK) {
qp->s_flags &= ~RVT_S_WAIT_ACK;
hfi1_schedule_send(qp);
@@ -1673,7 +1519,7 @@ read_middle:
qp->s_rdma_read_len -= pmtu;
update_last_psn(qp, psn);
spin_unlock_irqrestore(&qp->s_lock, flags);
- hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0, 0);
+ hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, false, false);
goto bail;
case OP(RDMA_READ_RESPONSE_ONLY):
@@ -1681,7 +1527,7 @@ read_middle:
if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
goto ack_done;
/* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/*
* Check that the data size is >= 0 && <= pmtu.
* Remember to account for ICRC (4).
@@ -1705,7 +1551,7 @@ read_middle:
if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
goto ack_op_err;
/* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/*
* Check that the data size is >= 1 && <= pmtu.
* Remember to account for ICRC (4).
@@ -1717,7 +1563,7 @@ read_last:
if (unlikely(tlen != qp->s_rdma_read_len))
goto ack_len_err;
aeth = be32_to_cpu(ohdr->u.aeth);
- hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0, 0);
+ hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, false, false);
WARN_ON(qp->s_rdma_read_sge.num_sge);
(void)do_rc_ack(qp, aeth, psn,
OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
@@ -1757,9 +1603,7 @@ static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd,
static inline void rc_cancel_ack(struct rvt_qp *qp)
{
- struct hfi1_qp_priv *priv = qp->priv;
-
- priv->r_adefered = 0;
+ qp->r_adefered = 0;
if (list_empty(&qp->rspwait))
return;
list_del_init(&qp->rspwait);
@@ -1786,7 +1630,7 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
struct rvt_qp *qp, u32 opcode, u32 psn,
int diff, struct hfi1_ctxtdata *rcd)
{
- struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct rvt_ack_entry *e;
unsigned long flags;
u8 i, prev;
@@ -1961,25 +1805,6 @@ send_ack:
return 0;
}
-void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err)
-{
- unsigned long flags;
- int lastwqe;
-
- spin_lock_irqsave(&qp->s_lock, flags);
- lastwqe = rvt_error_qp(qp, err);
- spin_unlock_irqrestore(&qp->s_lock, flags);
-
- if (lastwqe) {
- struct ib_event ev;
-
- ev.device = qp->ibqp.device;
- ev.element.qp = &qp->ibqp;
- ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
- qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
- }
-}
-
static inline void update_ack_queue(struct rvt_qp *qp, unsigned n)
{
unsigned next;
@@ -2095,7 +1920,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
void *data = packet->ebuf;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
- struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct ib_other_headers *ohdr = packet->ohdr;
u32 bth0, opcode;
u32 hdrsize = packet->hlen;
@@ -2106,8 +1931,9 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
int diff;
struct ib_reth *reth;
unsigned long flags;
- int ret, is_fecn = 0;
- int copy_last = 0;
+ int ret;
+ bool is_fecn = false;
+ bool copy_last = false;
u32 rkey;
lockdep_assert_held(&qp->r_lock);
@@ -2118,7 +1944,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
is_fecn = process_ecn(qp, packet, false);
psn = be32_to_cpu(ohdr->bth[2]);
- opcode = (bth0 >> 24) & 0xff;
+ opcode = ib_bth_get_opcode(ohdr);
/*
* Process responses (ACKs) before anything else. Note that the
@@ -2180,7 +2006,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
}
if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
- qp_comm_est(qp);
+ rvt_comm_est(qp);
/* OK, process the packet. */
switch (opcode) {
@@ -2201,7 +2027,7 @@ send_middle:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto nack_inv;
- hfi1_copy_sge(&qp->r_sge, data, pmtu, 1, 0);
+ hfi1_copy_sge(&qp->r_sge, data, pmtu, true, false);
break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -2241,7 +2067,7 @@ send_last_inv:
wc.wc_flags = IB_WC_WITH_INVALIDATE;
goto send_last;
case OP(RDMA_WRITE_LAST):
- copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user;
+ copy_last = rvt_is_user_qp(qp);
/* fall through */
case OP(SEND_LAST):
no_immediate_data:
@@ -2249,7 +2075,7 @@ no_immediate_data:
wc.ex.imm_data = 0;
send_last:
/* Get the number of bytes the message was padded by. */
- pad = (bth0 >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
@@ -2259,7 +2085,7 @@ send_last:
wc.byte_len = tlen + qp->r_rcv_len;
if (unlikely(wc.byte_len > qp->r_len))
goto nack_inv;
- hfi1_copy_sge(&qp->r_sge, data, tlen, 1, copy_last);
+ hfi1_copy_sge(&qp->r_sge, data, tlen, true, copy_last);
rvt_put_ss(&qp->r_sge);
qp->r_msn++;
if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
@@ -2273,7 +2099,7 @@ send_last:
wc.opcode = IB_WC_RECV;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.slid = qp->remote_ah_attr.dlid;
+ wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
/*
* It seems that IB mandates the presence of an SL in a
* work completion only for the UD transport (see section
@@ -2285,7 +2111,7 @@ send_last:
*
* See also OPA Vol. 1, section 9.7.6, and table 9-17.
*/
- wc.sl = qp->remote_ah_attr.sl;
+ wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
/* zero fields that are N/A */
wc.vendor_err = 0;
wc.pkey_index = 0;
@@ -2297,7 +2123,7 @@ send_last:
break;
case OP(RDMA_WRITE_ONLY):
- copy_last = 1;
+ copy_last = rvt_is_user_qp(qp);
/* fall through */
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
@@ -2333,8 +2159,11 @@ send_last:
ret = hfi1_rvt_get_rwqe(qp, 1);
if (ret < 0)
goto nack_op_err;
- if (!ret)
+ if (!ret) {
+ /* peer will send again */
+ rvt_put_ss(&qp->r_sge);
goto rnr_nak;
+ }
wc.ex.imm_data = ohdr->u.rc.imm_data;
wc.wc_flags = IB_WC_WITH_IMM;
goto send_last;
@@ -2485,13 +2314,11 @@ send_last:
qp->r_nak_state = 0;
/* Send an ACK if requested or required. */
if (psn & IB_BTH_REQ_ACK) {
- struct hfi1_qp_priv *priv = qp->priv;
-
if (packet->numpkt == 0) {
rc_cancel_ack(qp);
goto send_ack;
}
- if (priv->r_adefered >= HFI1_PSN_CREDIT) {
+ if (qp->r_adefered >= HFI1_PSN_CREDIT) {
rc_cancel_ack(qp);
goto send_ack;
}
@@ -2499,7 +2326,7 @@ send_last:
rc_cancel_ack(qp);
goto send_ack;
}
- priv->r_adefered++;
+ qp->r_adefered++;
rc_defered_ack(rcd, qp);
}
return;
@@ -2512,7 +2339,7 @@ rnr_nak:
return;
nack_op_err:
- hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
qp->r_nak_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
qp->r_ack_psn = qp->r_psn;
/* Queue NAK for later */
@@ -2522,7 +2349,7 @@ nack_op_err:
nack_inv_unlck:
spin_unlock_irqrestore(&qp->s_lock, flags);
nack_inv:
- hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
qp->r_nak_state = IB_NAK_INVALID_REQUEST;
qp->r_ack_psn = qp->r_psn;
/* Queue NAK for later */
@@ -2532,7 +2359,7 @@ nack_inv:
nack_acc_unlck:
spin_unlock_irqrestore(&qp->s_lock, flags);
nack_acc:
- hfi1_rc_error(qp, IB_WC_LOC_PROT_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
qp->r_ack_psn = qp->r_psn;
send_ack:
@@ -2547,7 +2374,7 @@ void hfi1_rc_hdrerr(
{
int has_grh = rcv_flags & HFI1_HAS_GRH;
struct ib_other_headers *ohdr;
- struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct hfi1_ibport *ibp = rcd_to_iport(rcd);
int diff;
u32 opcode;
u32 psn, bth0;
@@ -2562,7 +2389,7 @@ void hfi1_rc_hdrerr(
return;
psn = be32_to_cpu(ohdr->bth[2]);
- opcode = (bth0 >> 24) & 0xff;
+ opcode = ib_bth_get_opcode(ohdr);
/* Only deal with RDMA Writes for now */
if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
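The open-coded BTH field extractions removed above are replaced throughout this patch by shared header accessors. Judging from the expressions they replace, the accessors are presumably equivalent to the following sketch (inferred from the call sites, not quoted from include/rdma/ib_hdrs.h):

    /* Sketch of the accessor semantics, inferred from the removed code. */
    static inline u8 ib_bth_get_opcode(struct ib_other_headers *ohdr)
    {
            return (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; /* was (bth0 >> 24) & 0xff */
    }

    static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr)
    {
            return (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;    /* was (bth0 >> 20) & 3 */
    }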
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index 717ed4b159d3..3a17daba28a9 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -54,44 +54,6 @@
#include "trace.h"
/*
- * Convert the AETH RNR timeout code into the number of microseconds.
- */
-const u32 ib_hfi1_rnr_table[32] = {
- 655360, /* 00: 655.36 */
- 10, /* 01: .01 */
- 20, /* 02 .02 */
- 30, /* 03: .03 */
- 40, /* 04: .04 */
- 60, /* 05: .06 */
- 80, /* 06: .08 */
- 120, /* 07: .12 */
- 160, /* 08: .16 */
- 240, /* 09: .24 */
- 320, /* 0A: .32 */
- 480, /* 0B: .48 */
- 640, /* 0C: .64 */
- 960, /* 0D: .96 */
- 1280, /* 0E: 1.28 */
- 1920, /* 0F: 1.92 */
- 2560, /* 10: 2.56 */
- 3840, /* 11: 3.84 */
- 5120, /* 12: 5.12 */
- 7680, /* 13: 7.68 */
- 10240, /* 14: 10.24 */
- 15360, /* 15: 15.36 */
- 20480, /* 16: 20.48 */
- 30720, /* 17: 30.72 */
- 40960, /* 18: 40.96 */
- 61440, /* 19: 61.44 */
- 81920, /* 1A: 81.92 */
- 122880, /* 1B: 122.88 */
- 163840, /* 1C: 163.84 */
- 245760, /* 1D: 245.76 */
- 327680, /* 1E: 327.68 */
- 491520 /* 1F: 491.52 */
-};
-
-/*
* Validate a RWQE and fill in the SGE state.
* Return 1 if OK.
*/
@@ -257,72 +219,84 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
{
__be64 guid;
unsigned long flags;
- u8 sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
+ u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
if (!has_grh) {
- if (qp->alt_ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
+ IB_AH_GRH)
goto err;
} else {
- if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH))
+ const struct ib_global_route *grh;
+
+ if (!(rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
+ IB_AH_GRH))
goto err;
- guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index);
+ grh = rdma_ah_read_grh(&qp->alt_ah_attr);
+ guid = get_sguid(ibp, grh->sgid_index);
if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
guid))
goto err;
if (!gid_ok(
&hdr->u.l.grh.sgid,
- qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
- qp->alt_ah_attr.grh.dgid.global.interface_id))
+ grh->dgid.global.subnet_prefix,
+ grh->dgid.global.interface_id))
goto err;
}
- if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
- sc5, be16_to_cpu(hdr->lrh[3])))) {
+ if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5,
+ ib_get_slid(hdr)))) {
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
(u16)bth0,
- (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
+ ib_get_sl(hdr),
0, qp->ibqp.qp_num,
- be16_to_cpu(hdr->lrh[3]),
- be16_to_cpu(hdr->lrh[1]));
+ ib_get_slid(hdr),
+ ib_get_dlid(hdr));
goto err;
}
/* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
- if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid ||
- ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num)
+ if (ib_get_slid(hdr) !=
+ rdma_ah_get_dlid(&qp->alt_ah_attr) ||
+ ppd_from_ibp(ibp)->port !=
+ rdma_ah_get_port_num(&qp->alt_ah_attr))
goto err;
spin_lock_irqsave(&qp->s_lock, flags);
hfi1_migrate_qp(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
} else {
if (!has_grh) {
- if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
+ IB_AH_GRH)
goto err;
} else {
- if (!(qp->remote_ah_attr.ah_flags & IB_AH_GRH))
+ const struct ib_global_route *grh;
+
+ if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
+ IB_AH_GRH))
goto err;
- guid = get_sguid(ibp,
- qp->remote_ah_attr.grh.sgid_index);
+ grh = rdma_ah_read_grh(&qp->remote_ah_attr);
+ guid = get_sguid(ibp, grh->sgid_index);
if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
guid))
goto err;
if (!gid_ok(
&hdr->u.l.grh.sgid,
- qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
- qp->remote_ah_attr.grh.dgid.global.interface_id))
+ grh->dgid.global.subnet_prefix,
+ grh->dgid.global.interface_id))
goto err;
}
- if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
- sc5, be16_to_cpu(hdr->lrh[3])))) {
+ if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5,
+ ib_get_slid(hdr)))) {
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
(u16)bth0,
- (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
+ ib_get_sl(hdr),
0, qp->ibqp.qp_num,
- be16_to_cpu(hdr->lrh[3]),
- be16_to_cpu(hdr->lrh[1]));
+ ib_get_slid(hdr),
+ ib_get_dlid(hdr));
goto err;
}
/* Validate the SLID. See Ch. 9.6.1.5 */
- if (be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid ||
+ if (ib_get_slid(hdr) !=
+ rdma_ah_get_dlid(&qp->remote_ah_attr) ||
ppd_from_ibp(ibp)->port != qp->port_num)
goto err;
if (qp->s_mig_state == IB_MIG_REARM &&
@@ -358,10 +332,9 @@ static void ruc_loopback(struct rvt_qp *sqp)
u64 sdata;
atomic64_t *maddr;
enum ib_wc_status send_status;
- int release;
+ bool release;
int ret;
- int copy_last = 0;
- u32 to;
+ bool copy_last = false;
int local_ops = 0;
rcu_read_lock();
@@ -425,7 +398,7 @@ again:
memset(&wc, 0, sizeof(wc));
send_status = IB_WC_SUCCESS;
- release = 1;
+ release = true;
sqp->s_sge.sge = wqe->sg_list[0];
sqp->s_sge.sg_list = wqe->sg_list + 1;
sqp->s_sge.num_sge = wqe->wr.num_sge;
@@ -476,7 +449,7 @@ send:
/* skip copy_last set and qp_access_flags recheck */
goto do_write;
case IB_WR_RDMA_WRITE:
- copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user;
+ copy_last = rvt_is_user_qp(qp);
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
goto inv_err;
do_write:
@@ -500,7 +473,7 @@ do_write:
wqe->rdma_wr.rkey,
IB_ACCESS_REMOTE_READ)))
goto acc_err;
- release = 0;
+ release = false;
sqp->s_sge.sg_list = NULL;
sqp->s_sge.num_sge = 1;
qp->r_sge.sge = wqe->sg_list[0];
@@ -581,8 +554,8 @@ do_write:
wc.byte_len = wqe->length;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.slid = qp->remote_ah_attr.dlid;
- wc.sl = qp->remote_ah_attr.sl;
+ wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
+ wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
@@ -618,8 +591,8 @@ rnr_nak:
spin_lock_irqsave(&sqp->s_lock, flags);
if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
goto clr_busy;
- to = ib_hfi1_rnr_table[qp->r_min_rnr_timer];
- hfi1_add_rnr_timer(sqp, to);
+ rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
+ IB_AETH_CREDIT_SHIFT);
goto clr_busy;
op_err:
@@ -637,7 +610,7 @@ acc_err:
wc.status = IB_WC_LOC_PROT_ERR;
err:
/* responder goes to error state */
- hfi1_rc_error(qp, wc.status);
+ rvt_rc_error(qp, wc.status);
serr:
spin_lock_irqsave(&sqp->s_lock, flags);
@@ -676,7 +649,7 @@ done:
* Return the size of the header in 32 bit words.
*/
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
- struct ib_global_route *grh, u32 hwords, u32 nwords)
+ const struct ib_global_route *grh, u32 hwords, u32 nwords)
{
hdr->version_tclass_flow =
cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) |
@@ -770,15 +743,17 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
extra_bytes = -ps->s_txreq->s_cur_size & 3;
nwords = (ps->s_txreq->s_cur_size + extra_bytes) >> 2;
lrh0 = HFI1_LRH_BTH;
- if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
- qp->s_hdrwords += hfi1_make_grh(ibp,
- &ps->s_txreq->phdr.hdr.u.l.grh,
- &qp->remote_ah_attr.grh,
- qp->s_hdrwords, nwords);
+ if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) {
+ qp->s_hdrwords +=
+ hfi1_make_grh(ibp,
+ &ps->s_txreq->phdr.hdr.u.l.grh,
+ rdma_ah_read_grh(&qp->remote_ah_attr),
+ qp->s_hdrwords, nwords);
lrh0 = HFI1_LRH_GRH;
middle = 0;
}
- lrh0 |= (priv->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
+ lrh0 |= (priv->s_sc & 0xf) << 12 |
+ (rdma_ah_get_sl(&qp->remote_ah_attr) & 0xf) << 4;
/*
* reset s_ahg/AHG fields
*
@@ -802,11 +777,13 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
else
qp->s_flags &= ~RVT_S_AHG_VALID;
ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0);
- ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+ ps->s_txreq->phdr.hdr.lrh[1] =
+ cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr));
ps->s_txreq->phdr.hdr.lrh[2] =
cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
- ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
- qp->remote_ah_attr.src_path_bits);
+ ps->s_txreq->phdr.hdr.lrh[3] =
+ cpu_to_be16(ppd_from_ibp(ibp)->lid |
+ rdma_ah_get_path_bits(&qp->remote_ah_attr));
bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
bth0 |= extra_bytes << 20;
ohdr->bth[0] = cpu_to_be32(bth0);
@@ -814,7 +791,7 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
if (qp->s_flags & RVT_S_ECN) {
qp->s_flags &= ~RVT_S_ECN;
/* we recently received a FECN, so return a BECN */
- bth1 |= (HFI1_BECN_MASK << HFI1_BECN_SHIFT);
+ bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT);
}
ohdr->bth[1] = cpu_to_be32(bth1);
ohdr->bth[2] = cpu_to_be32(bth2);
@@ -823,59 +800,102 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
/* when sending, force a reschedule every one of these periods */
#define SEND_RESCHED_TIMEOUT (5 * HZ) /* 5s in jiffies */
+/**
+ * schedule_send_yield - test for a yield required for QP send engine
+ * @qp: a pointer to the QP
+ * @ps: a pointer to a structure with commonly looked-up values for
+ *      the send engine's progress (including the timeout deadline)
+ *
+ * This routine checks if the time slice for the QP has expired
+ * for RC QPs; if so, an additional work entry is queued. At this
+ * point, other QPs have an opportunity to be scheduled. It
+ * returns true if a yield is required, otherwise false
+ * is returned.
+ */
+static bool schedule_send_yield(struct rvt_qp *qp,
+ struct hfi1_pkt_state *ps)
+{
+ if (unlikely(time_after(jiffies, ps->timeout))) {
+ if (!ps->in_thread ||
+ workqueue_congested(ps->cpu, ps->ppd->hfi1_wq)) {
+ spin_lock_irqsave(&qp->s_lock, ps->flags);
+ qp->s_flags &= ~RVT_S_BUSY;
+ hfi1_schedule_send(qp);
+ spin_unlock_irqrestore(&qp->s_lock, ps->flags);
+ this_cpu_inc(*ps->ppd->dd->send_schedule);
+ trace_hfi1_rc_expired_time_slice(qp, true);
+ return true;
+ }
+
+ cond_resched();
+ this_cpu_inc(*ps->ppd->dd->send_schedule);
+ ps->timeout = jiffies + ps->timeout_int;
+ }
+
+ trace_hfi1_rc_expired_time_slice(qp, false);
+ return false;
+}
+
+void hfi1_do_send_from_rvt(struct rvt_qp *qp)
+{
+ hfi1_do_send(qp, false);
+}
+
void _hfi1_do_send(struct work_struct *work)
{
struct iowait *wait = container_of(work, struct iowait, iowork);
struct rvt_qp *qp = iowait_to_qp(wait);
- hfi1_do_send(qp);
+ hfi1_do_send(qp, true);
}
/**
* hfi1_do_send - perform a send on a QP
* @work: contains a pointer to the QP
+ * @in_thread: true if in a workqueue thread
*
* Process entries in the send work queue until credit or queue is
* exhausted. Only allow one CPU to send a packet per QP.
* Otherwise, two threads could send packets out of order.
*/
-void hfi1_do_send(struct rvt_qp *qp)
+void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
{
struct hfi1_pkt_state ps;
struct hfi1_qp_priv *priv = qp->priv;
int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
- unsigned long timeout;
- unsigned long timeout_int;
- int cpu;
ps.dev = to_idev(qp->ibqp.device);
ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
ps.ppd = ppd_from_ibp(ps.ibp);
+ ps.in_thread = in_thread;
+
+ trace_hfi1_rc_do_send(qp, in_thread);
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
- if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc
- ) - 1)) ==
- ps.ppd->lid)) {
+ if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
+ ~((1 << ps.ppd->lmc) - 1)) ==
+ ps.ppd->lid)) {
ruc_loopback(qp);
return;
}
make_req = hfi1_make_rc_req;
- timeout_int = (qp->timeout_jiffies);
+ ps.timeout_int = qp->timeout_jiffies;
break;
case IB_QPT_UC:
- if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc
- ) - 1)) ==
- ps.ppd->lid)) {
+ if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
+ ~((1 << ps.ppd->lmc) - 1)) ==
+ ps.ppd->lid)) {
ruc_loopback(qp);
return;
}
make_req = hfi1_make_uc_req;
- timeout_int = SEND_RESCHED_TIMEOUT;
+ ps.timeout_int = SEND_RESCHED_TIMEOUT;
break;
default:
make_req = hfi1_make_ud_req;
- timeout_int = SEND_RESCHED_TIMEOUT;
+ ps.timeout_int = SEND_RESCHED_TIMEOUT;
}
spin_lock_irqsave(&qp->s_lock, ps.flags);
@@ -888,9 +908,11 @@ void hfi1_do_send(struct rvt_qp *qp)
qp->s_flags |= RVT_S_BUSY;
- timeout = jiffies + (timeout_int) / 8;
- cpu = priv->s_sde ? priv->s_sde->cpu :
+ ps.timeout_int = ps.timeout_int / 8;
+ ps.timeout = jiffies + ps.timeout_int;
+ ps.cpu = priv->s_sde ? priv->s_sde->cpu :
cpumask_first(cpumask_of_node(ps.ppd->dd->node));
+
/* insure a pre-built packet is handled */
ps.s_txreq = get_waiting_verbs_txreq(qp);
do {
@@ -906,28 +928,9 @@ void hfi1_do_send(struct rvt_qp *qp)
/* Record that s_ahg is empty. */
qp->s_hdrwords = 0;
/* allow other tasks to run */
- if (unlikely(time_after(jiffies, timeout))) {
- if (workqueue_congested(cpu,
- ps.ppd->hfi1_wq)) {
- spin_lock_irqsave(
- &qp->s_lock,
- ps.flags);
- qp->s_flags &= ~RVT_S_BUSY;
- hfi1_schedule_send(qp);
- spin_unlock_irqrestore(
- &qp->s_lock,
- ps.flags);
- this_cpu_inc(
- *ps.ppd->dd->send_schedule);
- return;
- }
- if (!irqs_disabled()) {
- cond_resched();
- this_cpu_inc(
- *ps.ppd->dd->send_schedule);
- }
- timeout = jiffies + (timeout_int) / 8;
- }
+ if (schedule_send_yield(qp, &ps))
+ return;
+
spin_lock_irqsave(&qp->s_lock, ps.flags);
}
} while (make_req(qp, &ps));
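With schedule_send_yield() factored out above, the send loop reduces to the shape sketched below; this is an illustration of the control flow after the refactor, not the driver's literal code:

    /* Simplified shape of the post-refactor send loop (sketch). */
    do {
            /* ... build and submit one packet described by ps ... */
            if (schedule_send_yield(qp, &ps))
                    return;         /* time slice expired and work was requeued */
            spin_lock_irqsave(&qp->s_lock, ps.flags);
    } while (make_req(qp, &ps));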
@@ -948,8 +951,10 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
last = qp->s_last;
old_last = last;
+ trace_hfi1_qp_send_completion(qp, wqe, last);
if (++last >= qp->s_size)
last = 0;
+ trace_hfi1_qp_send_completion(qp, wqe, last);
qp->s_last = last;
/* See post_send() */
barrier();
@@ -959,7 +964,10 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
qp->ibqp.qp_type == IB_QPT_GSI)
atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
- rvt_qp_swqe_complete(qp, wqe, status);
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ ib_hfi1_wc_opcode[wqe->wr.opcode],
+ status);
if (qp->s_acked == old_last)
qp->s_acked = last;
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 1d81cac1fa6c..bfd0d5187e9b 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -856,7 +856,7 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
{
struct sdma_rht_node *rht_node;
struct sdma_engine *sde = NULL;
- const struct cpumask *current_mask = tsk_cpus_allowed(current);
+ const struct cpumask *current_mask = &current->cpus_allowed;
unsigned long cpu_id;
/*
@@ -868,7 +868,7 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
cpu_id = smp_processor_id();
rcu_read_lock();
- rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu_id,
+ rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu_id,
sdma_rht_params);
if (rht_node && rht_node->map[vl]) {
@@ -962,7 +962,12 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
continue;
}
- rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+ if (vl >= ARRAY_SIZE(rht_node->map)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
sdma_rht_params);
if (!rht_node) {
rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL);
@@ -982,7 +987,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
rht_node->map[vl]->ctr = 1;
rht_node->map[vl]->sde[0] = sde;
- ret = rhashtable_insert_fast(&dd->sdma_rht,
+ ret = rhashtable_insert_fast(dd->sdma_rht,
&rht_node->node,
sdma_rht_params);
if (ret) {
@@ -1025,7 +1030,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
if (cpumask_test_cpu(cpu, mask))
continue;
- rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+ rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
sdma_rht_params);
if (rht_node) {
bool empty = true;
@@ -1049,7 +1054,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
}
if (empty) {
- ret = rhashtable_remove_fast(&dd->sdma_rht,
+ ret = rhashtable_remove_fast(dd->sdma_rht,
&rht_node->node,
sdma_rht_params);
WARN_ON(ret);
@@ -1108,7 +1113,7 @@ void sdma_seqfile_dump_cpu_list(struct seq_file *s,
struct sdma_rht_node *rht_node;
int i, j;
- rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpuid,
+ rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpuid,
sdma_rht_params);
if (!rht_node)
return;
@@ -1322,6 +1327,12 @@ static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
synchronize_rcu();
kfree(dd->per_sdma);
dd->per_sdma = NULL;
+
+ if (dd->sdma_rht) {
+ rhashtable_free_and_destroy(dd->sdma_rht, sdma_rht_free, NULL);
+ kfree(dd->sdma_rht);
+ dd->sdma_rht = NULL;
+ }
}
/**
@@ -1341,12 +1352,14 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
{
unsigned this_idx;
struct sdma_engine *sde;
+ struct rhashtable *tmp_sdma_rht;
u16 descq_cnt;
void *curr_head;
struct hfi1_pportdata *ppd = dd->pport + port;
u32 per_sdma_credits;
uint idle_cnt = sdma_idle_cnt;
size_t num_engines = dd->chip_sdma_engines;
+ int ret = -ENOMEM;
if (!HFI1_CAP_IS_KSET(SDMA)) {
HFI1_CAP_CLEAR(SDMA_AHG);
@@ -1378,7 +1391,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
/* alloc memory for array of send engines */
dd->per_sdma = kcalloc(num_engines, sizeof(*dd->per_sdma), GFP_KERNEL);
if (!dd->per_sdma)
- return -ENOMEM;
+ return ret;
idle_cnt = ns_to_cclock(dd, idle_cnt);
if (!sdma_desct_intr)
@@ -1507,18 +1520,27 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
dd->flags |= HFI1_HAS_SEND_DMA;
dd->flags |= idle_cnt ? HFI1_HAS_SDMA_TIMEOUT : 0;
dd->num_sdma = num_engines;
- if (sdma_map_init(dd, port, ppd->vls_operational, NULL))
+ ret = sdma_map_init(dd, port, ppd->vls_operational, NULL);
+ if (ret < 0)
goto bail;
- if (rhashtable_init(&dd->sdma_rht, &sdma_rht_params))
+ tmp_sdma_rht = kzalloc(sizeof(*tmp_sdma_rht), GFP_KERNEL);
+ if (!tmp_sdma_rht) {
+ ret = -ENOMEM;
goto bail;
+ }
+
+ ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params);
+ if (ret < 0)
+ goto bail;
+ dd->sdma_rht = tmp_sdma_rht;
dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
return 0;
bail:
sdma_clean(dd, num_engines);
- return -ENOMEM;
+ return ret;
}
/**
@@ -1604,7 +1626,6 @@ void sdma_exit(struct hfi1_devdata *dd)
sdma_finalput(&sde->state);
}
sdma_clean(dd, dd->num_sdma);
- rhashtable_free_and_destroy(&dd->sdma_rht, sdma_rht_free, NULL);
}
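The sdma_rht changes above convert the embedded rhashtable into a heap-allocated pointer so that both the table and its entries can be torn down from sdma_clean(). A minimal sketch of the resulting init/teardown pairing (sdma_rht_params and sdma_rht_free are the driver's; the surrounding error handling here is illustrative):

    /* Init side (sketch): allocate, initialize, then publish the pointer. */
    struct rhashtable *rht = kzalloc(sizeof(*rht), GFP_KERNEL);

    if (!rht)
            return -ENOMEM;
    ret = rhashtable_init(rht, &sdma_rht_params);
    if (ret < 0) {
            kfree(rht);
            return ret;
    }
    dd->sdma_rht = rht;

    /* Teardown side (sketch): free entries, then the table, then the pointer. */
    if (dd->sdma_rht) {
            rhashtable_free_and_destroy(dd->sdma_rht, sdma_rht_free, NULL);
            kfree(dd->sdma_rht);
            dd->sdma_rht = NULL;
    }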
/*
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 21f1e2834f37..64f10b8b5db8 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -966,34 +966,34 @@ void sdma_engine_interrupt(struct sdma_engine *sde, u64 status);
* | mask | --/ |--------------------|
* |--------------------------| -/ | * |
* | actual_vls (max 8) | -/ |--------------------|
- * |--------------------------| --/ | sde[n] -> eng n |
+ * |--------------------------| --/ | sde[n-1] -> eng n |
* | vls (max 8) | -/ +--------------------+
* |--------------------------| --/
* | map[0] |-/
- * |--------------------------| +--------------------+
- * | map[1] |--- | mask |
- * |--------------------------| \---- |--------------------|
- * | * | \-- | sde[0] -> eng 1+n |
- * | * | \---- |--------------------|
- * | * | \->| sde[1] -> eng 2+n |
- * |--------------------------| |--------------------|
- * | map[vls - 1] |- | * |
- * +--------------------------+ \- |--------------------|
- * \- | sde[m] -> eng m+n |
- * \ +--------------------+
+ * |--------------------------| +---------------------+
+ * | map[1] |--- | mask |
+ * |--------------------------| \---- |---------------------|
+ * | * | \-- | sde[0] -> eng 1+n |
+ * | * | \---- |---------------------|
+ * | * | \->| sde[1] -> eng 2+n |
+ * |--------------------------| |---------------------|
+ * | map[vls - 1] |- | * |
+ * +--------------------------+ \- |---------------------|
+ * \- | sde[m-1] -> eng m+n |
+ * \ +---------------------+
* \-
* \
- * \- +--------------------+
- * \- | mask |
- * \ |--------------------|
- * \- | sde[0] -> eng 1+m+n|
- * \- |--------------------|
- * >| sde[1] -> eng 2+m+n|
- * |--------------------|
- * | * |
- * |--------------------|
- * | sde[o] -> eng o+m+n|
- * +--------------------+
+ * \- +----------------------+
+ * \- | mask |
+ * \ |----------------------|
+ * \- | sde[0] -> eng 1+m+n |
+ * \- |----------------------|
+ * >| sde[1] -> eng 2+m+n |
+ * |----------------------|
+ * | * |
+ * |----------------------|
+ * | sde[o-1] -> eng o+m+n|
+ * +----------------------+
*
*/
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 919a5474e651..2f3bbcac1e34 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -196,7 +196,8 @@ static const struct sysfs_ops port_cc_sysfs_ops = {
};
static struct attribute *port_cc_default_attributes[] = {
- &cc_prescan_attr.attr
+ &cc_prescan_attr.attr,
+ NULL
};
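The added NULL entry matters because the kobject core walks a ktype's default attribute array until it reaches a NULL sentinel; without it the walk runs past the end of the array. Roughly (illustrative, not the actual sysfs code; create_one_attr_file() is a placeholder):

    /* Sketch: iteration stops only at the NULL sentinel. */
    for (i = 0; attrs[i]; i++)
            create_one_attr_file(kobj, attrs[i]);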
static struct kobj_type port_cc_ktype = {
@@ -542,7 +543,7 @@ static ssize_t show_nctxts(struct device *device,
* give a more accurate picture of total contexts available.
*/
return scnprintf(buf, PAGE_SIZE, "%u\n",
- min(dd->num_rcv_contexts - dd->first_user_ctxt,
+ min(dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt,
(u32)dd->sc_sizes[SC_USER].count));
}
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index 01f525cd985a..eafae487face 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -51,13 +51,12 @@ u8 ibhdr_exhdr_len(struct ib_header *hdr)
{
struct ib_other_headers *ohdr;
u8 opcode;
- u8 lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
- if (lnh == HFI1_LRH_BTH)
+ if (ib_get_lnh(hdr) == HFI1_LRH_BTH)
ohdr = &hdr->u.oth;
else
ohdr = &hdr->u.l.oth;
- opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+ opcode = ib_bth_get_opcode(ohdr);
return hdr_len_by_opcode[opcode] == 0 ?
0 : hdr_len_by_opcode[opcode] - (12 + 8);
}
@@ -130,14 +129,14 @@ const char *parse_everbs_hdrs(
case OP(RC, ACKNOWLEDGE):
trace_seq_printf(p, AETH_PRN, be32_to_cpu(eh->aeth) >> 24,
parse_syndrome(be32_to_cpu(eh->aeth) >> 24),
- be32_to_cpu(eh->aeth) & HFI1_MSN_MASK);
+ be32_to_cpu(eh->aeth) & IB_MSN_MASK);
break;
/* aeth + atomicacketh */
case OP(RC, ATOMIC_ACKNOWLEDGE):
trace_seq_printf(p, AETH_PRN " " ATOMICACKETH_PRN,
be32_to_cpu(eh->at.aeth) >> 24,
parse_syndrome(be32_to_cpu(eh->at.aeth) >> 24),
- be32_to_cpu(eh->at.aeth) & HFI1_MSN_MASK,
+ be32_to_cpu(eh->at.aeth) & IB_MSN_MASK,
ib_u64_get(&eh->at.atomic_ack_eth));
break;
/* atomiceth */
diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h
index 26ae789e47cf..4eb4cc798035 100644
--- a/drivers/infiniband/hw/hfi1/trace_ctxts.h
+++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h
@@ -57,12 +57,14 @@
#define UCTXT_FMT \
"cred:%u, credaddr:0x%llx, piobase:0x%p, rcvhdr_cnt:%u, " \
- "rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx"
+ "rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx, subctxt_cnt:%u"
TRACE_EVENT(hfi1_uctxtdata,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt),
- TP_ARGS(dd, uctxt),
+ TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt,
+ unsigned int subctxt),
+ TP_ARGS(dd, uctxt, subctxt),
TP_STRUCT__entry(DD_DEV_ENTRY(dd)
__field(unsigned int, ctxt)
+ __field(unsigned int, subctxt)
__field(u32, credits)
__field(u64, hw_free)
__field(void __iomem *, piobase)
@@ -70,9 +72,11 @@ TRACE_EVENT(hfi1_uctxtdata,
__field(u64, rcvhdrq_dma)
__field(u32, eager_cnt)
__field(u64, rcvegr_dma)
+ __field(unsigned int, subctxt_cnt)
),
TP_fast_assign(DD_DEV_ASSIGN(dd);
__entry->ctxt = uctxt->ctxt;
+ __entry->subctxt = subctxt;
__entry->credits = uctxt->sc->credits;
__entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free);
__entry->piobase = uctxt->sc->base_addr;
@@ -80,17 +84,20 @@ TRACE_EVENT(hfi1_uctxtdata,
__entry->rcvhdrq_dma = uctxt->rcvhdrq_dma;
__entry->eager_cnt = uctxt->egrbufs.alloced;
__entry->rcvegr_dma = uctxt->egrbufs.rcvtids[0].dma;
+ __entry->subctxt_cnt = uctxt->subctxt_cnt;
),
- TP_printk("[%s] ctxt %u " UCTXT_FMT,
+ TP_printk("[%s] ctxt %u:%u " UCTXT_FMT,
__get_str(dev),
__entry->ctxt,
+ __entry->subctxt,
__entry->credits,
__entry->hw_free,
__entry->piobase,
__entry->rcvhdrq_cnt,
__entry->rcvhdrq_dma,
__entry->eager_cnt,
- __entry->rcvegr_dma
+ __entry->rcvegr_dma,
+ __entry->subctxt_cnt
)
);
diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
index 382fcda3a5f6..090f6b506953 100644
--- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
+++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
@@ -139,11 +139,11 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
__entry->pkey =
be32_to_cpu(ohdr->bth[0]) & 0xffff;
__entry->f =
- (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) &
- HFI1_FECN_MASK;
+ (be32_to_cpu(ohdr->bth[1]) >> IB_FECN_SHIFT) &
+ IB_FECN_MASK;
__entry->b =
- (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) &
- HFI1_BECN_MASK;
+ (be32_to_cpu(ohdr->bth[1]) >> IB_BECN_SHIFT) &
+ IB_BECN_MASK;
__entry->qpn =
be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
__entry->a =
diff --git a/drivers/infiniband/hw/hfi1/trace_misc.h b/drivers/infiniband/hw/hfi1/trace_misc.h
index d308454af7fd..deac77ddaeab 100644
--- a/drivers/infiniband/hw/hfi1/trace_misc.h
+++ b/drivers/infiniband/hw/hfi1/trace_misc.h
@@ -72,6 +72,54 @@ TRACE_EVENT(hfi1_interrupt,
__entry->src)
);
+#ifdef CONFIG_FAULT_INJECTION
+TRACE_EVENT(hfi1_fault_opcode,
+ TP_PROTO(struct rvt_qp *qp, u8 opcode),
+ TP_ARGS(qp, opcode),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u8, opcode)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->opcode = opcode;
+ ),
+ TP_printk("[%s] qpn 0x%x opcode 0x%x",
+ __get_str(dev), __entry->qpn, __entry->opcode)
+);
+
+TRACE_EVENT(hfi1_fault_packet,
+ TP_PROTO(struct hfi1_packet *packet),
+ TP_ARGS(packet),
+ TP_STRUCT__entry(DD_DEV_ENTRY(packet->rcd->ppd->dd)
+ __field(u64, eflags)
+ __field(u32, ctxt)
+ __field(u32, hlen)
+ __field(u32, tlen)
+ __field(u32, updegr)
+ __field(u32, etail)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(packet->rcd->ppd->dd);
+ __entry->eflags = rhf_err_flags(packet->rhf);
+ __entry->ctxt = packet->rcd->ctxt;
+ __entry->hlen = packet->hlen;
+ __entry->tlen = packet->tlen;
+ __entry->updegr = packet->updegr;
+ __entry->etail = rhf_egr_index(packet->rhf);
+ ),
+ TP_printk(
+ "[%s] ctxt %d eflags 0x%llx hlen %d tlen %d updegr %d etail %d",
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->eflags,
+ __entry->hlen,
+ __entry->tlen,
+ __entry->updegr,
+ __entry->etail
+ )
+);
+#endif
+
#endif /* __HFI1_TRACE_MISC_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/hw/hfi1/trace_rc.h b/drivers/infiniband/hw/hfi1/trace_rc.h
index 5ea5005f9f41..8ce476570462 100644
--- a/drivers/infiniband/hw/hfi1/trace_rc.h
+++ b/drivers/infiniband/hw/hfi1/trace_rc.h
@@ -1,5 +1,5 @@
/*
-* Copyright(c) 2015, 2016 Intel Corporation.
+* Copyright(c) 2015, 2016, 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -104,11 +104,6 @@ DEFINE_EVENT(hfi1_rc_template, hfi1_ack,
TP_ARGS(qp, psn)
);
-DEFINE_EVENT(hfi1_rc_template, hfi1_timeout,
- TP_PROTO(struct rvt_qp *qp, u32 psn),
- TP_ARGS(qp, psn)
-);
-
DEFINE_EVENT(hfi1_rc_template, hfi1_rcv_error,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h
index 415d6be42c5d..c59809a7f121 100644
--- a/drivers/infiniband/hw/hfi1/trace_tx.h
+++ b/drivers/infiniband/hw/hfi1/trace_tx.h
@@ -633,6 +633,83 @@ DEFINE_EVENT(hfi1_bct_template, bct_get,
TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc),
TP_ARGS(dd, bc));
+TRACE_EVENT(
+ hfi1_qp_send_completion,
+ TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe, u32 idx),
+ TP_ARGS(qp, wqe, idx),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(struct rvt_swqe *, wqe)
+ __field(u64, wr_id)
+ __field(u32, qpn)
+ __field(u32, qpt)
+ __field(u32, length)
+ __field(u32, idx)
+ __field(u32, ssn)
+ __field(enum ib_wr_opcode, opcode)
+ __field(int, send_flags)
+ ),
+ TP_fast_assign(
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ __entry->wqe = wqe;
+ __entry->wr_id = wqe->wr.wr_id;
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->qpt = qp->ibqp.qp_type;
+ __entry->length = wqe->length;
+ __entry->idx = idx;
+ __entry->ssn = wqe->ssn;
+ __entry->opcode = wqe->wr.opcode;
+ __entry->send_flags = wqe->wr.send_flags;
+ ),
+ TP_printk(
+ "[%s] qpn 0x%x qpt %u wqe %p idx %u wr_id %llx length %u ssn %u opcode %x send_flags %x",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->qpt,
+ __entry->wqe,
+ __entry->idx,
+ __entry->wr_id,
+ __entry->length,
+ __entry->ssn,
+ __entry->opcode,
+ __entry->send_flags
+ )
+);
+
+DECLARE_EVENT_CLASS(
+ hfi1_do_send_template,
+ TP_PROTO(struct rvt_qp *qp, bool flag),
+ TP_ARGS(qp, flag),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(bool, flag)
+ ),
+ TP_fast_assign(
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->flag = flag;
+ ),
+ TP_printk(
+ "[%s] qpn %x flag %d",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->flag
+ )
+);
+
+DEFINE_EVENT(
+ hfi1_do_send_template, hfi1_rc_do_send,
+ TP_PROTO(struct rvt_qp *qp, bool flag),
+ TP_ARGS(qp, flag)
+);
+
+DEFINE_EVENT(
+ hfi1_do_send_template, hfi1_rc_expired_time_slice,
+ TP_PROTO(struct rvt_qp *qp, bool flag),
+ TP_ARGS(qp, flag)
+);
+
#endif /* __HFI1_TRACE_TX_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index b141a78ae38b..5da1e4546543 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -94,7 +94,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
}
ohdr = &ps->s_txreq->phdr.hdr.u.oth;
- if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
/* Get the next send request. */
@@ -296,7 +296,7 @@ bail_no_tx:
*/
void hfi1_uc_rcv(struct hfi1_packet *packet)
{
- struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
+ struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
struct ib_header *hdr = packet->hdr;
u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
@@ -320,7 +320,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
process_ecn(qp, packet, true);
psn = be32_to_cpu(ohdr->bth[2]);
- opcode = (bth0 >> 24) & 0xff;
+ opcode = ib_bth_get_opcode(ohdr);
/* Compare the PSN verses the expected PSN. */
if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) {
@@ -384,7 +384,7 @@ inv:
}
if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
- qp_comm_est(qp);
+ rvt_comm_est(qp);
/* OK, process the packet. */
switch (opcode) {
@@ -419,7 +419,7 @@ send_first:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto rewind;
- hfi1_copy_sge(&qp->r_sge, data, pmtu, 0, 0);
+ hfi1_copy_sge(&qp->r_sge, data, pmtu, false, false);
break;
case OP(SEND_LAST_WITH_IMMEDIATE):
@@ -433,7 +433,7 @@ no_immediate_data:
wc.wc_flags = 0;
send_last:
/* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
@@ -444,14 +444,14 @@ send_last:
if (unlikely(wc.byte_len > qp->r_len))
goto rewind;
wc.opcode = IB_WC_RECV;
- hfi1_copy_sge(&qp->r_sge, data, tlen, 0, 0);
+ hfi1_copy_sge(&qp->r_sge, data, tlen, false, false);
rvt_put_ss(&qp->s_rdma_read_sge);
last_imm:
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.slid = qp->remote_ah_attr.dlid;
+ wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
/*
* It seems that IB mandates the presence of an SL in a
* work completion only for the UD transport (see section
@@ -463,7 +463,7 @@ last_imm:
*
* See also OPA Vol. 1, section 9.7.6, and table 9-17.
*/
- wc.sl = qp->remote_ah_attr.sl;
+ wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
/* zero fields that are N/A */
wc.vendor_err = 0;
wc.pkey_index = 0;
@@ -519,7 +519,7 @@ rdma_first:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto drop;
- hfi1_copy_sge(&qp->r_sge, data, pmtu, 1, 0);
+ hfi1_copy_sge(&qp->r_sge, data, pmtu, true, false);
break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -528,7 +528,7 @@ rdma_last_imm:
wc.wc_flags = IB_WC_WITH_IMM;
/* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
@@ -548,14 +548,14 @@ rdma_last_imm:
}
wc.byte_len = qp->r_len;
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
- hfi1_copy_sge(&qp->r_sge, data, tlen, 1, 0);
+ hfi1_copy_sge(&qp->r_sge, data, tlen, true, false);
rvt_put_ss(&qp->r_sge);
goto last_imm;
case OP(RDMA_WRITE_LAST):
rdma_last:
/* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
@@ -564,7 +564,7 @@ rdma_last:
tlen -= (hdrsize + pad + 4);
if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
goto drop;
- hfi1_copy_sge(&qp->r_sge, data, tlen, 1, 0);
+ hfi1_copy_sge(&qp->r_sge, data, tlen, true, false);
rvt_put_ss(&qp->r_sge);
break;
@@ -584,5 +584,5 @@ drop:
return;
op_err:
- hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
}
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index c071955c0272..6a4e95cefae5 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -68,7 +68,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
struct hfi1_pportdata *ppd;
struct rvt_qp *qp;
- struct ib_ah_attr *ah_attr;
+ struct rdma_ah_attr *ah_attr;
unsigned long flags;
struct rvt_sge_state ssge;
struct rvt_sge *sge;
@@ -103,17 +103,17 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (qp->ibqp.qp_num > 1) {
u16 pkey;
u16 slid;
- u8 sc5 = ibp->sl_to_sc[ah_attr->sl];
+ u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)];
pkey = hfi1_get_pkey(ibp, sqp->s_pkey_index);
- slid = ppd->lid | (ah_attr->src_path_bits &
+ slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
((1 << ppd->lmc) - 1));
if (unlikely(ingress_pkey_check(ppd, pkey, sc5,
qp->s_pkey_index, slid))) {
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, pkey,
- ah_attr->sl,
+ rdma_ah_get_sl(ah_attr),
sqp->ibqp.qp_num, qp->ibqp.qp_num,
- slid, ah_attr->dlid);
+ slid, rdma_ah_get_dlid(ah_attr));
goto drop;
}
}
@@ -131,13 +131,13 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (unlikely(qkey != qp->qkey)) {
u16 lid;
- lid = ppd->lid | (ah_attr->src_path_bits &
+ lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
((1 << ppd->lmc) - 1));
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey,
- ah_attr->sl,
+ rdma_ah_get_sl(ah_attr),
sqp->ibqp.qp_num, qp->ibqp.qp_num,
lid,
- ah_attr->dlid);
+ rdma_ah_get_dlid(ah_attr));
goto drop;
}
}
@@ -167,7 +167,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
ret = hfi1_rvt_get_rwqe(qp, 0);
if (ret < 0) {
- hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
goto bail_unlock;
}
if (!ret) {
@@ -183,16 +183,16 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
goto bail_unlock;
}
- if (ah_attr->ah_flags & IB_AH_GRH) {
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
struct ib_grh grh;
- struct ib_global_route grd = ah_attr->grh;
+ const struct ib_global_route *grd = rdma_ah_read_grh(ah_attr);
- hfi1_make_grh(ibp, &grh, &grd, 0, 0);
+ hfi1_make_grh(ibp, &grh, grd, 0, 0);
hfi1_copy_sge(&qp->r_sge, &grh,
- sizeof(grh), 1, 0);
+ sizeof(grh), true, false);
wc.wc_flags |= IB_WC_GRH;
} else {
- hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
+ rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
}
ssge.sg_list = swqe->sg_list + 1;
ssge.sge = *swqe->sg_list;
@@ -206,7 +206,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (len > sge->sge_length)
len = sge->sge_length;
WARN_ON_ONCE(len == 0);
- hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, 1, 0);
+ hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, true, false);
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
@@ -243,12 +243,13 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
} else {
wc.pkey_index = 0;
}
- wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1));
+ wc.slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
+ ((1 << ppd->lmc) - 1));
/* Check for loopback when the port lid is not set */
if (wc.slid == 0 && sqp->ibqp.qp_type == IB_QPT_GSI)
wc.slid = be16_to_cpu(IB_LID_PERMISSIVE);
- wc.sl = ah_attr->sl;
- wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
+ wc.sl = rdma_ah_get_sl(ah_attr);
+ wc.dlid_path_bits = rdma_ah_get_dlid(ah_attr) & ((1 << ppd->lmc) - 1);
wc.port_num = qp->port_num;
/* Signal completion event if the solicited bit is set. */
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
@@ -272,7 +273,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
struct ib_other_headers *ohdr;
- struct ib_ah_attr *ah_attr;
+ struct rdma_ah_attr *ah_attr;
struct hfi1_pportdata *ppd;
struct hfi1_ibport *ibp;
struct rvt_swqe *wqe;
@@ -319,9 +320,9 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
ibp = to_iport(qp->ibqp.device, qp->port_num);
ppd = ppd_from_ibp(ibp);
ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
- if (ah_attr->dlid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
- ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
- lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
+ if (rdma_ah_get_dlid(ah_attr) < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
+ rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)) {
+ lid = rdma_ah_get_dlid(ah_attr) & ~((1 << ppd->lmc) - 1);
if (unlikely(!loopback &&
(lid == ppd->lid ||
(lid == be16_to_cpu(IB_LID_PERMISSIVE) &&
@@ -356,7 +357,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp->s_hdrwords = 7;
ps->s_txreq->s_cur_size = wqe->length;
ps->s_txreq->ss = &qp->s_sge;
- qp->s_srate = ah_attr->static_rate;
+ qp->s_srate = rdma_ah_get_static_rate(ah_attr);
qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
qp->s_wqe = wqe;
qp->s_sge.sge = wqe->sg_list[0];
@@ -364,11 +365,11 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp->s_sge.num_sge = wqe->wr.num_sge;
qp->s_sge.total_len = wqe->length;
- if (ah_attr->ah_flags & IB_AH_GRH) {
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
/* Header size in 32-bit words. */
qp->s_hdrwords += hfi1_make_grh(ibp,
&ps->s_txreq->phdr.hdr.u.l.grh,
- &ah_attr->grh,
+ rdma_ah_read_grh(ah_attr),
qp->s_hdrwords, nwords);
lrh0 = HFI1_LRH_GRH;
ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
@@ -388,8 +389,8 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
} else {
bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
}
- sc5 = ibp->sl_to_sc[ah_attr->sl];
- lrh0 |= (ah_attr->sl & 0xf) << 4;
+ sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)];
+ lrh0 |= (rdma_ah_get_sl(ah_attr) & 0xf) << 4;
if (qp->ibqp.qp_type == IB_QPT_SMI) {
lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
priv->s_sc = 0xf;
@@ -402,15 +403,17 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
ps->s_txreq->psc = priv->s_sendcontext;
ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0);
- ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(ah_attr->dlid);
+ ps->s_txreq->phdr.hdr.lrh[1] =
+ cpu_to_be16(rdma_ah_get_dlid(ah_attr));
ps->s_txreq->phdr.hdr.lrh[2] =
cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
- if (ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
+ if (rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)) {
ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE;
} else {
lid = ppd->lid;
if (lid) {
- lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1);
+ lid |= rdma_ah_get_path_bits(ah_attr) &
+ ((1 << ppd->lmc) - 1);
ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(lid);
} else {
ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE;
@@ -537,7 +540,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
bth0 = pkey | (IB_OPCODE_CNP << 24);
ohdr->bth[0] = cpu_to_be32(bth0);
- ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << HFI1_BECN_SHIFT));
+ ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << IB_BECN_SHIFT));
ohdr->bth[2] = 0; /* PSN 0 */
hdr.lrh[0] = cpu_to_be16(lrh0);
@@ -672,7 +675,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
u32 src_qp;
u16 dlid, pkey;
int mgmt_pkey_idx = -1;
- struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
+ struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct ib_header *hdr = packet->hdr;
u32 rcv_flags = packet->rcv_flags;
@@ -680,7 +683,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
bool has_grh = rcv_flags & HFI1_HAS_GRH;
- u8 sc5 = hdr2sc(hdr, packet->rhf);
+ u8 sc5 = hfi1_9B_get_sc5(hdr, packet->rhf);
u32 bth1;
u8 sl_from_sc, sl;
u16 slid;
@@ -688,18 +691,16 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
- dlid = be16_to_cpu(hdr->lrh[1]);
+ dlid = ib_get_dlid(hdr);
bth1 = be32_to_cpu(ohdr->bth[1]);
- slid = be16_to_cpu(hdr->lrh[3]);
- pkey = (u16)be32_to_cpu(ohdr->bth[0]);
- sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
- extra_bytes = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ slid = ib_get_slid(hdr);
+ pkey = ib_bth_get_pkey(ohdr);
+ opcode = ib_bth_get_opcode(ohdr);
+ sl = ib_get_sl(hdr);
+ extra_bytes = ib_bth_get_pad(ohdr);
extra_bytes += (SIZE_OF_CRC << 2);
sl_from_sc = ibp->sc_to_sl[sc5];
- opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
- opcode &= 0xff;
-
process_ecn(qp, packet, (opcode != IB_OPCODE_CNP));
/*
* Get the number of bytes the message was padded by
@@ -796,7 +797,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
ret = hfi1_rvt_get_rwqe(qp, 0);
if (ret < 0) {
- hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
return;
}
if (!ret) {
@@ -812,13 +813,13 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
}
if (has_grh) {
hfi1_copy_sge(&qp->r_sge, &hdr->u.l.grh,
- sizeof(struct ib_grh), 1, 0);
+ sizeof(struct ib_grh), true, false);
wc.wc_flags |= IB_WC_GRH;
} else {
- hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
+ rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
}
hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
- 1, 0);
+ true, false);
rvt_put_ss(&qp->r_sge);
if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
return;
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 64d26525435a..a8f0aa4722f6 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -45,6 +45,7 @@
*
*/
#include <asm/page.h>
+#include <linux/string.h>
#include "user_exp_rcv.h"
#include "trace.h"
@@ -52,7 +53,7 @@
struct tid_group {
struct list_head list;
- unsigned base;
+ u32 base;
u8 size;
u8 used;
u8 map;
@@ -81,20 +82,25 @@ struct tid_pageset {
(unsigned long)(len) - 1) & PAGE_MASK) - \
((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT))
-static void unlock_exp_tids(struct hfi1_ctxtdata *, struct exp_tid_set *,
- struct hfi1_filedata *);
-static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *);
-static int set_rcvarray_entry(struct file *, unsigned long, u32,
- struct tid_group *, struct page **, unsigned);
-static int tid_rb_insert(void *, struct mmu_rb_node *);
+static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
+ struct exp_tid_set *set,
+ struct hfi1_filedata *fd);
+static u32 find_phys_blocks(struct page **pages, unsigned npages,
+ struct tid_pageset *list);
+static int set_rcvarray_entry(struct hfi1_filedata *fd, unsigned long vaddr,
+ u32 rcventry, struct tid_group *grp,
+ struct page **pages, unsigned npages);
+static int tid_rb_insert(void *arg, struct mmu_rb_node *node);
static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
struct tid_rb_node *tnode);
-static void tid_rb_remove(void *, struct mmu_rb_node *);
-static int tid_rb_invalidate(void *, struct mmu_rb_node *);
-static int program_rcvarray(struct file *, unsigned long, struct tid_group *,
- struct tid_pageset *, unsigned, u16, struct page **,
- u32 *, unsigned *, unsigned *);
-static int unprogram_rcvarray(struct file *, u32, struct tid_group **);
+static void tid_rb_remove(void *arg, struct mmu_rb_node *node);
+static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
+static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr,
+ struct tid_group *grp, struct tid_pageset *sets,
+ unsigned start, u16 count, struct page **pages,
+ u32 *tidlist, unsigned *tididx, unsigned *pmapped);
+static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
+ struct tid_group **grp);
static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);
static struct mmu_rb_ops tid_rb_ops = {
@@ -148,62 +154,72 @@ static inline void tid_group_move(struct tid_group *group,
tid_group_add_tail(group, s2);
}
+int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd)
+{
+ struct hfi1_ctxtdata *uctxt = fd->uctxt;
+ struct hfi1_devdata *dd = fd->dd;
+ u32 tidbase;
+ u32 i;
+ struct tid_group *grp, *gptr;
+
+ exp_tid_group_init(&uctxt->tid_group_list);
+ exp_tid_group_init(&uctxt->tid_used_list);
+ exp_tid_group_init(&uctxt->tid_full_list);
+
+ tidbase = uctxt->expected_base;
+ for (i = 0; i < uctxt->expected_count /
+ dd->rcv_entries.group_size; i++) {
+ grp = kzalloc(sizeof(*grp), GFP_KERNEL);
+ if (!grp)
+ goto grp_failed;
+
+ grp->size = dd->rcv_entries.group_size;
+ grp->base = tidbase;
+ tid_group_add_tail(grp, &uctxt->tid_group_list);
+ tidbase += dd->rcv_entries.group_size;
+ }
+
+ return 0;
+
+grp_failed:
+ list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
+ list) {
+ list_del_init(&grp->list);
+ kfree(grp);
+ }
+
+ return -ENOMEM;
+}
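hfi1_user_exp_rcv_grp_init() and hfi1_user_exp_rcv_grp_free() split TID-group setup out of the per-file init path. A hypothetical caller sketch of the intended pairing (the actual call sites are outside this excerpt):

    /* Hypothetical caller (sketch): grp_init must be undone by grp_free. */
    ret = hfi1_user_exp_rcv_grp_init(fd);
    if (ret)
            return ret;
    /* ... expected-receive TIDs are programmed and used ... */
    hfi1_user_exp_rcv_grp_free(fd->uctxt);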
+
/*
* Initialize context and file private data needed for Expected
* receive caching. This needs to be done after the context has
* been configured with the eager/expected RcvEntry counts.
*/
-int hfi1_user_exp_rcv_init(struct file *fp)
+int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd)
{
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
- unsigned tidbase;
- int i, ret = 0;
+ int ret = 0;
spin_lock_init(&fd->tid_lock);
spin_lock_init(&fd->invalid_lock);
- if (!uctxt->subctxt_cnt || !fd->subctxt) {
- exp_tid_group_init(&uctxt->tid_group_list);
- exp_tid_group_init(&uctxt->tid_used_list);
- exp_tid_group_init(&uctxt->tid_full_list);
-
- tidbase = uctxt->expected_base;
- for (i = 0; i < uctxt->expected_count /
- dd->rcv_entries.group_size; i++) {
- struct tid_group *grp;
-
- grp = kzalloc(sizeof(*grp), GFP_KERNEL);
- if (!grp) {
- /*
- * If we fail here, the groups already
- * allocated will be freed by the close
- * call.
- */
- ret = -ENOMEM;
- goto done;
- }
- grp->size = dd->rcv_entries.group_size;
- grp->base = tidbase;
- tid_group_add_tail(grp, &uctxt->tid_group_list);
- tidbase += dd->rcv_entries.group_size;
- }
- }
-
fd->entry_to_rb = kcalloc(uctxt->expected_count,
- sizeof(struct rb_node *),
- GFP_KERNEL);
+ sizeof(struct rb_node *),
+ GFP_KERNEL);
if (!fd->entry_to_rb)
return -ENOMEM;
if (!HFI1_CAP_UGET_MASK(uctxt->flags, TID_UNMAP)) {
fd->invalid_tid_idx = 0;
- fd->invalid_tids = kzalloc(uctxt->expected_count *
- sizeof(u32), GFP_KERNEL);
+ fd->invalid_tids = kcalloc(uctxt->expected_count,
+ sizeof(*fd->invalid_tids),
+ GFP_KERNEL);
if (!fd->invalid_tids) {
- ret = -ENOMEM;
- goto done;
+ kfree(fd->entry_to_rb);
+ fd->entry_to_rb = NULL;
+ return -ENOMEM;
}
/*
@@ -246,41 +262,44 @@ int hfi1_user_exp_rcv_init(struct file *fp)
fd->tid_limit = uctxt->expected_count;
}
spin_unlock(&fd->tid_lock);
-done:
+
return ret;
}
-int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
+void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt)
{
- struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct tid_group *grp, *gptr;
- if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags))
- return 0;
+ list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
+ list) {
+ list_del_init(&grp->list);
+ kfree(grp);
+ }
+ hfi1_clear_tids(uctxt);
+}
+
+void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
+{
+ struct hfi1_ctxtdata *uctxt = fd->uctxt;
+
/*
* The notifier would have been removed when the process'es mm
* was freed.
*/
- if (fd->handler)
+ if (fd->handler) {
hfi1_mmu_rb_unregister(fd->handler);
-
- kfree(fd->invalid_tids);
-
- if (!uctxt->cnt) {
+ } else {
if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
- list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
- list) {
- list_del_init(&grp->list);
- kfree(grp);
- }
- hfi1_clear_tids(uctxt);
}
+ kfree(fd->invalid_tids);
+ fd->invalid_tids = NULL;
+
kfree(fd->entry_to_rb);
- return 0;
+ fd->entry_to_rb = NULL;
}
/*
@@ -349,10 +368,10 @@ static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index)
* can fit into the group. If the group becomes fully
* used, move it to tid_full_list.
*/
-int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
+int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
+ struct hfi1_tid_info *tinfo)
{
int ret = 0, need_group = 0, pinned;
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
unsigned npages, ngroups, pageidx = 0, pageset_count, npagesets,
@@ -449,7 +468,7 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
struct tid_group *grp =
tid_group_pop(&uctxt->tid_group_list);
- ret = program_rcvarray(fp, vaddr, grp, pagesets,
+ ret = program_rcvarray(fd, vaddr, grp, pagesets,
pageidx, dd->rcv_entries.group_size,
pages, tidlist, &tididx, &mapped);
/*
@@ -495,7 +514,7 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
unsigned use = min_t(unsigned, pageset_count - pageidx,
grp->size - grp->used);
- ret = program_rcvarray(fp, vaddr, grp, pagesets,
+ ret = program_rcvarray(fd, vaddr, grp, pagesets,
pageidx, use, pages, tidlist,
&tididx, &mapped);
if (ret < 0) {
@@ -545,7 +564,7 @@ nomem:
* everything done so far so we don't leak resources.
*/
tinfo->tidlist = (unsigned long)&tidlist;
- hfi1_user_exp_rcv_clear(fp, tinfo);
+ hfi1_user_exp_rcv_clear(fd, tinfo);
tinfo->tidlist = 0;
ret = -EFAULT;
goto bail;
@@ -569,28 +588,25 @@ bail:
return ret > 0 ? 0 : ret;
}
-int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo)
+int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
+ struct hfi1_tid_info *tinfo)
{
int ret = 0;
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
u32 *tidinfo;
unsigned tididx;
- tidinfo = kcalloc(tinfo->tidcnt, sizeof(*tidinfo), GFP_KERNEL);
- if (!tidinfo)
- return -ENOMEM;
+ if (unlikely(tinfo->tidcnt > fd->tid_used))
+ return -EINVAL;
- if (copy_from_user(tidinfo, (void __user *)(unsigned long)
- tinfo->tidlist, sizeof(tidinfo[0]) *
- tinfo->tidcnt)) {
- ret = -EFAULT;
- goto done;
- }
+ tidinfo = memdup_user((void __user *)(unsigned long)tinfo->tidlist,
+ sizeof(tidinfo[0]) * tinfo->tidcnt);
+ if (IS_ERR(tidinfo))
+ return PTR_ERR(tidinfo);
mutex_lock(&uctxt->exp_lock);
for (tididx = 0; tididx < tinfo->tidcnt; tididx++) {
- ret = unprogram_rcvarray(fp, tidinfo[tididx], NULL);
+ ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL);
if (ret) {
hfi1_cdbg(TID, "Failed to unprogram rcv array %d",
ret);
@@ -602,17 +618,17 @@ int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo)
spin_unlock(&fd->tid_lock);
tinfo->tidcnt = tididx;
mutex_unlock(&uctxt->exp_lock);
-done:
+
kfree(tidinfo);
return ret;
}
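The hunk above replaces a kcalloc() + copy_from_user() pair with memdup_user(), which allocates and copies in one step and returns an ERR_PTR-encoded error on failure. The idiom in isolation (buffer name and length are placeholders):

    /* Sketch of the memdup_user() idiom. */
    u32 *buf = memdup_user(user_ptr, count * sizeof(*buf));

    if (IS_ERR(buf))
            return PTR_ERR(buf);
    /* ... use buf ... */
    kfree(buf);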
-int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo)
+int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd,
+ struct hfi1_tid_info *tinfo)
{
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
unsigned long *ev = uctxt->dd->events +
- (((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
+ (((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) *
HFI1_MAX_SHARED_CTXTS) + fd->subctxt);
u32 *array;
int ret = 0;
@@ -724,7 +740,7 @@ static u32 find_phys_blocks(struct page **pages, unsigned npages,
/**
* program_rcvarray() - program an RcvArray group with receive buffers
- * @fp: file pointer
+ * @fd: filedata pointer
* @vaddr: starting user virtual address
* @grp: RcvArray group
* @sets: array of struct tid_pageset holding information on physically
@@ -749,13 +765,12 @@ static u32 find_phys_blocks(struct page **pages, unsigned npages,
* -ENOMEM or -EFAULT on error from set_rcvarray_entry(), or
* number of RcvArray entries programmed.
*/
-static int program_rcvarray(struct file *fp, unsigned long vaddr,
+static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr,
struct tid_group *grp,
struct tid_pageset *sets,
unsigned start, u16 count, struct page **pages,
u32 *tidlist, unsigned *tididx, unsigned *pmapped)
{
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
u16 idx;
@@ -796,7 +811,7 @@ static int program_rcvarray(struct file *fp, unsigned long vaddr,
npages = sets[setidx].count;
pageidx = sets[setidx].idx;
- ret = set_rcvarray_entry(fp, vaddr + (pageidx * PAGE_SIZE),
+ ret = set_rcvarray_entry(fd, vaddr + (pageidx * PAGE_SIZE),
rcventry, grp, pages + pageidx,
npages);
if (ret)
@@ -818,12 +833,11 @@ static int program_rcvarray(struct file *fp, unsigned long vaddr,
return idx;
}
-static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
+static int set_rcvarray_entry(struct hfi1_filedata *fd, unsigned long vaddr,
u32 rcventry, struct tid_group *grp,
struct page **pages, unsigned npages)
{
int ret;
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct tid_rb_node *node;
struct hfi1_devdata *dd = uctxt->dd;
@@ -877,10 +891,9 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
return 0;
}
-static int unprogram_rcvarray(struct file *fp, u32 tidinfo,
+static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
struct tid_group **grp)
{
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
struct tid_rb_node *node;
@@ -1016,8 +1029,8 @@ static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
* process in question.
*/
ev = uctxt->dd->events +
- (((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
- HFI1_MAX_SHARED_CTXTS) + fdata->subctxt);
+ (((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) *
+ HFI1_MAX_SHARED_CTXTS) + fdata->subctxt);
set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
}
fdata->invalid_tid_idx++;
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
index 9bc8d9fba87e..5250c897298d 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
@@ -1,7 +1,7 @@
#ifndef _HFI1_USER_EXP_RCV_H
#define _HFI1_USER_EXP_RCV_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -70,10 +70,15 @@
(tid) |= EXP_TID_SET(field, (value)); \
} while (0)
-int hfi1_user_exp_rcv_init(struct file *);
-int hfi1_user_exp_rcv_free(struct hfi1_filedata *);
-int hfi1_user_exp_rcv_setup(struct file *, struct hfi1_tid_info *);
-int hfi1_user_exp_rcv_clear(struct file *, struct hfi1_tid_info *);
-int hfi1_user_exp_rcv_invalid(struct file *, struct hfi1_tid_info *);
+void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt);
+int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd);
+int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd);
+void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd);
+int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
+ struct hfi1_tid_info *tinfo);
+int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
+ struct hfi1_tid_info *tinfo);
+int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd,
+ struct hfi1_tid_info *tinfo);
#endif /* _HFI1_USER_EXP_RCV_H */
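The prototypes above complete the switch from passing a struct file * into the expected-receive helpers to passing the hfi1_filedata context directly. A minimal stand-alone sketch of the calling pattern this enables, using simplified stand-in types rather than the real hfi1 structures: the per-open context is resolved once at the file-operations boundary and handed down, instead of every helper re-deriving it from fp->private_data.

    /* Illustrative only; the struct names are stand-ins, not the hfi1 types. */
    #include <stdio.h>

    struct file     { void *private_data; };   /* stand-in for the VFS struct */
    struct filedata { int ctxt, subctxt; };    /* stand-in for hfi1_filedata  */

    static int helper(struct filedata *fd)     /* new style: typed context    */
    {
            return fd->ctxt * 100 + fd->subctxt;
    }

    static int entry_point(struct file *fp)
    {
            struct filedata *fd = fp->private_data;  /* resolved exactly once */

            return helper(fd);
    }

    int main(void)
    {
            struct filedata fd = { 3, 1 };
            struct file fp = { &fd };

            printf("%d\n", entry_point(&fp));  /* prints 301 */
            return 0;
    }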
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index 20f4ddcac3b0..e341e6dcc388 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -46,7 +46,7 @@
*/
#include <linux/mm.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/device.h>
#include <linux/module.h>
@@ -73,7 +73,8 @@ bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm,
{
unsigned long ulimit = rlimit(RLIMIT_MEMLOCK), pinned, cache_limit,
size = (cache_size * (1UL << 20)); /* convert to bytes */
- unsigned usr_ctxts = dd->num_rcv_contexts - dd->first_user_ctxt;
+ unsigned int usr_ctxts =
+ dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt;
bool can_lock = capable(CAP_IPC_LOCK);
/*
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 7d22f8ee98ef..d55339f5d73b 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -60,6 +60,7 @@
#include <linux/mmu_context.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
+#include <linux/string.h>
#include "hfi.h"
#include "sdma.h"
@@ -142,7 +143,9 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
/* KDETH OM multipliers and switch over point */
#define KDETH_OM_SMALL 4
+#define KDETH_OM_SMALL_SHIFT 2
#define KDETH_OM_LARGE 64
+#define KDETH_OM_LARGE_SHIFT 6
#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
/* Tx request flag bits */
@@ -152,9 +155,8 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
/* SDMA request flag bits */
#define SDMA_REQ_FOR_THREAD 1
#define SDMA_REQ_SEND_DONE 2
-#define SDMA_REQ_HAVE_AHG 3
-#define SDMA_REQ_HAS_ERROR 4
-#define SDMA_REQ_DONE_ERROR 5
+#define SDMA_REQ_HAS_ERROR 3
+#define SDMA_REQ_DONE_ERROR 4
#define SDMA_PKT_Q_INACTIVE BIT(0)
#define SDMA_PKT_Q_ACTIVE BIT(1)
@@ -213,7 +215,7 @@ struct user_sdma_request {
 * each request will need its own engine pointer.
*/
struct sdma_engine *sde;
- u8 ahg_idx;
+ s8 ahg_idx;
u32 ahg[9];
/*
* KDETH.Offset (Eager) field
@@ -228,12 +230,6 @@ struct user_sdma_request {
*/
u32 tidoffset;
/*
- * KDETH.OM
- * Remember this because the header template always sets it
- * to 0.
- */
- u8 omfactor;
- /*
* We copy the iovs for this request (based on
* info.iovcnt). These are only the data vectors
*/
@@ -283,39 +279,43 @@ struct user_sdma_txreq {
hfi1_cdbg(SDMA, "[%u:%u:%u] " fmt, (pq)->dd->unit, (pq)->ctxt, \
(pq)->subctxt, ##__VA_ARGS__)
-static int user_sdma_send_pkts(struct user_sdma_request *, unsigned);
-static int num_user_pages(const struct iovec *);
-static void user_sdma_txreq_cb(struct sdma_txreq *, int);
-static inline void pq_update(struct hfi1_user_sdma_pkt_q *);
-static void user_sdma_free_request(struct user_sdma_request *, bool);
-static int pin_vector_pages(struct user_sdma_request *,
- struct user_sdma_iovec *);
-static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned,
- unsigned);
-static int check_header_template(struct user_sdma_request *,
- struct hfi1_pkt_header *, u32, u32);
-static int set_txreq_header(struct user_sdma_request *,
- struct user_sdma_txreq *, u32);
-static int set_txreq_header_ahg(struct user_sdma_request *,
- struct user_sdma_txreq *, u32);
-static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *,
- struct hfi1_user_sdma_comp_q *,
- u16, enum hfi1_sdma_comp_state, int);
-static inline u32 set_pkt_bth_psn(__be32, u8, u32);
+static int user_sdma_send_pkts(struct user_sdma_request *req,
+ unsigned maxpkts);
+static int num_user_pages(const struct iovec *iov);
+static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status);
+static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq);
+static void user_sdma_free_request(struct user_sdma_request *req, bool unpin);
+static int pin_vector_pages(struct user_sdma_request *req,
+ struct user_sdma_iovec *iovec);
+static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
+ unsigned start, unsigned npages);
+static int check_header_template(struct user_sdma_request *req,
+ struct hfi1_pkt_header *hdr, u32 lrhlen,
+ u32 datalen);
+static int set_txreq_header(struct user_sdma_request *req,
+ struct user_sdma_txreq *tx, u32 datalen);
+static int set_txreq_header_ahg(struct user_sdma_request *req,
+ struct user_sdma_txreq *tx, u32 len);
+static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
+ struct hfi1_user_sdma_comp_q *cq,
+ u16 idx, enum hfi1_sdma_comp_state state,
+ int ret);
+static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags);
static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);
static int defer_packet_queue(
- struct sdma_engine *,
- struct iowait *,
- struct sdma_txreq *,
- unsigned seq);
-static void activate_packet_queue(struct iowait *, int);
-static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long);
-static int sdma_rb_insert(void *, struct mmu_rb_node *);
+ struct sdma_engine *sde,
+ struct iowait *wait,
+ struct sdma_txreq *txreq,
+ unsigned int seq);
+static void activate_packet_queue(struct iowait *wait, int reason);
+static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
+ unsigned long len);
+static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode);
static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
void *arg2, bool *stop);
-static void sdma_rb_remove(void *, struct mmu_rb_node *);
-static int sdma_rb_invalidate(void *, struct mmu_rb_node *);
+static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode);
+static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
static struct mmu_rb_ops sdma_rb_ops = {
.filter = sdma_rb_filter,
@@ -371,44 +371,27 @@ static void sdma_kmem_cache_ctor(void *obj)
memset(tx, 0, sizeof(*tx));
}
-int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
+int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
+ struct hfi1_filedata *fd)
{
- struct hfi1_filedata *fd;
- int ret = 0;
- unsigned memsize;
+ int ret = -ENOMEM;
char buf[64];
struct hfi1_devdata *dd;
struct hfi1_user_sdma_comp_q *cq;
struct hfi1_user_sdma_pkt_q *pq;
unsigned long flags;
- if (!uctxt || !fp) {
- ret = -EBADF;
- goto done;
- }
+ if (!uctxt || !fd)
+ return -EBADF;
- fd = fp->private_data;
-
- if (!hfi1_sdma_comp_ring_size) {
- ret = -EINVAL;
- goto done;
- }
+ if (!hfi1_sdma_comp_ring_size)
+ return -EINVAL;
dd = uctxt->dd;
pq = kzalloc(sizeof(*pq), GFP_KERNEL);
if (!pq)
- goto pq_nomem;
-
- memsize = sizeof(*pq->reqs) * hfi1_sdma_comp_ring_size;
- pq->reqs = kzalloc(memsize, GFP_KERNEL);
- if (!pq->reqs)
- goto pq_reqs_nomem;
-
- memsize = BITS_TO_LONGS(hfi1_sdma_comp_ring_size) * sizeof(long);
- pq->req_in_use = kzalloc(memsize, GFP_KERNEL);
- if (!pq->req_in_use)
- goto pq_reqs_no_in_use;
+ return -ENOMEM;
INIT_LIST_HEAD(&pq->list);
pq->dd = dd;
@@ -424,10 +407,23 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
activate_packet_queue, NULL);
pq->reqidx = 0;
+
+ pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
+ sizeof(*pq->reqs),
+ GFP_KERNEL);
+ if (!pq->reqs)
+ goto pq_reqs_nomem;
+
+ pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size),
+ sizeof(*pq->req_in_use),
+ GFP_KERNEL);
+ if (!pq->req_in_use)
+ goto pq_reqs_no_in_use;
+
snprintf(buf, 64, "txreq-kmem-cache-%u-%u-%u", dd->unit, uctxt->ctxt,
fd->subctxt);
pq->txreq_cache = kmem_cache_create(buf,
- sizeof(struct user_sdma_txreq),
+ sizeof(struct user_sdma_txreq),
L1_CACHE_BYTES,
SLAB_HWCACHE_ALIGN,
sdma_kmem_cache_ctor);
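The request array and the in-use bitmap above now come from kcalloc() instead of a hand-computed size passed to kzalloc(); kcalloc() zeroes the buffer and returns NULL if the element count times the element size would overflow. A kernel-context fragment of the before/after pattern, lifted from the hunks in this patch (not compilable on its own):

    /* Before: manual size computation, no overflow protection. */
    memsize = sizeof(*pq->reqs) * hfi1_sdma_comp_ring_size;
    pq->reqs = kzalloc(memsize, GFP_KERNEL);

    /* After: kcalloc(n, size, flags) zeroes the memory and fails cleanly
     * if n * size overflows, so no separate bound check is needed. */
    pq->reqs = kcalloc(hfi1_sdma_comp_ring_size, sizeof(*pq->reqs), GFP_KERNEL);
    if (!pq->reqs)
            goto pq_reqs_nomem;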
@@ -436,31 +432,36 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
uctxt->ctxt);
goto pq_txreq_nomem;
}
- fd->pq = pq;
+
cq = kzalloc(sizeof(*cq), GFP_KERNEL);
if (!cq)
goto cq_nomem;
- memsize = PAGE_ALIGN(sizeof(*cq->comps) * hfi1_sdma_comp_ring_size);
- cq->comps = vmalloc_user(memsize);
+ cq->comps = vmalloc_user(PAGE_ALIGN(sizeof(*cq->comps)
+ * hfi1_sdma_comp_ring_size));
if (!cq->comps)
goto cq_comps_nomem;
cq->nentries = hfi1_sdma_comp_ring_size;
- fd->cq = cq;
ret = hfi1_mmu_rb_register(pq, pq->mm, &sdma_rb_ops, dd->pport->hfi1_wq,
&pq->handler);
if (ret) {
dd_dev_err(dd, "Failed to register with MMU %d", ret);
- goto done;
+ goto pq_mmu_fail;
}
+ fd->pq = pq;
+ fd->cq = cq;
+
spin_lock_irqsave(&uctxt->sdma_qlock, flags);
list_add(&pq->list, &uctxt->sdma_queues);
spin_unlock_irqrestore(&uctxt->sdma_qlock, flags);
- goto done;
+ return 0;
+
+pq_mmu_fail:
+ vfree(cq->comps);
cq_comps_nomem:
kfree(cq);
cq_nomem:
@@ -471,10 +472,7 @@ pq_reqs_no_in_use:
kfree(pq->reqs);
pq_reqs_nomem:
kfree(pq);
- fd->pq = NULL;
-pq_nomem:
- ret = -ENOMEM;
-done:
+
return ret;
}
@@ -534,11 +532,11 @@ static u8 dlid_to_selector(u16 dlid)
return mapping[hash];
}
-int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
- unsigned long dim, unsigned long *count)
+int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
+ struct iovec *iovec, unsigned long dim,
+ unsigned long *count)
{
int ret = 0, i;
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_user_sdma_pkt_q *pq = fd->pq;
struct hfi1_user_sdma_comp_q *cq = fd->cq;
@@ -614,6 +612,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
req->pq = pq;
req->cq = cq;
req->status = -1;
+ req->ahg_idx = -1;
INIT_LIST_HEAD(&req->txps);
memcpy(&req->info, &info, sizeof(info));
@@ -703,7 +702,9 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
/* Save all the IO vector structures */
for (i = 0; i < req->data_iovs; i++) {
INIT_LIST_HEAD(&req->iovs[i].list);
- memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec));
+ memcpy(&req->iovs[i].iov,
+ iovec + idx++,
+ sizeof(req->iovs[i].iov));
ret = pin_vector_pages(req, &req->iovs[i]);
if (ret) {
req->status = ret;
@@ -725,30 +726,28 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
*/
if (req_opcode(req->info.ctrl) == EXPECTED) {
u16 ntids = iovec[idx].iov_len / sizeof(*req->tids);
+ u32 *tmp;
if (!ntids || ntids > MAX_TID_PAIR_ENTRIES) {
ret = -EINVAL;
goto free_req;
}
- req->tids = kcalloc(ntids, sizeof(*req->tids), GFP_KERNEL);
- if (!req->tids) {
- ret = -ENOMEM;
- goto free_req;
- }
+
/*
* We have to copy all of the tids because they may vary
* in size and, therefore, the TID count might not be
* equal to the pkt count. However, there is no way to
* tell at this point.
*/
- ret = copy_from_user(req->tids, iovec[idx].iov_base,
- ntids * sizeof(*req->tids));
- if (ret) {
+ tmp = memdup_user(iovec[idx].iov_base,
+ ntids * sizeof(*req->tids));
+ if (IS_ERR(tmp)) {
+ ret = PTR_ERR(tmp);
SDMA_DBG(req, "Failed to copy %d TIDs (%d)",
ntids, ret);
- ret = -EFAULT;
goto free_req;
}
+ req->tids = tmp;
req->n_tids = ntids;
idx++;
}
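The TID copy above now goes through memdup_user(), which folds the allocation and copy_from_user() into one call and reports failure through ERR_PTR() instead of a separate -ENOMEM/-EFAULT path. A minimal kernel-context fragment of the idiom; user_ptr and ntids stand in for the iovec base pointer and the computed TID count, and the fragment is not compilable standalone:

    u32 *tids;

    tids = memdup_user(user_ptr, ntids * sizeof(*tids));
    if (IS_ERR(tids))
            return PTR_ERR(tids);   /* -ENOMEM or -EFAULT; nothing to kfree */

    /* ... use tids ... */
    kfree(tids);                    /* caller still owns the buffer */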
@@ -764,14 +763,8 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
}
/* We don't need an AHG entry if the request contains only one packet */
- if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG)) {
- int ahg = sdma_ahg_alloc(req->sde);
-
- if (likely(ahg >= 0)) {
- req->ahg_idx = (u8)ahg;
- set_bit(SDMA_REQ_HAVE_AHG, &req->flags);
- }
- }
+ if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG))
+ req->ahg_idx = sdma_ahg_alloc(req->sde);
set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
atomic_inc(&pq->n_reqs);
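With ahg_idx widened to a signed s8 and initialized to -1, the SDMA_REQ_HAVE_AHG flag bit becomes redundant: ahg_idx >= 0 now means an AHG entry was allocated, as the later hunks show, and a failed sdma_ahg_alloc() (negative return) leaves the sentinel in place. A small stand-alone illustration of the sentinel pattern, with field names simplified:

    #include <assert.h>

    struct req { signed char ahg_idx; };       /* s8 in the driver */

    static void req_init(struct req *r)
    {
            r->ahg_idx = -1;                   /* no AHG entry yet */
    }

    int main(void)
    {
            struct req r;

            req_init(&r);
            assert(r.ahg_idx < 0);             /* replaces !test_bit(HAVE_AHG) */
            r.ahg_idx = 5;                     /* sdma_ahg_alloc() succeeded   */
            assert(r.ahg_idx >= 0);            /* replaces test_bit(HAVE_AHG)  */
            return 0;
    }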
@@ -989,7 +982,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
}
}
- if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags)) {
+ if (req->ahg_idx >= 0) {
if (!req->seqnum) {
u16 pbclen = le16_to_cpu(req->hdr.pbc[0]);
u32 lrhlen = get_lrh_len(req->hdr,
@@ -1119,7 +1112,7 @@ dosend:
* happen due to the sequential manner in which
* descriptors are processed.
*/
- if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags))
+ if (req->ahg_idx >= 0)
sdma_ahg_free(req->sde, req->ahg_idx);
}
return ret;
@@ -1321,6 +1314,7 @@ static int set_txreq_header(struct user_sdma_request *req,
{
struct hfi1_user_sdma_pkt_q *pq = req->pq;
struct hfi1_pkt_header *hdr = &tx->hdr;
+ u8 omfactor; /* KDETH.OM */
u16 pbclen;
int ret;
u32 tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
@@ -1398,8 +1392,9 @@ static int set_txreq_header(struct user_sdma_request *req,
}
tidval = req->tids[req->tididx];
}
- req->omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >=
- KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE : KDETH_OM_SMALL;
+ omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >=
+ KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE_SHIFT :
+ KDETH_OM_SMALL_SHIFT;
/* Set KDETH.TIDCtrl based on value for this TID. */
KDETH_SET(hdr->kdeth.ver_tid_offset, TIDCTRL,
EXP_TID_GET(tidval, CTRL));
@@ -1414,12 +1409,12 @@ static int set_txreq_header(struct user_sdma_request *req,
* transfer.
*/
SDMA_DBG(req, "TID offset %ubytes %uunits om%u",
- req->tidoffset, req->tidoffset / req->omfactor,
- req->omfactor != KDETH_OM_SMALL);
+ req->tidoffset, req->tidoffset >> omfactor,
+ omfactor != KDETH_OM_SMALL_SHIFT);
KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET,
- req->tidoffset / req->omfactor);
+ req->tidoffset >> omfactor);
KDETH_SET(hdr->kdeth.ver_tid_offset, OM,
- req->omfactor != KDETH_OM_SMALL);
+ omfactor != KDETH_OM_SMALL_SHIFT);
}
done:
trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt,
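The KDETH.OM factor is now carried as a shift (KDETH_OM_SMALL_SHIFT = 2 for the multiplier 4, KDETH_OM_LARGE_SHIFT = 6 for 64), so the divisions above become right shifts. A throwaway userspace check of the equivalence, with the constants copied from the defines earlier in this patch:

    #include <assert.h>

    #define KDETH_OM_SMALL        4
    #define KDETH_OM_SMALL_SHIFT  2
    #define KDETH_OM_LARGE        64
    #define KDETH_OM_LARGE_SHIFT  6

    int main(void)
    {
            unsigned int tidoffset;

            for (tidoffset = 0; tidoffset < (1u << 20); tidoffset += 4096) {
                    assert(tidoffset / KDETH_OM_SMALL ==
                           tidoffset >> KDETH_OM_SMALL_SHIFT);
                    assert(tidoffset / KDETH_OM_LARGE ==
                           tidoffset >> KDETH_OM_LARGE_SHIFT);
            }
            return 0;
    }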
@@ -1431,6 +1426,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
struct user_sdma_txreq *tx, u32 len)
{
int diff = 0;
+ u8 omfactor; /* KDETH.OM */
struct hfi1_user_sdma_pkt_q *pq = req->pq;
struct hfi1_pkt_header *hdr = &req->hdr;
u16 pbclen = le16_to_cpu(hdr->pbc[0]);
@@ -1482,14 +1478,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
}
tidval = req->tids[req->tididx];
}
- req->omfactor = ((EXP_TID_GET(tidval, LEN) *
+ omfactor = ((EXP_TID_GET(tidval, LEN) *
PAGE_SIZE) >=
- KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE :
- KDETH_OM_SMALL;
+ KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT :
+ KDETH_OM_SMALL_SHIFT;
/* KDETH.OM and KDETH.OFFSET (TID) */
AHG_HEADER_SET(req->ahg, diff, 7, 0, 16,
- ((!!(req->omfactor - KDETH_OM_SMALL)) << 15 |
- ((req->tidoffset / req->omfactor) & 0x7fff)));
+ ((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 |
+ ((req->tidoffset >> omfactor)
+ & 0x7fff)));
/* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */
val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) |
(EXP_TID_GET(tidval, IDX) & 0x3ff));
@@ -1616,9 +1613,10 @@ static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
{
hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Setting completion status %u %d",
pq->dd->unit, pq->ctxt, pq->subctxt, idx, state, ret);
- cq->comps[idx].status = state;
if (state == ERROR)
cq->comps[idx].errcode = -ret;
+ smp_wmb(); /* make sure errcode is visible first */
+ cq->comps[idx].status = state;
trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt,
idx, state, ret);
}
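The reordering in set_comp_state() publishes errcode before status, with the smp_wmb() keeping the two stores in that order; the consumer of the mmap'ed completion ring (user space) is expected to read status first and only then errcode, with a matching read barrier on its side. A hedged kernel-style sketch of the pairing; the field names follow the driver, but this is not the actual consumer code:

    /* producer (the ERROR path of set_comp_state() above) */
    comp->errcode = -ret;
    smp_wmb();                      /* errcode visible no later than status */
    comp->status = ERROR;

    /* consumer (conceptual; the real reader is user space) */
    if (comp->status == ERROR) {
            smp_rmb();              /* pairs with the smp_wmb() above */
            err = comp->errcode;    /* sees the published error code  */
    }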
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index 39001714f551..e5b10aefe212 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -1,5 +1,7 @@
+#ifndef _HFI1_USER_SDMA_H
+#define _HFI1_USER_SDMA_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -56,7 +58,7 @@ extern uint extended_psn;
struct hfi1_user_sdma_pkt_q {
struct list_head list;
unsigned ctxt;
- unsigned subctxt;
+ u16 subctxt;
u16 n_max_reqs;
atomic_t n_reqs;
u16 reqidx;
@@ -78,7 +80,11 @@ struct hfi1_user_sdma_comp_q {
struct hfi1_sdma_comp_entry *comps;
};
-int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *, struct file *);
-int hfi1_user_sdma_free_queues(struct hfi1_filedata *);
-int hfi1_user_sdma_process_request(struct file *, struct iovec *, unsigned long,
- unsigned long *);
+int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
+ struct hfi1_filedata *fd);
+int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd);
+int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
+ struct iovec *iovec, unsigned long dim,
+ unsigned long *count);
+
+#endif /* _HFI1_USER_SDMA_H */
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 95ed4d6da510..2d19f9bb434d 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -60,6 +60,8 @@
#include "trace.h"
#include "qp.h"
#include "verbs_txreq.h"
+#include "debugfs.h"
+#include "vnic.h"
static unsigned int hfi1_lkey_table_size = 16;
module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
@@ -291,12 +293,28 @@ static void wss_insert(void *address)
/*
* Is the working set larger than the threshold?
*/
-static inline int wss_exceeds_threshold(void)
+static inline bool wss_exceeds_threshold(void)
{
return atomic_read(&wss.total_count) >= wss.threshold;
}
/*
+ * Translate ib_wr_opcode into ib_wc_opcode.
+ */
+const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
+ [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
+ [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
+ [IB_WR_SEND] = IB_WC_SEND,
+ [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
+ [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
+ [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
+ [IB_WR_SEND_WITH_INV] = IB_WC_SEND,
+ [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
+ [IB_WR_REG_MR] = IB_WC_REG_MR
+};
+
+/*
* Length of header by opcode, 0 --> not supported
*/
const u8 hdr_len_by_opcode[256] = {
@@ -419,18 +437,19 @@ __be64 ib_hfi1_sys_image_guid;
* @ss: the SGE state
* @data: the data to copy
* @length: the length of the data
+ * @release: boolean to release MR
* @copy_last: do a separate copy of the last 8 bytes
*/
void hfi1_copy_sge(
struct rvt_sge_state *ss,
void *data, u32 length,
- int release,
- int copy_last)
+ bool release,
+ bool copy_last)
{
struct rvt_sge *sge = &ss->sge;
- int in_last = 0;
int i;
- int cacheless_copy = 0;
+ bool in_last = false;
+ bool cacheless_copy = false;
if (sge_copy_mode == COPY_CACHELESS) {
cacheless_copy = length >= PAGE_SIZE;
@@ -454,19 +473,15 @@ void hfi1_copy_sge(
if (length > 8) {
length -= 8;
} else {
- copy_last = 0;
- in_last = 1;
+ copy_last = false;
+ in_last = true;
}
}
again:
while (length) {
- u32 len = sge->length;
+ u32 len = rvt_get_sge_length(sge, length);
- if (len > length)
- len = length;
- if (len > sge->sge_length)
- len = sge->sge_length;
WARN_ON_ONCE(len == 0);
if (unlikely(in_last)) {
/* enforce byte transfer ordering */
@@ -477,77 +492,19 @@ again:
} else {
memcpy(sge->vaddr, data, len);
}
- sge->vaddr += len;
- sge->length -= len;
- sge->sge_length -= len;
- if (sge->sge_length == 0) {
- if (release)
- rvt_put_mr(sge->mr);
- if (--ss->num_sge)
- *sge = *ss->sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- break;
- sge->n = 0;
- }
- sge->vaddr =
- sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length =
- sge->mr->map[sge->m]->segs[sge->n].length;
- }
+ rvt_update_sge(ss, len, release);
data += len;
length -= len;
}
if (copy_last) {
- copy_last = 0;
- in_last = 1;
+ copy_last = false;
+ in_last = true;
length = 8;
goto again;
}
}
-/**
- * hfi1_skip_sge - skip over SGE memory
- * @ss: the SGE state
- * @length: the number of bytes to skip
- */
-void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release)
-{
- struct rvt_sge *sge = &ss->sge;
-
- while (length) {
- u32 len = sge->length;
-
- if (len > length)
- len = length;
- if (len > sge->sge_length)
- len = sge->sge_length;
- WARN_ON_ONCE(len == 0);
- sge->vaddr += len;
- sge->length -= len;
- sge->sge_length -= len;
- if (sge->sge_length == 0) {
- if (release)
- rvt_put_mr(sge->mr);
- if (--ss->num_sge)
- *sge = *ss->sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- break;
- sge->n = 0;
- }
- sge->vaddr =
- sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length =
- sge->mr->map[sge->m]->segs[sge->n].length;
- }
- length -= len;
- }
-}
-
/*
* Make sure the QP is ready and able to accept the given opcode.
*/
@@ -562,6 +519,35 @@ static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet)
return NULL;
}
+static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
+{
+#ifdef CONFIG_FAULT_INJECTION
+ if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP)
+ /*
+ * In order to drop non-IB traffic we
+ * set PbcInsertHrc to NONE (0x2).
+ * The packet will still be delivered
+ * to the receiving node but a
+ * KHdrHCRCErr (KDETH packet with a bad
+ * HCRC) will be triggered and the
+ * packet will not be delivered to the
+ * correct context.
+ */
+ pbc |= (u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT;
+ else
+ /*
+ * In order to drop regular verbs
+ * traffic we set the PbcTestEbp
+ * flag. The packet will still be
+ * delivered to the receiving node but
+ * a 'late ebp error' will be
+ * triggered and will be dropped.
+ */
+ pbc |= PBC_TEST_EBP;
+#endif
+ return pbc;
+}
+
/**
* hfi1_ib_rcv - process an incoming packet
* @packet: data packet information
@@ -576,7 +562,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
struct ib_header *hdr = packet->hdr;
u32 tlen = packet->tlen;
struct hfi1_pportdata *ppd = rcd->ppd;
- struct hfi1_ibport *ibp = &ppd->ibport_data;
+ struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
opcode_handler packet_handler;
unsigned long flags;
@@ -586,7 +572,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
u16 lid;
/* Check for GRH */
- lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ lnh = ib_get_lnh(hdr);
if (lnh == HFI1_LRH_BTH) {
packet->ohdr = &hdr->u.oth;
} else if (lnh == HFI1_LRH_GRH) {
@@ -605,12 +591,12 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
trace_input_ibhdr(rcd->dd, hdr);
- opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24);
+ opcode = ib_bth_get_opcode(packet->ohdr);
inc_opstats(tlen, &rcd->opstats->stats[opcode]);
/* Get the destination QP number. */
qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK;
- lid = be16_to_cpu(hdr->lrh[1]);
+ lid = ib_get_dlid(hdr);
if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
(lid != be16_to_cpu(IB_LID_PERMISSIVE)))) {
struct rvt_mcast *mcast;
@@ -618,7 +604,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
if (lnh != HFI1_LRH_GRH)
goto drop;
- mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid);
+ mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid, lid);
if (!mcast)
goto drop;
list_for_each_entry_rcu(p, &mcast->qp_list, list) {
@@ -644,6 +630,11 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
rcu_read_unlock();
goto drop;
}
+ if (unlikely(hfi1_dbg_fault_opcode(packet->qp, opcode,
+ true))) {
+ rcu_read_unlock();
+ goto drop;
+ }
spin_lock_irqsave(&packet->qp->r_lock, flags);
packet_handler = qp_ok(opcode, packet);
if (likely(packet_handler))
@@ -689,27 +680,6 @@ static void mem_timer(unsigned long data)
hfi1_qp_wakeup(qp, RVT_S_WAIT_KMEM);
}
-void update_sge(struct rvt_sge_state *ss, u32 length)
-{
- struct rvt_sge *sge = &ss->sge;
-
- sge->vaddr += length;
- sge->length -= length;
- sge->sge_length -= length;
- if (sge->sge_length == 0) {
- if (--ss->num_sge)
- *sge = *ss->sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- return;
- sge->n = 0;
- }
- sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
- }
-}
-
/*
* This is called with progress side lock held.
*/
@@ -798,7 +768,7 @@ static noinline int build_verbs_ulp_payload(
len);
if (ret)
goto bail_txadd;
- update_sge(ss, len);
+ rvt_update_sge(ss, len, false);
length -= len;
}
return ret;
@@ -863,7 +833,6 @@ static int build_verbs_tx_desc(
if (ret)
goto bail_txadd;
}
-
/* add the ulp payload - if any. tx->ss can be NULL for acks */
if (tx->ss)
ret = build_verbs_ulp_payload(sde, length, tx);
@@ -882,7 +851,6 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
struct hfi1_ibdev *dev = ps->dev;
struct hfi1_pportdata *ppd = ps->ppd;
struct verbs_txreq *tx;
- u64 pbc_flags = 0;
u8 sc5 = priv->s_sc;
int ret;
@@ -891,12 +859,16 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (!sdma_txreq_built(&tx->txreq)) {
if (likely(pbc == 0)) {
u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
+ u8 opcode = get_opcode(&tx->phdr.hdr);
+
/* No vl15 here */
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
- pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
+ pbc = hfi1_fault_tx(qp, opcode, pbc);
pbc = create_pbc(ppd,
- pbc_flags,
+ pbc,
qp->srate_mbps,
vl,
plen);
@@ -999,7 +971,6 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u32 plen = hdrwords + dwords + 2; /* includes pbc */
struct hfi1_pportdata *ppd = ps->ppd;
u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
- u64 pbc_flags = 0;
u8 sc5;
unsigned long flags = 0;
struct send_context *sc;
@@ -1024,9 +995,14 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (likely(pbc == 0)) {
u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
+ struct verbs_txreq *tx = ps->s_txreq;
+ u8 opcode = get_opcode(&tx->phdr.hdr);
+
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
- pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
- pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
+ pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
+ pbc = hfi1_fault_tx(qp, opcode, pbc);
+ pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
}
if (cb)
iowait_pio_inc(&priv->s_iowait);
@@ -1073,7 +1049,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (slen > len)
slen = len;
- update_sge(ss, slen);
+ rvt_update_sge(ss, slen, false);
seg_pio_copy_mid(pbuf, addr, slen);
len -= slen;
}
@@ -1255,7 +1231,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
hdr = &ps->s_txreq->phdr.hdr;
/* locate the pkey within the headers */
- lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ lnh = ib_get_lnh(hdr);
if (lnh == HFI1_LRH_GRH)
ohdr = &hdr->u.l.oth;
else
@@ -1302,17 +1278,20 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
{
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
- u16 ver = dd->dc8051_ver;
+ u32 ver = dd->dc8051_ver;
memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));
- rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 16) |
- (u64)dc8051_ver_min(ver);
+ rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 32) |
+ ((u64)(dc8051_ver_min(ver)) << 16) |
+ (u64)dc8051_ver_patch(ver);
+
rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE |
- IB_DEVICE_MEM_MGT_EXTENSIONS;
+ IB_DEVICE_MEM_MGT_EXTENSIONS |
+ IB_DEVICE_RDMA_NETDEV_OPA_VNIC;
rdi->dparms.props.page_size_cap = PAGE_SIZE;
rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
rdi->dparms.props.vendor_part_id = dd->pcidev->device;
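fw_ver is now packed with the major number in bits 63:32, the minor in bits 31:16 and the patch level in bits 15:0, matching the three-part version string added later in this patch. A quick userspace decode of that layout, with purely illustrative version numbers:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t fw_ver = ((uint64_t)1 << 32) |   /* major (hypothetical) */
                              ((uint64_t)27 << 16) |  /* minor (hypothetical) */
                              5;                      /* patch (hypothetical) */

            printf("%u.%u.%u\n",
                   (unsigned)(fw_ver >> 32),
                   (unsigned)((fw_ver >> 16) & 0xffff),
                   (unsigned)(fw_ver & 0xffff));      /* prints 1.27.5 */
            return 0;
    }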
@@ -1384,6 +1363,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
u16 lid = ppd->lid;
+	/* props is zeroed by the caller, so avoid zeroing it again here */
props->lid = lid ? lid : 0;
props->lmc = ppd->lmc;
/* OPA logical states match IB logical states */
@@ -1479,14 +1459,14 @@ static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
/*
* convert ah port,sl to sc
*/
-u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah)
+u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah)
{
- struct hfi1_ibport *ibp = to_iport(ibdev, ah->port_num);
+ struct hfi1_ibport *ibp = to_iport(ibdev, rdma_ah_get_port_num(ah));
- return ibp->sl_to_sc[ah->sl];
+ return ibp->sl_to_sc[rdma_ah_get_sl(ah)];
}
-static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
+static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr)
{
struct hfi1_ibport *ibp;
struct hfi1_pportdata *ppd;
@@ -1494,9 +1474,9 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
u8 sc5;
/* test the mapping for validity */
- ibp = to_iport(ibdev, ah_attr->port_num);
+ ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr));
ppd = ppd_from_ibp(ibp);
- sc5 = ibp->sl_to_sc[ah_attr->sl];
+ sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)];
dd = dd_from_ppd(ppd);
if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf)
return -EINVAL;
@@ -1504,7 +1484,7 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
}
static void hfi1_notify_new_ah(struct ib_device *ibdev,
- struct ib_ah_attr *ah_attr,
+ struct rdma_ah_attr *ah_attr,
struct rvt_ah *ah)
{
struct hfi1_ibport *ibp;
@@ -1517,9 +1497,9 @@ static void hfi1_notify_new_ah(struct ib_device *ibdev,
* done being setup. We can however modify things which we need to set.
*/
- ibp = to_iport(ibdev, ah_attr->port_num);
+ ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr));
ppd = ppd_from_ibp(ibp);
- sc5 = ibp->sl_to_sc[ah->attr.sl];
+ sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)];
dd = dd_from_ppd(ppd);
ah->vl = sc_to_vlt(dd, sc5);
if (ah->vl < num_vls || ah->vl == 15)
@@ -1528,17 +1508,21 @@ static void hfi1_notify_new_ah(struct ib_device *ibdev,
struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid)
{
- struct ib_ah_attr attr;
+ struct rdma_ah_attr attr;
struct ib_ah *ah = ERR_PTR(-EINVAL);
struct rvt_qp *qp0;
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+ struct hfi1_devdata *dd = dd_from_ppd(ppd);
+ u8 port_num = ppd->port;
memset(&attr, 0, sizeof(attr));
- attr.dlid = dlid;
- attr.port_num = ppd_from_ibp(ibp)->port;
+ attr.type = rdma_ah_find_type(&dd->verbs_dev.rdi.ibdev, port_num);
+ rdma_ah_set_dlid(&attr, dlid);
+ rdma_ah_set_port_num(&attr, ppd_from_ibp(ibp)->port);
rcu_read_lock();
qp0 = rcu_dereference(ibp->rvp.qp[0]);
if (qp0)
- ah = ib_create_ah(qp0->ibqp.pd, &attr);
+ ah = rdma_create_ah(qp0->ibqp.pd, &attr);
rcu_read_unlock();
return ah;
}
@@ -1585,10 +1569,10 @@ static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str,
{
struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
struct hfi1_ibdev *dev = dev_from_rdi(rdi);
- u16 ver = dd_from_dev(dev)->dc8051_ver;
+ u32 ver = dd_from_dev(dev)->dc8051_ver;
- snprintf(str, str_len, "%u.%u", dc8051_ver_maj(ver),
- dc8051_ver_min(ver));
+ snprintf(str, str_len, "%u.%u.%u", dc8051_ver_maj(ver),
+ dc8051_ver_min(ver), dc8051_ver_patch(ver));
}
static const char * const driver_cntr_names[] = {
@@ -1605,6 +1589,7 @@ static const char * const driver_cntr_names[] = {
"DRIVER_EgrHdrFull"
};
+static DEFINE_MUTEX(cntr_names_lock); /* protects the *_cntr_names buffers */
static const char **dev_cntr_names;
static const char **port_cntr_names;
static int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names);
@@ -1618,7 +1603,7 @@ static int cntr_names_initialized;
* external strings.
*/
static int init_cntr_names(const char *names_in,
- const int names_len,
+ const size_t names_len,
int num_extra_names,
int *num_cntrs,
const char ***cntr_names)
@@ -1659,6 +1644,7 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
{
int i, err;
+ mutex_lock(&cntr_names_lock);
if (!cntr_names_initialized) {
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
@@ -1667,8 +1653,10 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
num_driver_cntrs,
&num_dev_cntrs,
&dev_cntr_names);
- if (err)
+ if (err) {
+ mutex_unlock(&cntr_names_lock);
return NULL;
+ }
for (i = 0; i < num_driver_cntrs; i++)
dev_cntr_names[num_dev_cntrs + i] =
@@ -1682,10 +1670,12 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
if (err) {
kfree(dev_cntr_names);
dev_cntr_names = NULL;
+ mutex_unlock(&cntr_names_lock);
return NULL;
}
cntr_names_initialized = 1;
}
+ mutex_unlock(&cntr_names_lock);
if (!port_num)
return rdma_alloc_hw_stats_struct(
@@ -1784,10 +1774,11 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz);
ibdev->owner = THIS_MODULE;
ibdev->phys_port_cnt = dd->num_pports;
- ibdev->dma_device = &dd->pcidev->dev;
+ ibdev->dev.parent = &dd->pcidev->dev;
ibdev->modify_device = modify_device;
ibdev->alloc_hw_stats = alloc_hw_stats;
ibdev->get_hw_stats = get_hw_stats;
+ ibdev->alloc_rdma_netdev = hfi1_vnic_alloc_rn;
/* keep process mad in the driver */
ibdev->process_mad = hfi1_process_mad;
@@ -1832,7 +1823,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free;
dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps;
dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset;
- dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send;
+ dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send_from_rvt;
dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send;
dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _hfi1_schedule_send;
dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr;
@@ -1845,6 +1836,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu;
dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp;
dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;
+ dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc;
dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe;
	/* completion queue */
@@ -1903,14 +1895,18 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
del_timer_sync(&dev->mem_timer);
verbs_txreq_exit(dev);
+ mutex_lock(&cntr_names_lock);
kfree(dev_cntr_names);
kfree(port_cntr_names);
+ dev_cntr_names = NULL;
+ port_cntr_names = NULL;
cntr_names_initialized = 0;
+ mutex_unlock(&cntr_names_lock);
}
void hfi1_cnp_rcv(struct hfi1_packet *packet)
{
- struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
+ struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct ib_header *hdr = packet->hdr;
struct rvt_qp *qp = packet->qp;
@@ -1920,12 +1916,12 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
switch (packet->qp->ibqp.qp_type) {
case IB_QPT_UC:
- rlid = qp->remote_ah_attr.dlid;
+ rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_UC;
break;
case IB_QPT_RC:
- rlid = qp->remote_ah_attr.dlid;
+ rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_RC;
break;
@@ -1939,7 +1935,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
return;
}
- sc5 = hdr2sc(hdr, packet->rhf);
+ sc5 = hfi1_9B_get_sc5(hdr, packet->rhf);
sl = ibp->sc_to_sl[sc5];
lqpn = qp->ibqp.qp_num;
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index e6b893010e6d..cd635d0c1d3b 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -125,9 +125,7 @@ struct hfi1_qp_priv {
struct sdma_engine *s_sde; /* current sde */
struct send_context *s_sendcontext; /* current sendcontext */
u8 s_sc; /* SC[0..4] for next packet */
- u8 r_adefered; /* number of acks defered */
struct iowait s_iowait;
- struct timer_list s_rnr_timer;
struct rvt_qp *owner;
};
@@ -141,6 +139,10 @@ struct hfi1_pkt_state {
struct hfi1_pportdata *ppd;
struct verbs_txreq *s_txreq;
unsigned long flags;
+ unsigned long timeout;
+ unsigned long timeout_int;
+ int cpu;
+ bool in_thread;
};
#define HFI1_PSN_CREDIT 16
@@ -196,6 +198,11 @@ struct hfi1_ibdev {
struct dentry *hfi1_ibdev_dbg;
/* per HFI symlinks to above */
struct dentry *hfi1_ibdev_link;
+#ifdef CONFIG_FAULT_INJECTION
+ struct fault_opcode *fault_opcode;
+ struct fault_packet *fault_packet;
+ bool fault_suppress_err;
+#endif
#endif
};
@@ -260,15 +267,6 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
#define PSN_MODIFY_MASK 0xFFFFFF
/*
- * Compare the lower 24 bits of the msn values.
- * Returns an integer <, ==, or > than zero.
- */
-static inline int cmp_msn(u32 a, u32 b)
-{
- return (((int)a) - ((int)b)) << 8;
-}
-
-/*
* Compare two PSNs
* Returns an integer <, ==, or > than zero.
*/
@@ -299,9 +297,7 @@ void hfi1_put_txreq(struct verbs_txreq *tx);
int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
void hfi1_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
- int release, int copy_last);
-
-void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release);
+ bool release, bool copy_last);
void hfi1_cnp_rcv(struct hfi1_packet *packet);
@@ -315,20 +311,12 @@ void hfi1_rc_hdrerr(
u32 rcv_flags,
struct rvt_qp *qp);
-u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
+u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid);
-void hfi1_rc_rnr_retry(unsigned long arg);
-void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to);
-void hfi1_rc_timeout(unsigned long arg);
-void hfi1_del_timers_sync(struct rvt_qp *qp);
-void hfi1_stop_rc_timers(struct rvt_qp *qp);
-
void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr);
-void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
-
void hfi1_ud_rcv(struct hfi1_packet *packet);
int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey);
@@ -342,7 +330,7 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
-
+void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait);
int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
extern const u32 rc_only_opcode;
@@ -362,7 +350,7 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
int has_grh, struct rvt_qp *qp, u32 bth0);
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
- struct ib_global_route *grh, u32 hwords, u32 nwords);
+ const struct ib_global_route *grh, u32 hwords, u32 nwords);
void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth2, int middle,
@@ -370,7 +358,9 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
void _hfi1_do_send(struct work_struct *work);
-void hfi1_do_send(struct rvt_qp *qp);
+void hfi1_do_send_from_rvt(struct rvt_qp *qp);
+
+void hfi1_do_send(struct rvt_qp *qp, bool in_thread);
void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
enum ib_wc_status status);
diff --git a/drivers/infiniband/hw/hfi1/vnic.h b/drivers/infiniband/hw/hfi1/vnic.h
new file mode 100644
index 000000000000..4a621cde4abb
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/vnic.h
@@ -0,0 +1,183 @@
+#ifndef _HFI1_VNIC_H
+#define _HFI1_VNIC_H
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/opa_vnic.h>
+#include "hfi.h"
+#include "sdma.h"
+
+#define HFI1_VNIC_MAX_TXQ 16
+#define HFI1_VNIC_MAX_PAD 12
+
+/* L2 header definitions */
+#define HFI1_L2_TYPE_OFFSET 0x7
+#define HFI1_L2_TYPE_SHFT 0x5
+#define HFI1_L2_TYPE_MASK 0x3
+
+#define HFI1_GET_L2_TYPE(hdr) \
+ ((*((u8 *)(hdr) + HFI1_L2_TYPE_OFFSET) >> HFI1_L2_TYPE_SHFT) & \
+ HFI1_L2_TYPE_MASK)
+
+/* L4 type definitions */
+#define HFI1_L4_TYPE_OFFSET 8
+
+#define HFI1_GET_L4_TYPE(data) \
+ (*((u8 *)(data) + HFI1_L4_TYPE_OFFSET))
+
+/* L4 header definitions */
+#define HFI1_VNIC_L4_HDR_OFFSET OPA_VNIC_L2_HDR_LEN
+
+#define HFI1_VNIC_GET_L4_HDR(data) \
+ (*((u16 *)((u8 *)(data) + HFI1_VNIC_L4_HDR_OFFSET)))
+
+#define HFI1_VNIC_GET_VESWID(data) \
+ (HFI1_VNIC_GET_L4_HDR(data) & 0xFFF)
+
+/* Service class */
+#define HFI1_VNIC_SC_OFFSET_LOW 6
+#define HFI1_VNIC_SC_OFFSET_HI 7
+#define HFI1_VNIC_SC_SHIFT 4
+
+#define HFI1_VNIC_MAX_QUEUE 16
+
+/**
+ * struct hfi1_vnic_sdma - VNIC per Tx ring SDMA information
+ * @dd: device data pointer
+ * @sde: sdma engine
+ * @vinfo: vnic info pointer
+ * @wait: iowait structure
+ * @stx: sdma tx request
+ * @state: vnic Tx ring SDMA state
+ * @q_idx: vnic Tx queue index
+ */
+struct hfi1_vnic_sdma {
+ struct hfi1_devdata *dd;
+ struct sdma_engine *sde;
+ struct hfi1_vnic_vport_info *vinfo;
+ struct iowait wait;
+ struct sdma_txreq stx;
+ unsigned int state;
+ u8 q_idx;
+};
+
+/**
+ * struct hfi1_vnic_rx_queue - HFI1 VNIC receive queue
+ * @idx: queue index
+ * @vinfo: pointer to vport information
+ * @netdev: network device
+ * @napi: netdev napi structure
+ * @skbq: queue of received socket buffers
+ */
+struct hfi1_vnic_rx_queue {
+ u8 idx;
+ struct hfi1_vnic_vport_info *vinfo;
+ struct net_device *netdev;
+ struct napi_struct napi;
+ struct sk_buff_head skbq;
+};
+
+/**
+ * struct hfi1_vnic_vport_info - HFI1 VNIC virtual port information
+ * @dd: device data pointer
+ * @netdev: net device pointer
+ * @flags: state flags
+ * @lock: vport lock
+ * @num_tx_q: number of transmit queues
+ * @num_rx_q: number of receive queues
+ * @vesw_id: virtual switch id
+ * @rxq: Array of receive queues
+ * @stats: per queue stats
+ * @sdma: VNIC SDMA structure per TXQ
+ */
+struct hfi1_vnic_vport_info {
+ struct hfi1_devdata *dd;
+ struct net_device *netdev;
+ unsigned long flags;
+
+ /* Lock used around state updates */
+ struct mutex lock;
+
+ u8 num_tx_q;
+ u8 num_rx_q;
+ u16 vesw_id;
+ struct hfi1_vnic_rx_queue rxq[HFI1_NUM_VNIC_CTXT];
+
+ struct opa_vnic_stats stats[HFI1_VNIC_MAX_QUEUE];
+ struct hfi1_vnic_sdma sdma[HFI1_VNIC_MAX_TXQ];
+};
+
+#define v_dbg(format, arg...) \
+ netdev_dbg(vinfo->netdev, format, ## arg)
+#define v_err(format, arg...) \
+ netdev_err(vinfo->netdev, format, ## arg)
+#define v_info(format, arg...) \
+ netdev_info(vinfo->netdev, format, ## arg)
+
+/* vnic hfi1 internal functions */
+void hfi1_vnic_setup(struct hfi1_devdata *dd);
+void hfi1_vnic_cleanup(struct hfi1_devdata *dd);
+int hfi1_vnic_txreq_init(struct hfi1_devdata *dd);
+void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd);
+
+void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet);
+void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo);
+bool hfi1_vnic_sdma_write_avail(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx);
+
+/* vnic rdma netdev operations */
+struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
+ u8 port_num,
+ enum rdma_netdev_t type,
+ const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *));
+int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
+ struct hfi1_vnic_vport_info *vinfo,
+ struct sk_buff *skb, u64 pbc, u8 plen);
+
+#endif /* _HFI1_VNIC_H */
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
new file mode 100644
index 000000000000..339f0cdd56d6
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -0,0 +1,904 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for VNIC functionality
+ */
+
+#include <linux/io.h>
+#include <linux/if_vlan.h>
+
+#include "vnic.h"
+
+#define HFI_TX_TIMEOUT_MS 1000
+
+#define HFI1_VNIC_RCV_Q_SIZE 1024
+
+#define HFI1_VNIC_UP 0
+
+static DEFINE_SPINLOCK(vport_cntr_lock);
+
+static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
+{
+ unsigned int rcvctrl_ops = 0;
+ int ret;
+
+ hfi1_init_ctxt(uctxt->sc);
+
+ uctxt->do_interrupt = &handle_receive_interrupt;
+
+ /* Now allocate the RcvHdr queue and eager buffers. */
+ ret = hfi1_create_rcvhdrq(dd, uctxt);
+ if (ret)
+ goto done;
+
+ ret = hfi1_setup_eagerbufs(uctxt);
+ if (ret)
+ goto done;
+
+ if (uctxt->rcvhdrtail_kvaddr)
+ clear_rcvhdrtail(uctxt);
+
+ rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
+ rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB;
+
+ if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
+ rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
+ rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
+ rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
+ rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
+
+ hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);
+
+ uctxt->is_vnic = true;
+done:
+ return ret;
+}
+
+static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
+ struct hfi1_ctxtdata **vnic_ctxt)
+{
+ struct hfi1_ctxtdata *uctxt;
+ unsigned int ctxt;
+ int ret;
+
+ if (dd->flags & HFI1_FROZEN)
+ return -EIO;
+
+ for (ctxt = dd->first_dyn_alloc_ctxt;
+ ctxt < dd->num_rcv_contexts; ctxt++)
+ if (!dd->rcd[ctxt])
+ break;
+
+ if (ctxt == dd->num_rcv_contexts)
+ return -EBUSY;
+
+ uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, dd->node);
+ if (!uctxt) {
+ dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
+ return -ENOMEM;
+ }
+
+ uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
+ HFI1_CAP_KGET(NODROP_RHQ_FULL) |
+ HFI1_CAP_KGET(NODROP_EGR_FULL) |
+ HFI1_CAP_KGET(DMA_RTAIL);
+ uctxt->seq_cnt = 1;
+
+ /* Allocate and enable a PIO send context */
+ uctxt->sc = sc_alloc(dd, SC_VNIC, uctxt->rcvhdrqentsize,
+ uctxt->numa_id);
+
+ ret = uctxt->sc ? 0 : -ENOMEM;
+ if (ret)
+ goto bail;
+
+ dd_dev_dbg(dd, "allocated vnic send context %u(%u)\n",
+ uctxt->sc->sw_index, uctxt->sc->hw_context);
+ ret = sc_enable(uctxt->sc);
+ if (ret)
+ goto bail;
+
+ if (dd->num_msix_entries)
+ hfi1_set_vnic_msix_info(uctxt);
+
+ hfi1_stats.sps_ctxts++;
+ dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
+ *vnic_ctxt = uctxt;
+
+ return ret;
+bail:
+ /*
+ * hfi1_free_ctxtdata() also releases send_context
+ * structure if uctxt->sc is not null
+ */
+ dd->rcd[uctxt->ctxt] = NULL;
+ hfi1_free_ctxtdata(dd, uctxt);
+ dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret);
+ return ret;
+}
+
+static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
+ struct hfi1_ctxtdata *uctxt)
+{
+ unsigned long flags;
+
+ dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
+ flush_wc();
+
+ if (dd->num_msix_entries)
+ hfi1_reset_vnic_msix_info(uctxt);
+
+ spin_lock_irqsave(&dd->uctxt_lock, flags);
+ /*
+ * Disable receive context and interrupt available, reset all
+ * RcvCtxtCtrl bits to default values.
+ */
+ hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
+ HFI1_RCVCTRL_TIDFLOW_DIS |
+ HFI1_RCVCTRL_INTRAVAIL_DIS |
+ HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
+ HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
+ HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt);
+ /*
+ * VNIC contexts are allocated from user context pool.
+ * Release them back to user context pool.
+ *
+ * Reset context integrity checks to default.
+ * (writes to CSRs probably belong in chip.c)
+ */
+ write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE,
+ hfi1_pkt_default_send_ctxt_mask(dd, SC_USER));
+ sc_disable(uctxt->sc);
+
+ dd->send_contexts[uctxt->sc->sw_index].type = SC_USER;
+ spin_unlock_irqrestore(&dd->uctxt_lock, flags);
+
+ dd->rcd[uctxt->ctxt] = NULL;
+ uctxt->event_flags = 0;
+
+ hfi1_clear_tids(uctxt);
+ hfi1_clear_ctxt_pkey(dd, uctxt);
+
+ hfi1_stats.sps_ctxts--;
+ hfi1_free_ctxtdata(dd, uctxt);
+}
+
+void hfi1_vnic_setup(struct hfi1_devdata *dd)
+{
+ idr_init(&dd->vnic.vesw_idr);
+}
+
+void hfi1_vnic_cleanup(struct hfi1_devdata *dd)
+{
+ idr_destroy(&dd->vnic.vesw_idr);
+}
+
+#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do { \
+ u64 *src64, *dst64; \
+ for (src64 = &qstats->x_grp.unicast, \
+ dst64 = &stats->x_grp.unicast; \
+ dst64 <= &stats->x_grp.s_1519_max;) { \
+ *dst64++ += *src64++; \
+ } \
+ } while (0)
+
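SUM_GRP_COUNTERS() walks two opa_vnic_grp_stats blocks as flat u64 arrays, which only works because every counter from .unicast through .s_1519_max is a contiguous u64 field. A stand-alone illustration of that pointer walk, using a reduced stand-in struct (the real opa_vnic_grp_stats carries more counters):

    #include <stdint.h>
    #include <stdio.h>

    struct grp_stats {                  /* reduced stand-in, all fields u64 */
            uint64_t unicast;
            uint64_t mcastbcast;
            uint64_t s_64;
            uint64_t s_1519_max;
    };

    static void sum_grp(struct grp_stats *dst, const struct grp_stats *src)
    {
            uint64_t *d = &dst->unicast;
            const uint64_t *s = &src->unicast;

            while (d <= &dst->s_1519_max)   /* same bound the macro uses */
                    *d++ += *s++;
    }

    int main(void)
    {
            struct grp_stats a = { 1, 2, 3, 4 };
            struct grp_stats b = { 10, 20, 30, 40 };

            sum_grp(&a, &b);
            printf("%llu %llu\n",
                   (unsigned long long)a.unicast,
                   (unsigned long long)a.s_1519_max);  /* prints 11 44 */
            return 0;
    }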
+/* hfi1_vnic_update_stats - update statistics */
+static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
+ struct opa_vnic_stats *stats)
+{
+ struct net_device *netdev = vinfo->netdev;
+ u8 i;
+
+ /* add tx counters on different queues */
+ for (i = 0; i < vinfo->num_tx_q; i++) {
+ struct opa_vnic_stats *qstats = &vinfo->stats[i];
+ struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;
+
+ stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors;
+ stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors;
+ stats->tx_drop_state += qstats->tx_drop_state;
+ stats->tx_dlid_zero += qstats->tx_dlid_zero;
+
+ SUM_GRP_COUNTERS(stats, qstats, tx_grp);
+ stats->netstats.tx_packets += qnstats->tx_packets;
+ stats->netstats.tx_bytes += qnstats->tx_bytes;
+ }
+
+ /* add rx counters on different queues */
+ for (i = 0; i < vinfo->num_rx_q; i++) {
+ struct opa_vnic_stats *qstats = &vinfo->stats[i];
+ struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;
+
+ stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors;
+ stats->netstats.rx_nohandler += qnstats->rx_nohandler;
+ stats->rx_drop_state += qstats->rx_drop_state;
+ stats->rx_oversize += qstats->rx_oversize;
+ stats->rx_runt += qstats->rx_runt;
+
+ SUM_GRP_COUNTERS(stats, qstats, rx_grp);
+ stats->netstats.rx_packets += qnstats->rx_packets;
+ stats->netstats.rx_bytes += qnstats->rx_bytes;
+ }
+
+ stats->netstats.tx_errors = stats->netstats.tx_fifo_errors +
+ stats->netstats.tx_carrier_errors +
+ stats->tx_drop_state + stats->tx_dlid_zero;
+ stats->netstats.tx_dropped = stats->netstats.tx_errors;
+
+ stats->netstats.rx_errors = stats->netstats.rx_fifo_errors +
+ stats->netstats.rx_nohandler +
+ stats->rx_drop_state + stats->rx_oversize +
+ stats->rx_runt;
+ stats->netstats.rx_dropped = stats->netstats.rx_errors;
+
+ netdev->stats.tx_packets = stats->netstats.tx_packets;
+ netdev->stats.tx_bytes = stats->netstats.tx_bytes;
+ netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors;
+ netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors;
+ netdev->stats.tx_errors = stats->netstats.tx_errors;
+ netdev->stats.tx_dropped = stats->netstats.tx_dropped;
+
+ netdev->stats.rx_packets = stats->netstats.rx_packets;
+ netdev->stats.rx_bytes = stats->netstats.rx_bytes;
+ netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors;
+ netdev->stats.multicast = stats->rx_grp.mcastbcast;
+ netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt;
+ netdev->stats.rx_errors = stats->netstats.rx_errors;
+ netdev->stats.rx_dropped = stats->netstats.rx_dropped;
+}
+
+/* update_len_counters - update pkt's len histogram counters */
+static inline void update_len_counters(struct opa_vnic_grp_stats *grp,
+ int len)
+{
+ /* account for 4 byte FCS */
+ if (len >= 1515)
+ grp->s_1519_max++;
+ else if (len >= 1020)
+ grp->s_1024_1518++;
+ else if (len >= 508)
+ grp->s_512_1023++;
+ else if (len >= 252)
+ grp->s_256_511++;
+ else if (len >= 124)
+ grp->s_128_255++;
+ else if (len >= 61)
+ grp->s_65_127++;
+ else
+ grp->s_64++;
+}
+
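The thresholds in update_len_counters() are the usual RMON-style size buckets (64, 65-127, ..., 1024-1518, 1519 and up) shifted down by the 4-byte FCS that skb->len does not include: 1519 - 4 = 1515, 1024 - 4 = 1020, 512 - 4 = 508, and so on. A quick userspace check of that correspondence (ETH_FCS_LEN defined locally so the snippet stands alone):

    #include <assert.h>

    #define ETH_FCS_LEN 4

    int main(void)
    {
            /* bucket lower bounds on the wire vs. the skb->len thresholds above */
            const int wire[] = { 1519, 1024, 512, 256, 128, 65 };
            const int code[] = { 1515, 1020, 508, 252, 124, 61 };
            int i;

            for (i = 0; i < 6; i++)
                    assert(code[i] == wire[i] - ETH_FCS_LEN);
            return 0;
    }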
+/* hfi1_vnic_update_tx_counters - update transmit counters */
+static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx, struct sk_buff *skb, int err)
+{
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+ struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
+ struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp;
+ u16 vlan_tci;
+
+ stats->netstats.tx_packets++;
+ stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN;
+
+ update_len_counters(tx_grp, skb->len);
+
+ /* rest of the counts are for good packets only */
+ if (unlikely(err))
+ return;
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ tx_grp->mcastbcast++;
+ else
+ tx_grp->unicast++;
+
+ if (!__vlan_get_tag(skb, &vlan_tci))
+ tx_grp->vlan++;
+ else
+ tx_grp->untagged++;
+}
+
+/* hfi1_vnic_update_rx_counters - update receive counters */
+static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx, struct sk_buff *skb, int err)
+{
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb->data;
+ struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
+ struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp;
+ u16 vlan_tci;
+
+ stats->netstats.rx_packets++;
+ stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN;
+
+ update_len_counters(rx_grp, skb->len);
+
+ /* rest of the counts are for good packets only */
+ if (unlikely(err))
+ return;
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ rx_grp->mcastbcast++;
+ else
+ rx_grp->unicast++;
+
+ if (!__vlan_get_tag(skb, &vlan_tci))
+ rx_grp->vlan++;
+ else
+ rx_grp->untagged++;
+}
+
+/* This function is overloaded for opa_vnic specific implementation */
+static void hfi1_vnic_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats;
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+
+ hfi1_vnic_update_stats(vinfo, vstats);
+}
+
+static u64 create_bypass_pbc(u32 vl, u32 dw_len)
+{
+ u64 pbc;
+
+ pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
+ | PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN
+ | PBC_PACKET_BYPASS
+ | ((vl & PBC_VL_MASK) << PBC_VL_SHIFT)
+ | (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT;
+
+ return pbc;
+}
+
+/* hfi1_vnic_maybe_stop_tx - stop tx queue if required */
+static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx)
+{
+ netif_stop_subqueue(vinfo->netdev, q_idx);
+ if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
+ return;
+
+ netif_start_subqueue(vinfo->netdev, q_idx);
+}
+
+static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+ u8 pad_len, q_idx = skb->queue_mapping;
+ struct hfi1_devdata *dd = vinfo->dd;
+ struct opa_vnic_skb_mdata *mdata;
+ u32 pkt_len, total_len;
+ int err = -EINVAL;
+ u64 pbc;
+
+ v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len);
+ if (unlikely(!netif_oper_up(netdev))) {
+ vinfo->stats[q_idx].tx_drop_state++;
+ goto tx_finish;
+ }
+
+ /* take out meta data */
+ mdata = (struct opa_vnic_skb_mdata *)skb->data;
+ skb_pull(skb, sizeof(*mdata));
+ if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) {
+ vinfo->stats[q_idx].tx_dlid_zero++;
+ goto tx_finish;
+ }
+
+ /* add tail padding (for 8-byte size alignment) and icrc */
+ pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
+ pad_len += OPA_VNIC_ICRC_TAIL_LEN;
+
+ /*
+ * pkt_len is how much data we have to write, including header and data.
+ * total_len is the length of the packet in dwords plus the PBC; it
+ * should not include the CRC.
+ */
+ pkt_len = (skb->len + pad_len) >> 2;
+ total_len = pkt_len + 2; /* PBC + packet */
+
+ pbc = create_bypass_pbc(mdata->vl, total_len);
+
+ skb_get(skb);
+ v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len);
+ err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len);
+ if (unlikely(err)) {
+ if (err == -ENOMEM)
+ vinfo->stats[q_idx].netstats.tx_fifo_errors++;
+ else if (err != -EBUSY)
+ vinfo->stats[q_idx].netstats.tx_carrier_errors++;
+ }
+ /* remove the header before updating tx counters */
+ skb_pull(skb, OPA_VNIC_HDR_LEN);
+
+ if (unlikely(err == -EBUSY)) {
+ hfi1_vnic_maybe_stop_tx(vinfo, q_idx);
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_BUSY;
+ }
+
+tx_finish:
+ /* update tx counters */
+ hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err);
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+}
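
For illustration, a stand-alone sketch of the padding and length arithmetic above: round the frame plus ICRC/tail up to an 8-byte boundary, convert to dwords, and add two dwords for the PBC. The ICRC/tail length is passed as a parameter here, and the value used in main() is an assumed example rather than the driver's OPA_VNIC_ICRC_TAIL_LEN.

/* Stand-alone sketch of the tx padding/length math. */
#include <stdio.h>

struct tx_lengths {
	unsigned int pad_len;   /* tail padding plus icrc/tail bytes */
	unsigned int pkt_len;   /* packet length in dwords */
	unsigned int total_len; /* pkt_len + 2 dwords of PBC */
};

static struct tx_lengths compute_tx_lengths(unsigned int skb_len,
					    unsigned int icrc_tail_len)
{
	struct tx_lengths t;

	/* round (skb_len + icrc_tail_len) up to an 8-byte boundary */
	t.pad_len = -(skb_len + icrc_tail_len) & 0x7;
	t.pad_len += icrc_tail_len;
	t.pkt_len = (skb_len + t.pad_len) >> 2;  /* bytes -> dwords */
	t.total_len = t.pkt_len + 2;             /* + PBC (2 dwords) */
	return t;
}

int main(void)
{
	/* 5 is an assumed example icrc/tail length, not the driver value */
	struct tx_lengths t = compute_tx_lengths(100, 5);

	printf("pad %u pkt %u total %u dwords\n",
	       t.pad_len, t.pkt_len, t.total_len);
	return 0;
}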
+
+static u16 hfi1_vnic_select_queue(struct net_device *netdev,
+ struct sk_buff *skb,
+ void *accel_priv,
+ select_queue_fallback_t fallback)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+ struct opa_vnic_skb_mdata *mdata;
+ struct sdma_engine *sde;
+
+ mdata = (struct opa_vnic_skb_mdata *)skb->data;
+ sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl);
+ return sde->this_idx;
+}
+
+/* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
+static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
+ struct sk_buff *skb)
+{
+ struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
+ int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN;
+ int rc = -EFAULT;
+
+ skb_pull(skb, OPA_VNIC_HDR_LEN);
+
+ /* Validate Packet length */
+ if (unlikely(skb->len > max_len))
+ vinfo->stats[rxq->idx].rx_oversize++;
+ else if (unlikely(skb->len < ETH_ZLEN))
+ vinfo->stats[rxq->idx].rx_runt++;
+ else
+ rc = 0;
+ return rc;
+}
+
+static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq)
+{
+ unsigned char *pad_info;
+ struct sk_buff *skb;
+
+ skb = skb_dequeue(&rxq->skbq);
+ if (unlikely(!skb))
+ return NULL;
+
+ /* remove tail padding and icrc */
+ pad_info = skb->data + skb->len - 1;
+ skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
+ ((*pad_info) & 0x7)));
+
+ return skb;
+}
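
For illustration, a stand-alone sketch of the tail trim above: the last byte of the received buffer carries the pad length written by the sender, of which only the low three bits are alignment padding, and the ICRC/tail bytes are removed as well. As on the transmit side, the ICRC/tail length is a parameter and the value used in main() is an assumed example.

/* Stand-alone sketch of the rx tail trim. */
#include <stdio.h>

static unsigned int trimmed_len(const unsigned char *buf, unsigned int len,
				unsigned int icrc_tail_len)
{
	unsigned char pad_info = buf[len - 1];

	/* strip icrc/tail bytes plus the encoded alignment padding */
	return len - icrc_tail_len - (pad_info & 0x7);
}

int main(void)
{
	unsigned char frame[112] = { 0 };

	frame[111] = 7;	/* pad byte stored at the end of the frame */
	printf("payload length %u\n", trimmed_len(frame, 112, 5 /* example */));
	return 0;
}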
+
+/* hfi1_vnic_handle_rx - handle skb receive */
+static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq,
+ int *work_done, int work_to_do)
+{
+ struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
+ struct sk_buff *skb;
+ int rc;
+
+ while (1) {
+ if (*work_done >= work_to_do)
+ break;
+
+ skb = hfi1_vnic_get_skb(rxq);
+ if (unlikely(!skb))
+ break;
+
+ rc = hfi1_vnic_decap_skb(rxq, skb);
+ /* update rx counters */
+ hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
+ if (unlikely(rc)) {
+ dev_kfree_skb_any(skb);
+ continue;
+ }
+
+ skb_checksum_none_assert(skb);
+ skb->protocol = eth_type_trans(skb, rxq->netdev);
+
+ napi_gro_receive(&rxq->napi, skb);
+ (*work_done)++;
+ }
+}
+
+/* hfi1_vnic_napi - napi receive polling callback function */
+static int hfi1_vnic_napi(struct napi_struct *napi, int budget)
+{
+ struct hfi1_vnic_rx_queue *rxq = container_of(napi,
+ struct hfi1_vnic_rx_queue, napi);
+ struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
+ int work_done = 0;
+
+ v_dbg("napi %d budget %d\n", rxq->idx, budget);
+ hfi1_vnic_handle_rx(rxq, &work_done, budget);
+
+ v_dbg("napi %d work_done %d\n", rxq->idx, work_done);
+ if (work_done < budget)
+ napi_complete(napi);
+
+ return work_done;
+}
+
+void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
+{
+ struct hfi1_devdata *dd = packet->rcd->dd;
+ struct hfi1_vnic_vport_info *vinfo = NULL;
+ struct hfi1_vnic_rx_queue *rxq;
+ struct sk_buff *skb;
+ int l4_type, vesw_id = -1;
+ u8 q_idx;
+
+ l4_type = HFI1_GET_L4_TYPE(packet->ebuf);
+ if (likely(l4_type == OPA_VNIC_L4_ETHR)) {
+ vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
+ vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id);
+
+ /*
+ * In case of invalid vesw id, count the error on
+ * the first available vport.
+ */
+ if (unlikely(!vinfo)) {
+ struct hfi1_vnic_vport_info *vinfo_tmp;
+ int id_tmp = 0;
+
+ vinfo_tmp = idr_get_next(&dd->vnic.vesw_idr, &id_tmp);
+ if (vinfo_tmp) {
+ spin_lock(&vport_cntr_lock);
+ vinfo_tmp->stats[0].netstats.rx_nohandler++;
+ spin_unlock(&vport_cntr_lock);
+ }
+ }
+ }
+
+ if (unlikely(!vinfo)) {
+ dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n",
+ l4_type, vesw_id, packet->rcd->ctxt);
+ return;
+ }
+
+ q_idx = packet->rcd->vnic_q_idx;
+ rxq = &vinfo->rxq[q_idx];
+ if (unlikely(!netif_oper_up(vinfo->netdev))) {
+ vinfo->stats[q_idx].rx_drop_state++;
+ skb_queue_purge(&rxq->skbq);
+ return;
+ }
+
+ if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) {
+ vinfo->stats[q_idx].netstats.rx_fifo_errors++;
+ return;
+ }
+
+ skb = netdev_alloc_skb(vinfo->netdev, packet->tlen);
+ if (unlikely(!skb)) {
+ vinfo->stats[q_idx].netstats.rx_fifo_errors++;
+ return;
+ }
+
+ memcpy(skb->data, packet->ebuf, packet->tlen);
+ skb_put(skb, packet->tlen);
+ skb_queue_tail(&rxq->skbq, skb);
+
+ if (napi_schedule_prep(&rxq->napi)) {
+ v_dbg("napi %d scheduling\n", q_idx);
+ __napi_schedule(&rxq->napi);
+ }
+}
+
+static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
+{
+ struct hfi1_devdata *dd = vinfo->dd;
+ struct net_device *netdev = vinfo->netdev;
+ int i, rc;
+
+ /* ensure virtual eth switch id is valid */
+ if (!vinfo->vesw_id)
+ return -EINVAL;
+
+ rc = idr_alloc(&dd->vnic.vesw_idr, vinfo, vinfo->vesw_id,
+ vinfo->vesw_id + 1, GFP_NOWAIT);
+ if (rc < 0)
+ return rc;
+
+ for (i = 0; i < vinfo->num_rx_q; i++) {
+ struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
+
+ skb_queue_head_init(&rxq->skbq);
+ napi_enable(&rxq->napi);
+ }
+
+ netif_carrier_on(netdev);
+ netif_tx_start_all_queues(netdev);
+ set_bit(HFI1_VNIC_UP, &vinfo->flags);
+
+ return 0;
+}
+
+static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
+{
+ struct hfi1_devdata *dd = vinfo->dd;
+ u8 i;
+
+ clear_bit(HFI1_VNIC_UP, &vinfo->flags);
+ netif_carrier_off(vinfo->netdev);
+ netif_tx_disable(vinfo->netdev);
+ idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id);
+
+ /* ensure irqs see the change */
+ hfi1_vnic_synchronize_irq(dd);
+
+ /* remove unread skbs */
+ for (i = 0; i < vinfo->num_rx_q; i++) {
+ struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
+
+ napi_disable(&rxq->napi);
+ skb_queue_purge(&rxq->skbq);
+ }
+}
+
+static int hfi1_netdev_open(struct net_device *netdev)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+ int rc;
+
+ mutex_lock(&vinfo->lock);
+ rc = hfi1_vnic_up(vinfo);
+ mutex_unlock(&vinfo->lock);
+ return rc;
+}
+
+static int hfi1_netdev_close(struct net_device *netdev)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+
+ mutex_lock(&vinfo->lock);
+ if (test_bit(HFI1_VNIC_UP, &vinfo->flags))
+ hfi1_vnic_down(vinfo);
+ mutex_unlock(&vinfo->lock);
+ return 0;
+}
+
+static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd,
+ struct hfi1_ctxtdata **vnic_ctxt)
+{
+ int rc;
+
+ rc = allocate_vnic_ctxt(dd, vnic_ctxt);
+ if (rc) {
+ dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc);
+ return rc;
+ }
+
+ rc = setup_vnic_ctxt(dd, *vnic_ctxt);
+ if (rc) {
+ dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc);
+ deallocate_vnic_ctxt(dd, *vnic_ctxt);
+ *vnic_ctxt = NULL;
+ }
+
+ return rc;
+}
+
+static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
+{
+ struct hfi1_devdata *dd = vinfo->dd;
+ int i, rc = 0;
+
+ mutex_lock(&hfi1_mutex);
+ if (!dd->vnic.num_vports) {
+ rc = hfi1_vnic_txreq_init(dd);
+ if (rc)
+ goto txreq_fail;
+
+ dd->vnic.msix_idx = dd->first_dyn_msix_idx;
+ }
+
+ for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
+ rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
+ if (rc)
+ break;
+ dd->vnic.ctxt[i]->vnic_q_idx = i;
+ }
+
+ if (i < vinfo->num_rx_q) {
+ /*
+ * If the required number of contexts could not be
+ * allocated, release the contexts allocated so far.
+ */
+ while (i-- > dd->vnic.num_ctxt) {
+ deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
+ dd->vnic.ctxt[i] = NULL;
+ }
+ goto alloc_fail;
+ }
+
+ if (dd->vnic.num_ctxt != i) {
+ dd->vnic.num_ctxt = i;
+ hfi1_init_vnic_rsm(dd);
+ }
+
+ dd->vnic.num_vports++;
+ hfi1_vnic_sdma_init(vinfo);
+alloc_fail:
+ if (!dd->vnic.num_vports)
+ hfi1_vnic_txreq_deinit(dd);
+txreq_fail:
+ mutex_unlock(&hfi1_mutex);
+ return rc;
+}
+
+static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
+{
+ struct hfi1_devdata *dd = vinfo->dd;
+ int i;
+
+ mutex_lock(&hfi1_mutex);
+ if (--dd->vnic.num_vports == 0) {
+ for (i = 0; i < dd->vnic.num_ctxt; i++) {
+ deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
+ dd->vnic.ctxt[i] = NULL;
+ }
+ hfi1_deinit_vnic_rsm(dd);
+ dd->vnic.num_ctxt = 0;
+ hfi1_vnic_txreq_deinit(dd);
+ }
+ mutex_unlock(&hfi1_mutex);
+}
+
+static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+ bool reopen = false;
+
+ /*
+ * If the vesw_id is being changed and the vnic port is up,
+ * reset the vnic port so that the new vesw_id gets picked up.
+ */
+ if (id != vinfo->vesw_id) {
+ mutex_lock(&vinfo->lock);
+ if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) {
+ hfi1_vnic_down(vinfo);
+ reopen = true;
+ }
+
+ vinfo->vesw_id = id;
+ if (reopen)
+ hfi1_vnic_up(vinfo);
+
+ mutex_unlock(&vinfo->lock);
+ }
+}
+
+/* netdev ops */
+static const struct net_device_ops hfi1_netdev_ops = {
+ .ndo_open = hfi1_netdev_open,
+ .ndo_stop = hfi1_netdev_close,
+ .ndo_start_xmit = hfi1_netdev_start_xmit,
+ .ndo_select_queue = hfi1_vnic_select_queue,
+ .ndo_get_stats64 = hfi1_vnic_get_stats64,
+};
+
+static void hfi1_vnic_free_rn(struct net_device *netdev)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+
+ hfi1_vnic_deinit(vinfo);
+ mutex_destroy(&vinfo->lock);
+ free_netdev(netdev);
+}
+
+struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
+ u8 port_num,
+ enum rdma_netdev_t type,
+ const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *))
+{
+ struct hfi1_devdata *dd = dd_from_ibdev(device);
+ struct hfi1_vnic_vport_info *vinfo;
+ struct net_device *netdev;
+ struct rdma_netdev *rn;
+ int i, size, rc;
+
+ if (!port_num || (port_num > dd->num_pports))
+ return ERR_PTR(-EINVAL);
+
+ if (type != RDMA_NETDEV_OPA_VNIC)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
+ netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
+ dd->chip_sdma_engines, HFI1_NUM_VNIC_CTXT);
+ if (!netdev)
+ return ERR_PTR(-ENOMEM);
+
+ rn = netdev_priv(netdev);
+ vinfo = opa_vnic_dev_priv(netdev);
+ vinfo->dd = dd;
+ vinfo->num_tx_q = dd->chip_sdma_engines;
+ vinfo->num_rx_q = HFI1_NUM_VNIC_CTXT;
+ vinfo->netdev = netdev;
+ rn->free_rdma_netdev = hfi1_vnic_free_rn;
+ rn->set_id = hfi1_vnic_set_vesw_id;
+
+ netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG;
+ netdev->hw_features = netdev->features;
+ netdev->vlan_features = netdev->features;
+ netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS);
+ netdev->netdev_ops = &hfi1_netdev_ops;
+ mutex_init(&vinfo->lock);
+
+ for (i = 0; i < vinfo->num_rx_q; i++) {
+ struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
+
+ rxq->idx = i;
+ rxq->vinfo = vinfo;
+ rxq->netdev = netdev;
+ netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64);
+ }
+
+ rc = hfi1_vnic_init(vinfo);
+ if (rc)
+ goto init_fail;
+
+ return netdev;
+init_fail:
+ mutex_destroy(&vinfo->lock);
+ free_netdev(netdev);
+ return ERR_PTR(rc);
+}
diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c
new file mode 100644
index 000000000000..51a817d3aa14
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for VNIC SDMA functionality
+ */
+
+#include "sdma.h"
+#include "vnic.h"
+
+#define HFI1_VNIC_SDMA_Q_ACTIVE BIT(0)
+#define HFI1_VNIC_SDMA_Q_DEFERRED BIT(1)
+
+#define HFI1_VNIC_TXREQ_NAME_LEN 32
+#define HFI1_VNIC_SDMA_DESC_WTRMRK 64
+#define HFI1_VNIC_SDMA_RETRY_COUNT 1
+
+/*
+ * struct vnic_txreq - VNIC transmit descriptor
+ * @txreq: sdma transmit request
+ * @sdma: vnic sdma pointer
+ * @skb: skb to send
+ * @pad: pad buffer
+ * @plen: pad length
+ * @pbc_val: pbc value
+ * @retry_count: tx retry count
+ */
+struct vnic_txreq {
+ struct sdma_txreq txreq;
+ struct hfi1_vnic_sdma *sdma;
+
+ struct sk_buff *skb;
+ unsigned char pad[HFI1_VNIC_MAX_PAD];
+ u16 plen;
+ __le64 pbc_val;
+
+ u32 retry_count;
+};
+
+static void vnic_sdma_complete(struct sdma_txreq *txreq,
+ int status)
+{
+ struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq);
+ struct hfi1_vnic_sdma *vnic_sdma = tx->sdma;
+
+ sdma_txclean(vnic_sdma->dd, txreq);
+ dev_kfree_skb_any(tx->skb);
+ kmem_cache_free(vnic_sdma->dd->vnic.txreq_cache, tx);
+}
+
+static noinline int build_vnic_ulp_payload(struct sdma_engine *sde,
+ struct vnic_txreq *tx)
+{
+ int i, ret = 0;
+
+ ret = sdma_txadd_kvaddr(
+ sde->dd,
+ &tx->txreq,
+ tx->skb->data,
+ skb_headlen(tx->skb));
+ if (unlikely(ret))
+ goto bail_txadd;
+
+ for (i = 0; i < skb_shinfo(tx->skb)->nr_frags; i++) {
+ struct skb_frag_struct *frag = &skb_shinfo(tx->skb)->frags[i];
+
+ /* combine physically contiguous fragments later? */
+ ret = sdma_txadd_page(sde->dd,
+ &tx->txreq,
+ skb_frag_page(frag),
+ frag->page_offset,
+ skb_frag_size(frag));
+ if (unlikely(ret))
+ goto bail_txadd;
+ }
+
+ if (tx->plen)
+ ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq,
+ tx->pad + HFI1_VNIC_MAX_PAD - tx->plen,
+ tx->plen);
+
+bail_txadd:
+ return ret;
+}
+
+static int build_vnic_tx_desc(struct sdma_engine *sde,
+ struct vnic_txreq *tx,
+ u64 pbc)
+{
+ int ret = 0;
+ u16 hdrbytes = 2 << 2; /* PBC */
+
+ ret = sdma_txinit_ahg(
+ &tx->txreq,
+ 0,
+ hdrbytes + tx->skb->len + tx->plen,
+ 0,
+ 0,
+ NULL,
+ 0,
+ vnic_sdma_complete);
+ if (unlikely(ret))
+ goto bail_txadd;
+
+ /* add pbc */
+ tx->pbc_val = cpu_to_le64(pbc);
+ ret = sdma_txadd_kvaddr(
+ sde->dd,
+ &tx->txreq,
+ &tx->pbc_val,
+ hdrbytes);
+ if (unlikely(ret))
+ goto bail_txadd;
+
+ /* add the ulp payload */
+ ret = build_vnic_ulp_payload(sde, tx);
+bail_txadd:
+ return ret;
+}
+
+/* set up the last plen bytes of pad */
+static inline void hfi1_vnic_update_pad(unsigned char *pad, u8 plen)
+{
+ pad[HFI1_VNIC_MAX_PAD - 1] = plen - OPA_VNIC_ICRC_TAIL_LEN;
+}
+
+int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
+ struct hfi1_vnic_vport_info *vinfo,
+ struct sk_buff *skb, u64 pbc, u8 plen)
+{
+ struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[q_idx];
+ struct sdma_engine *sde = vnic_sdma->sde;
+ struct vnic_txreq *tx;
+ int ret = -ECOMM;
+
+ if (unlikely(READ_ONCE(vnic_sdma->state) != HFI1_VNIC_SDMA_Q_ACTIVE))
+ goto tx_err;
+
+ if (unlikely(!sde || !sdma_running(sde)))
+ goto tx_err;
+
+ tx = kmem_cache_alloc(dd->vnic.txreq_cache, GFP_ATOMIC);
+ if (unlikely(!tx)) {
+ ret = -ENOMEM;
+ goto tx_err;
+ }
+
+ tx->sdma = vnic_sdma;
+ tx->skb = skb;
+ hfi1_vnic_update_pad(tx->pad, plen);
+ tx->plen = plen;
+ ret = build_vnic_tx_desc(sde, tx, pbc);
+ if (unlikely(ret))
+ goto free_desc;
+ tx->retry_count = 0;
+
+ ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq);
+ /* When -ECOMM, sdma callback will be called with ABORT status */
+ if (unlikely(ret && unlikely(ret != -ECOMM)))
+ goto free_desc;
+
+ return ret;
+
+free_desc:
+ sdma_txclean(dd, &tx->txreq);
+ kmem_cache_free(dd->vnic.txreq_cache, tx);
+tx_err:
+ if (ret != -EBUSY)
+ dev_kfree_skb_any(skb);
+ return ret;
+}
+
+/*
+ * hfi1_vnic_sdma_sleep - vnic sdma sleep function
+ *
+ * This function gets called from sdma_send_txreq() when there are not enough
+ * sdma descriptors available to send the packet. It adds the Tx queue's wait
+ * structure to the sdma engine's dmawait list so that it is woken up when
+ * descriptors become available.
+ */
+static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
+ struct iowait *wait,
+ struct sdma_txreq *txreq,
+ unsigned int seq)
+{
+ struct hfi1_vnic_sdma *vnic_sdma =
+ container_of(wait, struct hfi1_vnic_sdma, wait);
+ struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev;
+ struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq);
+
+ if (sdma_progress(sde, seq, txreq))
+ if (tx->retry_count++ < HFI1_VNIC_SDMA_RETRY_COUNT)
+ return -EAGAIN;
+
+ vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
+ write_seqlock(&dev->iowait_lock);
+ if (list_empty(&vnic_sdma->wait.list))
+ list_add_tail(&vnic_sdma->wait.list, &sde->dmawait);
+ write_sequnlock(&dev->iowait_lock);
+ return -EBUSY;
+}
+
+/*
+ * hfi1_vnic_sdma_wakeup - vnic sdma wakeup function
+ *
+ * This function gets called when SDMA descriptors become available and the Tx
+ * queue's wait structure was previously added to the sdma engine's dmawait
+ * list.
+ * It notifies the upper driver about Tx queue wakeup.
+ */
+static void hfi1_vnic_sdma_wakeup(struct iowait *wait, int reason)
+{
+ struct hfi1_vnic_sdma *vnic_sdma =
+ container_of(wait, struct hfi1_vnic_sdma, wait);
+ struct hfi1_vnic_vport_info *vinfo = vnic_sdma->vinfo;
+
+ vnic_sdma->state = HFI1_VNIC_SDMA_Q_ACTIVE;
+ if (__netif_subqueue_stopped(vinfo->netdev, vnic_sdma->q_idx))
+ netif_wake_subqueue(vinfo->netdev, vnic_sdma->q_idx);
+};
+
+inline bool hfi1_vnic_sdma_write_avail(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx)
+{
+ struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[q_idx];
+
+ return (READ_ONCE(vnic_sdma->state) == HFI1_VNIC_SDMA_Q_ACTIVE);
+}
+
+void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
+{
+ int i;
+
+ for (i = 0; i < vinfo->num_tx_q; i++) {
+ struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[i];
+
+ iowait_init(&vnic_sdma->wait, 0, NULL, hfi1_vnic_sdma_sleep,
+ hfi1_vnic_sdma_wakeup, NULL);
+ vnic_sdma->sde = &vinfo->dd->per_sdma[i];
+ vnic_sdma->dd = vinfo->dd;
+ vnic_sdma->vinfo = vinfo;
+ vnic_sdma->q_idx = i;
+ vnic_sdma->state = HFI1_VNIC_SDMA_Q_ACTIVE;
+
+ /* Add a free descriptor watermark for wakeups */
+ if (vnic_sdma->sde->descq_cnt > HFI1_VNIC_SDMA_DESC_WTRMRK) {
+ INIT_LIST_HEAD(&vnic_sdma->stx.list);
+ vnic_sdma->stx.num_desc = HFI1_VNIC_SDMA_DESC_WTRMRK;
+ list_add_tail(&vnic_sdma->stx.list,
+ &vnic_sdma->wait.tx_head);
+ }
+ }
+}
+
+static void hfi1_vnic_txreq_kmem_cache_ctor(void *obj)
+{
+ struct vnic_txreq *tx = (struct vnic_txreq *)obj;
+
+ memset(tx, 0, sizeof(*tx));
+}
+
+int hfi1_vnic_txreq_init(struct hfi1_devdata *dd)
+{
+ char buf[HFI1_VNIC_TXREQ_NAME_LEN];
+
+ snprintf(buf, sizeof(buf), "hfi1_%u_vnic_txreq_cache", dd->unit);
+ dd->vnic.txreq_cache = kmem_cache_create(buf,
+ sizeof(struct vnic_txreq),
+ 0, SLAB_HWCACHE_ALIGN,
+ hfi1_vnic_txreq_kmem_cache_ctor);
+ if (!dd->vnic.txreq_cache)
+ return -ENOMEM;
+ return 0;
+}
+
+void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd)
+{
+ kmem_cache_destroy(dd->vnic.txreq_cache);
+ dd->vnic.txreq_cache = NULL;
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index 0ac294db3b29..f78a733a63ec 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -39,7 +39,8 @@
#define HNS_ROCE_VLAN_SL_BIT_MASK 7
#define HNS_ROCE_VLAN_SL_SHIFT 13
-struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr,
+struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
+ struct rdma_ah_attr *ah_attr,
struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device);
@@ -48,6 +49,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr,
struct hns_roce_ah *ah;
u16 vlan_tag = 0xffff;
struct in6_addr in6;
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
union ib_gid sgid;
int ret;
@@ -56,15 +58,20 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr,
return ERR_PTR(-ENOMEM);
/* Get mac address */
- memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(ah_attr->grh.dgid.raw));
- if (rdma_is_multicast_addr(&in6))
+ memcpy(&in6, grh->dgid.raw, sizeof(grh->dgid.raw));
+ if (rdma_is_multicast_addr(&in6)) {
rdma_get_mcast_mac(&in6, ah->av.mac);
- else
- memcpy(ah->av.mac, ah_attr->dmac, sizeof(ah_attr->dmac));
+ } else {
+ u8 *dmac = rdma_ah_retrieve_dmac(ah_attr);
+
+ if (!dmac)
+ return ERR_PTR(-EINVAL);
+ memcpy(ah->av.mac, dmac, ETH_ALEN);
+ }
/* Get source gid */
- ret = ib_get_cached_gid(ibpd->device, ah_attr->port_num,
- ah_attr->grh.sgid_index, &sgid, &gid_attr);
+ ret = ib_get_cached_gid(ibpd->device, rdma_ah_get_port_num(ah_attr),
+ grh->sgid_index, &sgid, &gid_attr);
if (ret) {
dev_err(dev, "get sgid failed! ret = %d\n", ret);
kfree(ah);
@@ -78,45 +85,46 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr,
}
if (vlan_tag < 0x1000)
- vlan_tag |= (ah_attr->sl & HNS_ROCE_VLAN_SL_BIT_MASK) <<
+ vlan_tag |= (rdma_ah_get_sl(ah_attr) &
+ HNS_ROCE_VLAN_SL_BIT_MASK) <<
HNS_ROCE_VLAN_SL_SHIFT;
- ah->av.port_pd = cpu_to_be32(to_hr_pd(ibpd)->pdn | (ah_attr->port_num <<
+ ah->av.port_pd = cpu_to_be32(to_hr_pd(ibpd)->pdn |
+ (rdma_ah_get_port_num(ah_attr) <<
HNS_ROCE_PORT_NUM_SHIFT));
- ah->av.gid_index = ah_attr->grh.sgid_index;
+ ah->av.gid_index = grh->sgid_index;
ah->av.vlan = cpu_to_le16(vlan_tag);
dev_dbg(dev, "gid_index = 0x%x,vlan = 0x%x\n", ah->av.gid_index,
ah->av.vlan);
- if (ah_attr->static_rate)
+ if (rdma_ah_get_static_rate(ah_attr))
ah->av.stat_rate = IB_RATE_10_GBPS;
- memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, HNS_ROCE_GID_SIZE);
- ah->av.sl_tclass_flowlabel = cpu_to_le32(ah_attr->sl <<
+ memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE);
+ ah->av.sl_tclass_flowlabel = cpu_to_le32(rdma_ah_get_sl(ah_attr) <<
HNS_ROCE_SL_SHIFT);
return &ah->ibah;
}
-int hns_roce_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
{
struct hns_roce_ah *ah = to_hr_ah(ibah);
memset(ah_attr, 0, sizeof(*ah_attr));
- ah_attr->sl = le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
- HNS_ROCE_SL_SHIFT;
- ah_attr->port_num = le32_to_cpu(ah->av.port_pd) >>
- HNS_ROCE_PORT_NUM_SHIFT;
- ah_attr->static_rate = ah->av.stat_rate;
- ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->grh.traffic_class = le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
- HNS_ROCE_TCLASS_SHIFT;
- ah_attr->grh.flow_label = le32_to_cpu(ah->av.sl_tclass_flowlabel) &
- HNS_ROCE_FLOW_LABLE_MASK;
- ah_attr->grh.hop_limit = ah->av.hop_limit;
- ah_attr->grh.sgid_index = ah->av.gid_index;
- memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, HNS_ROCE_GID_SIZE);
+ rdma_ah_set_sl(ah_attr, (le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
+ HNS_ROCE_SL_SHIFT));
+ rdma_ah_set_port_num(ah_attr, (le32_to_cpu(ah->av.port_pd) >>
+ HNS_ROCE_PORT_NUM_SHIFT));
+ rdma_ah_set_static_rate(ah_attr, ah->av.stat_rate);
+ rdma_ah_set_grh(ah_attr, NULL,
+ (le32_to_cpu(ah->av.sl_tclass_flowlabel) &
+ HNS_ROCE_FLOW_LABLE_MASK), ah->av.gid_index,
+ ah->av.hop_limit,
+ (le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
+ HNS_ROCE_TCLASS_SHIFT));
+ rdma_ah_set_dgid_raw(ah_attr, ah->av.dgid);
return 0;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c
index 8c1f7a6f84d2..b94dcd823ad1 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c
@@ -299,9 +299,9 @@ int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev)
struct hns_roce_cmdq *hr_cmd = &hr_dev->cmd;
int i;
- hr_cmd->context = kmalloc(hr_cmd->max_cmds *
- sizeof(struct hns_roce_cmd_context),
- GFP_KERNEL);
+ hr_cmd->context = kmalloc_array(hr_cmd->max_cmds,
+ sizeof(*hr_cmd->context),
+ GFP_KERNEL);
if (!hr_cmd->context)
return -ENOMEM;
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 589496c8fb9e..b89fd711019e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -219,8 +219,7 @@ static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev,
return PTR_ERR(*umem);
ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(*umem),
- ilog2((unsigned int)(*umem)->page_size),
- &buf->hr_mtt);
+ (*umem)->page_shift, &buf->hr_mtt);
if (ret)
goto err_buf;
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 1a6cb5d7a0dd..e493a61e14e1 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -687,9 +687,10 @@ void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap,
unsigned long obj, int cnt,
int rr);
-struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+struct ib_ah *hns_roce_create_ah(struct ib_pd *pd,
+ struct rdma_ah_attr *ah_attr,
struct ib_udata *udata);
-int hns_roce_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
+int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
int hns_roce_destroy_ah(struct ib_ah *ah);
struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index b8111b0c8877..23fad6d96944 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -33,6 +33,7 @@
#include <linux/platform_device.h>
#include <linux/acpi.h>
#include <linux/etherdevice.h>
+#include <linux/of.h>
#include <rdma/ib_umem.h>
#include "hns_roce_common.h"
#include "hns_roce_device.h"
@@ -227,14 +228,14 @@ int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_RDMA_READ:
ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_READ;
- set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
- atomic_wr(wr)->rkey);
+ set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
+ rdma_wr(wr)->rkey);
break;
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_WRITE;
- set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
- atomic_wr(wr)->rkey);
+ set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
+ rdma_wr(wr)->rkey);
break;
case IB_WR_SEND:
case IB_WR_SEND_WITH_INV:
@@ -657,11 +658,14 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
struct hns_roce_qp *hr_qp;
struct ib_cq *cq;
struct ib_pd *pd;
+ union ib_gid dgid;
u64 subnet_prefix;
int attr_mask = 0;
- int i;
+ int i, j;
int ret;
+ u8 queue_en[HNS_ROCE_V1_RESV_QP] = { 0 };
u8 phy_port;
+ u8 port = 0;
u8 sl;
priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
@@ -707,15 +711,27 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
attr.rnr_retry = 7;
attr.timeout = 0x12;
attr.path_mtu = IB_MTU_256;
- attr.ah_attr.ah_flags = 1;
- attr.ah_attr.static_rate = 3;
- attr.ah_attr.grh.sgid_index = 0;
- attr.ah_attr.grh.hop_limit = 1;
- attr.ah_attr.grh.flow_label = 0;
- attr.ah_attr.grh.traffic_class = 0;
+ attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
+ rdma_ah_set_grh(&attr.ah_attr, NULL, 0, 0, 1, 0);
+ rdma_ah_set_static_rate(&attr.ah_attr, 3);
subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) {
+ phy_port = (i >= HNS_ROCE_MAX_PORTS) ? (i - 2) :
+ (i % HNS_ROCE_MAX_PORTS);
+ sl = i / HNS_ROCE_MAX_PORTS;
+
+ for (j = 0; j < caps->num_ports; j++) {
+ if (hr_dev->iboe.phy_port[j] == phy_port) {
+ queue_en[i] = 1;
+ port = j;
+ break;
+ }
+ }
+
+ if (!queue_en[i])
+ continue;
+
free_mr->mr_free_qp[i] = hns_roce_v1_create_lp_qp(hr_dev, pd);
if (IS_ERR(free_mr->mr_free_qp[i])) {
dev_err(dev, "Create loop qp failed!\n");
@@ -723,15 +739,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
}
hr_qp = free_mr->mr_free_qp[i];
- sl = i / caps->num_ports;
-
- if (caps->num_ports == HNS_ROCE_MAX_PORTS)
- phy_port = (i >= HNS_ROCE_MAX_PORTS) ? (i - 2) :
- (i % caps->num_ports);
- else
- phy_port = i % caps->num_ports;
-
- hr_qp->port = phy_port + 1;
+ hr_qp->port = port;
hr_qp->phy_port = phy_port;
hr_qp->ibqp.qp_type = IB_QPT_RC;
hr_qp->ibqp.device = &hr_dev->ib_dev;
@@ -741,25 +749,22 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
hr_qp->ibqp.recv_cq = cq;
hr_qp->ibqp.send_cq = cq;
- attr.ah_attr.port_num = phy_port + 1;
- attr.ah_attr.sl = sl;
- attr.port_num = phy_port + 1;
+ rdma_ah_set_port_num(&attr.ah_attr, port + 1);
+ rdma_ah_set_sl(&attr.ah_attr, sl);
+ attr.port_num = port + 1;
attr.dest_qp_num = hr_qp->qpn;
- memcpy(attr.ah_attr.dmac, hr_dev->dev_addr[phy_port],
+ memcpy(rdma_ah_retrieve_dmac(&attr.ah_attr),
+ hr_dev->dev_addr[port],
MAC_ADDR_OCTET_NUM);
- memcpy(attr.ah_attr.grh.dgid.raw,
- &subnet_prefix, sizeof(u64));
- memcpy(&attr.ah_attr.grh.dgid.raw[8],
- hr_dev->dev_addr[phy_port], 3);
- memcpy(&attr.ah_attr.grh.dgid.raw[13],
- hr_dev->dev_addr[phy_port] + 3, 3);
- attr.ah_attr.grh.dgid.raw[11] = 0xff;
- attr.ah_attr.grh.dgid.raw[12] = 0xfe;
- attr.ah_attr.grh.dgid.raw[8] ^= 2;
-
- attr_mask |= IB_QP_PORT;
+ memcpy(&dgid.raw, &subnet_prefix, sizeof(u64));
+ memcpy(&dgid.raw[8], hr_dev->dev_addr[port], 3);
+ memcpy(&dgid.raw[13], hr_dev->dev_addr[port] + 3, 3);
+ dgid.raw[11] = 0xff;
+ dgid.raw[12] = 0xfe;
+ dgid.raw[8] ^= 2;
+ rdma_ah_set_dgid_raw(&attr.ah_attr, dgid.raw);
ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask,
IB_QPS_RESET, IB_QPS_INIT);
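
For illustration, a stand-alone sketch of the destination GID construction in the hunk above: a modified EUI-64 built from the port MAC address under the fe80::/64 prefix, with ff:fe inserted in the middle of the MAC and the universal/local bit flipped.

/* Stand-alone sketch of the link-local GID construction. */
#include <stdio.h>
#include <string.h>

static void mac_to_link_local_gid(const unsigned char mac[6],
				  unsigned char gid[16])
{
	static const unsigned char prefix[8] = { 0xfe, 0x80, 0, 0, 0, 0, 0, 0 };

	memcpy(gid, prefix, 8);            /* fe80::/64 subnet prefix      */
	memcpy(&gid[8], mac, 3);           /* upper half of the MAC        */
	memcpy(&gid[13], mac + 3, 3);      /* lower half of the MAC        */
	gid[11] = 0xff;                    /* EUI-64 filler bytes          */
	gid[12] = 0xfe;
	gid[8] ^= 2;                       /* flip the universal/local bit */
}

int main(void)
{
	unsigned char mac[6] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
	unsigned char gid[16];
	int i;

	mac_to_link_local_gid(mac, gid);
	for (i = 0; i < 16; i++)
		printf("%02x%s", gid[i], (i & 1) ? " " : "");
	printf("\n");
	return 0;
}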
@@ -816,6 +821,9 @@ static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev)
for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) {
hr_qp = free_mr->mr_free_qp[i];
+ if (!hr_qp)
+ continue;
+
ret = hns_roce_v1_destroy_qp(&hr_qp->ibqp);
if (ret)
dev_err(dev, "Destroy qp %d for mr free failed(%d)!\n",
@@ -967,7 +975,7 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work)
msecs_to_jiffies(HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS) + jiffies;
int i;
int ret;
- int ne;
+ int ne = 0;
mr_work = container_of(work, struct hns_roce_mr_free_work, work);
hr_mr = (struct hns_roce_mr *)mr_work->mr;
@@ -980,6 +988,10 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work)
for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) {
hr_qp = free_mr->mr_free_qp[i];
+ if (!hr_qp)
+ continue;
+ ne++;
+
ret = hns_roce_v1_send_lp_wqe(hr_qp);
if (ret) {
dev_err(dev,
@@ -989,7 +1001,6 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work)
}
}
- ne = HNS_ROCE_V1_RESV_QP;
do {
ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc);
if (ret < 0) {
@@ -999,7 +1010,8 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work)
goto free_work;
}
ne -= ret;
- msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE);
+ usleep_range(HNS_ROCE_V1_FREE_MR_WAIT_VALUE * 1000,
+ (1 + HNS_ROCE_V1_FREE_MR_WAIT_VALUE) * 1000);
} while (ne && time_before_eq(jiffies, end));
if (ne != 0)
@@ -1725,7 +1737,7 @@ int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
roce_set_field(mpt_entry->mpt_byte_64, MPT_BYTE_64_L_KEY_IDX_H_M,
MPT_BYTE_64_L_KEY_IDX_H_S, mtpt_idx >> MTPT_IDX_SHIFT);
- /* DMA momery regsiter */
+ /* DMA memory register */
if (mr->type == MR_TYPE_DMA)
return 0;
@@ -1851,6 +1863,7 @@ void hns_roce_v1_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index)
u32 doorbell[2];
doorbell[0] = cons_index & ((hr_cq->cq_depth << 1) - 1);
+ doorbell[1] = 0;
roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1);
roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M,
ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S, 3);
@@ -2184,7 +2197,7 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *hr_cq,
}
wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
++wq->tail;
- } else {
+ } else {
/* RQ corresponds to CQE */
wc->byte_len = le32_to_cpu(cqe->byte_cnt);
opcode = roce_get_field(cqe->cqe_byte_4,
@@ -2565,6 +2578,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
struct device *dev = &hr_dev->pdev->dev;
struct hns_roce_qp_context *context;
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
dma_addr_t dma_handle_2 = 0;
dma_addr_t dma_handle = 0;
uint32_t doorbell[2] = {0};
@@ -2573,6 +2587,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
int ret = -EINVAL;
u64 *mtts = NULL;
int port;
+ u8 port_num;
u8 *dmac;
u8 *smac;
@@ -2739,7 +2754,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
goto out;
}
- dmac = (u8 *)attr->ah_attr.dmac;
+ dmac = (u8 *)attr->ah_attr.roce.dmac;
context->sq_rq_bt_l = (u32)(dma_handle);
roce_set_field(context->qpc_bytes_24,
@@ -2780,7 +2795,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
roce_set_bit(context->qpc_bytes_32,
QP_CONTEXT_QPC_BYTE_32_GLOBAL_HEADER_S,
- attr->ah_attr.ah_flags);
+ rdma_ah_get_ah_flags(&attr->ah_attr));
roce_set_field(context->qpc_bytes_32,
QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_M,
QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S,
@@ -2792,12 +2807,13 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
attr->dest_qp_num);
/* Configure GID index */
+ port_num = rdma_ah_get_port_num(&attr->ah_attr);
roce_set_field(context->qpc_bytes_36,
QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M,
QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S,
- hns_get_gid_index(hr_dev,
- attr->ah_attr.port_num - 1,
- attr->ah_attr.grh.sgid_index));
+ hns_get_gid_index(hr_dev,
+ port_num - 1,
+ grh->sgid_index));
memcpy(&(context->dmac_l), dmac, 4);
@@ -2808,26 +2824,26 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
roce_set_field(context->qpc_bytes_44,
QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_M,
QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_S,
- attr->ah_attr.static_rate);
+ rdma_ah_get_static_rate(&attr->ah_attr));
roce_set_field(context->qpc_bytes_44,
QP_CONTEXT_QPC_BYTES_44_HOPLMT_M,
QP_CONTEXT_QPC_BYTES_44_HOPLMT_S,
- attr->ah_attr.grh.hop_limit);
+ grh->hop_limit);
roce_set_field(context->qpc_bytes_48,
QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M,
QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S,
- attr->ah_attr.grh.flow_label);
+ grh->flow_label);
roce_set_field(context->qpc_bytes_48,
QP_CONTEXT_QPC_BYTES_48_TCLASS_M,
QP_CONTEXT_QPC_BYTES_48_TCLASS_S,
- attr->ah_attr.grh.traffic_class);
+ grh->traffic_class);
roce_set_field(context->qpc_bytes_48,
QP_CONTEXT_QPC_BYTES_48_MTU_M,
QP_CONTEXT_QPC_BYTES_48_MTU_S, attr->path_mtu);
- memcpy(context->dgid, attr->ah_attr.grh.dgid.raw,
- sizeof(attr->ah_attr.grh.dgid.raw));
+ memcpy(context->dgid, grh->dgid.raw,
+ sizeof(grh->dgid.raw));
dev_dbg(dev, "dmac:%x :%lx\n", context->dmac_l,
roce_get_field(context->qpc_bytes_44,
@@ -2907,8 +2923,9 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
hr_qp->phy_port);
roce_set_field(context->qpc_bytes_156,
QP_CONTEXT_QPC_BYTES_156_SL_M,
- QP_CONTEXT_QPC_BYTES_156_SL_S, attr->ah_attr.sl);
- hr_qp->sl = attr->ah_attr.sl;
+ QP_CONTEXT_QPC_BYTES_156_SL_S,
+ rdma_ah_get_sl(&attr->ah_attr));
+ hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
} else if (cur_state == IB_QPS_RTR &&
new_state == IB_QPS_RTS) {
/* If exist optional param, return error */
@@ -3019,8 +3036,9 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
hr_qp->phy_port);
roce_set_field(context->qpc_bytes_156,
QP_CONTEXT_QPC_BYTES_156_SL_M,
- QP_CONTEXT_QPC_BYTES_156_SL_S, attr->ah_attr.sl);
- hr_qp->sl = attr->ah_attr.sl;
+ QP_CONTEXT_QPC_BYTES_156_SL_S,
+ rdma_ah_get_sl(&attr->ah_attr));
+ hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
roce_set_field(context->qpc_bytes_156,
QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M,
QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S,
@@ -3355,28 +3373,33 @@ static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
if (hr_qp->ibqp.qp_type == IB_QPT_RC ||
hr_qp->ibqp.qp_type == IB_QPT_UC) {
- qp_attr->ah_attr.sl = roce_get_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_SL_M,
- QP_CONTEXT_QPC_BYTES_156_SL_S);
- qp_attr->ah_attr.grh.flow_label = roce_get_field(
- context->qpc_bytes_48,
- QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M,
- QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S);
- qp_attr->ah_attr.grh.sgid_index = roce_get_field(
- context->qpc_bytes_36,
- QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M,
- QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S);
- qp_attr->ah_attr.grh.hop_limit = roce_get_field(
- context->qpc_bytes_44,
- QP_CONTEXT_QPC_BYTES_44_HOPLMT_M,
- QP_CONTEXT_QPC_BYTES_44_HOPLMT_S);
- qp_attr->ah_attr.grh.traffic_class = roce_get_field(
- context->qpc_bytes_48,
- QP_CONTEXT_QPC_BYTES_48_TCLASS_M,
- QP_CONTEXT_QPC_BYTES_48_TCLASS_S);
-
- memcpy(qp_attr->ah_attr.grh.dgid.raw, context->dgid,
- sizeof(qp_attr->ah_attr.grh.dgid.raw));
+ struct ib_global_route *grh =
+ rdma_ah_retrieve_grh(&qp_attr->ah_attr);
+
+ rdma_ah_set_sl(&qp_attr->ah_attr,
+ roce_get_field(context->qpc_bytes_156,
+ QP_CONTEXT_QPC_BYTES_156_SL_M,
+ QP_CONTEXT_QPC_BYTES_156_SL_S));
+ rdma_ah_set_ah_flags(&qp_attr->ah_attr, IB_AH_GRH);
+ grh->flow_label =
+ roce_get_field(context->qpc_bytes_48,
+ QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M,
+ QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S);
+ grh->sgid_index =
+ roce_get_field(context->qpc_bytes_36,
+ QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M,
+ QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S);
+ grh->hop_limit =
+ roce_get_field(context->qpc_bytes_44,
+ QP_CONTEXT_QPC_BYTES_44_HOPLMT_M,
+ QP_CONTEXT_QPC_BYTES_44_HOPLMT_S);
+ grh->traffic_class =
+ roce_get_field(context->qpc_bytes_48,
+ QP_CONTEXT_QPC_BYTES_48_TCLASS_M,
+ QP_CONTEXT_QPC_BYTES_48_TCLASS_S);
+
+ memcpy(grh->dgid.raw, context->dgid,
+ sizeof(grh->dgid.raw));
}
qp_attr->pkey_index = roce_get_field(context->qpc_bytes_12,
@@ -3526,10 +3549,12 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev,
old_cnt = roce_get_field(old_send,
ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S);
- if (cur_cnt - old_cnt > SDB_ST_CMP_VAL)
+ if (cur_cnt - old_cnt >
+ SDB_ST_CMP_VAL) {
success_flags = 1;
- else {
- send_ptr = roce_get_field(old_send,
+ } else {
+ send_ptr =
+ roce_get_field(old_send,
ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) +
roce_get_field(sdb_retry_cnt,
@@ -3634,6 +3659,7 @@ static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work)
struct hns_roce_dev *hr_dev;
struct hns_roce_qp *hr_qp;
struct device *dev;
+ unsigned long qpn;
int ret;
qp_work_entry = container_of(work, struct hns_roce_qp_work, work);
@@ -3641,8 +3667,9 @@ static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work)
dev = &hr_dev->pdev->dev;
priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
hr_qp = qp_work_entry->qp;
+ qpn = hr_qp->qpn;
- dev_dbg(dev, "Schedule destroy QP(0x%lx) work.\n", hr_qp->qpn);
+ dev_dbg(dev, "Schedule destroy QP(0x%lx) work.\n", qpn);
qp_work_entry->sche_cnt++;
@@ -3653,7 +3680,7 @@ static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work)
&qp_work_entry->db_wait_stage);
if (ret) {
dev_err(dev, "Check QP(0x%lx) db process status failed!\n",
- hr_qp->qpn);
+ qpn);
return;
}
@@ -3667,7 +3694,7 @@ static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work)
ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state,
IB_QPS_RESET);
if (ret) {
- dev_err(dev, "Modify QP(0x%lx) to RST failed!\n", hr_qp->qpn);
+ dev_err(dev, "Modify QP(0x%lx) to RST failed!\n", qpn);
return;
}
@@ -3676,14 +3703,14 @@ static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work)
if (hr_qp->ibqp.qp_type == IB_QPT_RC) {
/* RC QP, release QPN */
- hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1);
+ hns_roce_release_range_qp(hr_dev, qpn, 1);
kfree(hr_qp);
} else
kfree(hr_to_hr_sqp(hr_qp));
kfree(qp_work_entry);
- dev_dbg(dev, "Accomplished destroy QP(0x%lx) work.\n", hr_qp->qpn);
+ dev_dbg(dev, "Accomplished destroy QP(0x%lx) work.\n", qpn);
}
int hns_roce_v1_destroy_qp(struct ib_qp *ibqp)
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 4953d9cb83a7..d9777b662eba 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -32,6 +32,7 @@
*/
#include <linux/acpi.h>
#include <linux/of_platform.h>
+#include <linux/module.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
@@ -124,8 +125,6 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port,
return -ENODEV;
}
- spin_lock_bh(&hr_dev->iboe.lock);
-
switch (event) {
case NETDEV_UP:
case NETDEV_CHANGE:
@@ -143,7 +142,6 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port,
break;
}
- spin_unlock_bh(&hr_dev->iboe.lock);
return 0;
}
@@ -249,7 +247,7 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num,
assert(port_num > 0);
port = port_num - 1;
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->max_mtu = hr_dev->caps.max_mtu;
props->gid_tbl_len = hr_dev->caps.gid_table_len[port];
@@ -400,14 +398,15 @@ static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
struct ib_port_attr attr;
int ret;
- ret = hns_roce_query_port(ib_dev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+
+ ret = ib_query_port(ib_dev, port_num, &attr);
if (ret)
return ret;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
@@ -437,7 +436,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_dev->owner = THIS_MODULE;
ib_dev->node_type = RDMA_NODE_IB_CA;
- ib_dev->dma_device = dev;
+ ib_dev->dev.parent = dev;
ib_dev->phys_port_cnt = hr_dev->caps.num_ports;
ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey;
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 4139abee3b54..80fc01ffd8bd 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -127,11 +127,12 @@ static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order)
buddy->max_order = max_order;
spin_lock_init(&buddy->lock);
-
- buddy->bits = kzalloc((buddy->max_order + 1) * sizeof(long *),
- GFP_KERNEL);
- buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof(int *),
- GFP_KERNEL);
+ buddy->bits = kcalloc(buddy->max_order + 1,
+ sizeof(*buddy->bits),
+ GFP_KERNEL);
+ buddy->num_free = kcalloc(buddy->max_order + 1,
+ sizeof(*buddy->num_free),
+ GFP_KERNEL);
if (!buddy->bits || !buddy->num_free)
goto err_out;
@@ -204,7 +205,7 @@ int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift,
return 0;
}
- /* Note: if page_shift is zero, FAST memory regsiter */
+ /* Note: if page_shift is zero, FAST memory register */
mtt->page_shift = page_shift;
/* Compute MTT entry necessary */
@@ -503,7 +504,8 @@ int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
len = sg_dma_len(sg) >> mtt->page_shift;
for (k = 0; k < len; ++k) {
- pages[i++] = sg_dma_address(sg) + umem->page_size * k;
+ pages[i++] = sg_dma_address(sg) +
+ (k << umem->page_shift);
if (i == PAGE_SIZE / sizeof(u64)) {
ret = hns_roce_write_mtt(hr_dev, mtt, n, i,
pages);
@@ -563,9 +565,9 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
n = ib_umem_page_count(mr->umem);
- if (mr->umem->page_size != HNS_ROCE_HEM_PAGE_SIZE) {
- dev_err(dev, "Just support 4K page size but is 0x%x now!\n",
- mr->umem->page_size);
+ if (mr->umem->page_shift != HNS_ROCE_HEM_PAGE_SHIFT) {
+ dev_err(dev, "Just support 4K page size but is 0x%lx now!\n",
+ BIT(mr->umem->page_shift));
ret = -EINVAL;
goto err_umem;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index f036f32f15d3..054c52699090 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -101,7 +101,7 @@ static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
event.event = IB_EVENT_QP_ACCESS_ERR;
break;
default:
- dev_dbg(ibqp->device->dma_device, "roce_ib: Unexpected event type %d on QP %06lx\n",
+ dev_dbg(ibqp->device->dev.parent, "roce_ib: Unexpected event type %d on QP %06lx\n",
type, hr_qp->qpn);
return;
}
@@ -437,8 +437,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
}
ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(hr_qp->umem),
- ilog2((unsigned int)hr_qp->umem->page_size),
- &hr_qp->mtt);
+ hr_qp->umem->page_shift, &hr_qp->mtt);
if (ret) {
dev_err(dev, "hns_roce_mtt_init error for create qp\n");
goto err_buf;
diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index da2eb5a281fa..9b1566468744 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -527,6 +527,7 @@ enum i40iw_status_code i40iw_add_mac_addr(struct i40iw_device *iwdev,
int i40iw_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *);
void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq);
+void i40iw_cleanup_pending_cqp_op(struct i40iw_device *iwdev);
void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev);
void i40iw_add_pdusecount(struct i40iw_pd *iwpd);
void i40iw_rem_devusecount(struct i40iw_device *iwdev);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 95a0586a4da8..5a2fa743676c 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -784,7 +784,6 @@ static void i40iw_build_mpa_v2(struct i40iw_cm_node *cm_node,
}
ctrl_ird |= IETF_PEER_TO_PEER;
- ctrl_ird |= IETF_FLPDU_ZERO_LEN;
switch (mpa_key) {
case MPA_KEY_REQUEST:
@@ -2446,8 +2445,8 @@ static void i40iw_handle_rcv_mpa(struct i40iw_cm_node *cm_node,
} else {
type = I40IW_CM_EVENT_CONNECTED;
cm_node->state = I40IW_CM_STATE_OFFLOADED;
- i40iw_send_ack(cm_node);
}
+ i40iw_send_ack(cm_node);
break;
default:
pr_err("%s wrong cm_node state =%d\n", __func__, cm_node->state);
@@ -3184,9 +3183,8 @@ void i40iw_setup_cm_core(struct i40iw_device *iwdev)
INIT_LIST_HEAD(&cm_core->connected_nodes);
INIT_LIST_HEAD(&cm_core->listen_nodes);
- init_timer(&cm_core->tcp_timer);
- cm_core->tcp_timer.function = i40iw_cm_timer_tick;
- cm_core->tcp_timer.data = (unsigned long)cm_core;
+ setup_timer(&cm_core->tcp_timer, i40iw_cm_timer_tick,
+ (unsigned long)cm_core);
spin_lock_init(&cm_core->ht_lock);
spin_lock_init(&cm_core->listen_list_lock);
@@ -3489,7 +3487,8 @@ static void i40iw_cm_disconn_true(struct i40iw_qp *iwqp)
if (((original_hw_tcp_state == I40IW_TCP_STATE_CLOSED) ||
(original_hw_tcp_state == I40IW_TCP_STATE_TIME_WAIT) ||
(last_ae == I40IW_AE_RDMAP_ROE_BAD_LLP_CLOSE) ||
- (last_ae == I40IW_AE_LLP_CONNECTION_RESET))) {
+ (last_ae == I40IW_AE_LLP_CONNECTION_RESET) ||
+ iwdev->reset)) {
issue_close = 1;
iwqp->cm_id = NULL;
if (!iwqp->flush_issued) {
@@ -4267,6 +4266,8 @@ void i40iw_cm_disconnect_all(struct i40iw_device *iwdev)
cm_node = container_of(list_node, struct i40iw_cm_node, connected_entry);
attr.qp_state = IB_QPS_ERR;
i40iw_modify_qp(&cm_node->iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+ if (iwdev->reset)
+ i40iw_cm_disconn(cm_node->iwqp);
i40iw_rem_ref_cm_node(cm_node);
}
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
index 98923a8cf86d..9ec1ae9a82c9 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
@@ -285,28 +285,20 @@ void i40iw_change_l2params(struct i40iw_sc_vsi *vsi, struct i40iw_l2params *l2pa
struct i40iw_sc_dev *dev = vsi->dev;
struct i40iw_sc_qp *qp = NULL;
bool qs_handle_change = false;
- bool mss_change = false;
unsigned long flags;
u16 qs_handle;
int i;
- if (vsi->mss != l2params->mss) {
- mss_change = true;
- vsi->mss = l2params->mss;
- }
+ vsi->mss = l2params->mss;
i40iw_fill_qos_list(l2params->qs_handle_list);
for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) {
qs_handle = l2params->qs_handle_list[i];
if (vsi->qos[i].qs_handle != qs_handle)
qs_handle_change = true;
- else if (!mss_change)
- continue; /* no MSS nor qs handle change */
spin_lock_irqsave(&vsi->qos[i].lock, flags);
qp = i40iw_get_qp(&vsi->qos[i].qplist, qp);
while (qp) {
- if (mss_change)
- i40iw_qp_mss_modify(dev, qp);
if (qs_handle_change) {
qp->qs_handle = qs_handle;
/* issue cqp suspend command */
@@ -450,6 +442,9 @@ static enum i40iw_status_code i40iw_sc_cqp_create(struct i40iw_sc_cqp *cqp,
u32 cnt = 0, p1, p2, val = 0, err_code;
enum i40iw_status_code ret_code;
+ *maj_err = 0;
+ *min_err = 0;
+
ret_code = i40iw_allocate_dma_mem(cqp->dev->hw,
&cqp->sdbuf,
128,
@@ -1975,6 +1970,8 @@ static enum i40iw_status_code i40iw_sc_ccq_destroy(struct i40iw_sc_cq *ccq,
ret_code = i40iw_cqp_poll_registers(cqp, tail, 1000);
}
+ cqp->process_cqp_sds = i40iw_update_sds_noccq;
+
return ret_code;
}
@@ -2392,7 +2389,6 @@ static enum i40iw_status_code i40iw_sc_qp_modify(
set_64bit_val(wqe,
8,
- LS_64(info->new_mss, I40IW_CQPSQ_QP_NEWMSS) |
LS_64(term_len, I40IW_CQPSQ_QP_TERMLEN));
set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
@@ -2407,7 +2403,6 @@ static enum i40iw_status_code i40iw_sc_qp_modify(
LS_64(info->cq_num_valid, I40IW_CQPSQ_QP_CQNUMVALID) |
LS_64(info->force_loopback, I40IW_CQPSQ_QP_FORCELOOPBACK) |
LS_64(qp->qp_type, I40IW_CQPSQ_QP_QPTYPE) |
- LS_64(info->mss_change, I40IW_CQPSQ_QP_MSSCHANGE) |
LS_64(info->static_rsrc, I40IW_CQPSQ_QP_STATRSRC) |
LS_64(info->remove_hash_idx, I40IW_CQPSQ_QP_REMOVEHASHENTRY) |
LS_64(term_actions, I40IW_CQPSQ_QP_TERMACT) |
@@ -4498,9 +4493,9 @@ void i40iw_sc_vsi_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_init_info *inf
i40iw_fill_qos_list(info->params->qs_handle_list);
for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) {
- vsi->qos[i].qs_handle =
- info->params->qs_handle_list[i];
- i40iw_debug(vsi->dev, I40IW_DEBUG_DCB, "qset[%d]: %d\n", i, vsi->qos[i].qs_handle);
+ vsi->qos[i].qs_handle = info->params->qs_handle_list[i];
+ i40iw_debug(vsi->dev, I40IW_DEBUG_DCB, "qset[%d]: %d\n", i,
+ vsi->qos[i].qs_handle);
spin_lock_init(&vsi->qos[i].lock);
INIT_LIST_HEAD(&vsi->qos[i].qplist);
}
@@ -4851,46 +4846,46 @@ void i40iw_vsi_stats_free(struct i40iw_sc_vsi *vsi)
}
static struct i40iw_cqp_ops iw_cqp_ops = {
- i40iw_sc_cqp_init,
- i40iw_sc_cqp_create,
- i40iw_sc_cqp_post_sq,
- i40iw_sc_cqp_get_next_send_wqe,
- i40iw_sc_cqp_destroy,
- i40iw_sc_poll_for_cqp_op_done
+ .cqp_init = i40iw_sc_cqp_init,
+ .cqp_create = i40iw_sc_cqp_create,
+ .cqp_post_sq = i40iw_sc_cqp_post_sq,
+ .cqp_get_next_send_wqe = i40iw_sc_cqp_get_next_send_wqe,
+ .cqp_destroy = i40iw_sc_cqp_destroy,
+ .poll_for_cqp_op_done = i40iw_sc_poll_for_cqp_op_done
};
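
For illustration, a tiny stand-alone example of why the conversion above to designated initializers is preferable to positional initialization; the struct and functions here are made up for the example. The positional form breaks silently if the members of the ops struct are ever reordered or new members are inserted, while the designated form stays correct and self-documenting.

/* Stand-alone example of designated vs. positional struct initializers. */
#include <stdio.h>

struct ops {
	int (*open)(void);
	int (*close)(void);
};

static int my_open(void)  { return 1; }
static int my_close(void) { return 0; }

static const struct ops example_ops = {
	.open  = my_open,   /* robust to member reordering */
	.close = my_close,
};

int main(void)
{
	printf("%d %d\n", example_ops.open(), example_ops.close());
	return 0;
}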
static struct i40iw_ccq_ops iw_ccq_ops = {
- i40iw_sc_ccq_init,
- i40iw_sc_ccq_create,
- i40iw_sc_ccq_destroy,
- i40iw_sc_ccq_create_done,
- i40iw_sc_ccq_get_cqe_info,
- i40iw_sc_ccq_arm
+ .ccq_init = i40iw_sc_ccq_init,
+ .ccq_create = i40iw_sc_ccq_create,
+ .ccq_destroy = i40iw_sc_ccq_destroy,
+ .ccq_create_done = i40iw_sc_ccq_create_done,
+ .ccq_get_cqe_info = i40iw_sc_ccq_get_cqe_info,
+ .ccq_arm = i40iw_sc_ccq_arm
};
static struct i40iw_ceq_ops iw_ceq_ops = {
- i40iw_sc_ceq_init,
- i40iw_sc_ceq_create,
- i40iw_sc_cceq_create_done,
- i40iw_sc_cceq_destroy_done,
- i40iw_sc_cceq_create,
- i40iw_sc_ceq_destroy,
- i40iw_sc_process_ceq
+ .ceq_init = i40iw_sc_ceq_init,
+ .ceq_create = i40iw_sc_ceq_create,
+ .cceq_create_done = i40iw_sc_cceq_create_done,
+ .cceq_destroy_done = i40iw_sc_cceq_destroy_done,
+ .cceq_create = i40iw_sc_cceq_create,
+ .ceq_destroy = i40iw_sc_ceq_destroy,
+ .process_ceq = i40iw_sc_process_ceq
};
static struct i40iw_aeq_ops iw_aeq_ops = {
- i40iw_sc_aeq_init,
- i40iw_sc_aeq_create,
- i40iw_sc_aeq_destroy,
- i40iw_sc_get_next_aeqe,
- i40iw_sc_repost_aeq_entries,
- i40iw_sc_aeq_create_done,
- i40iw_sc_aeq_destroy_done
+ .aeq_init = i40iw_sc_aeq_init,
+ .aeq_create = i40iw_sc_aeq_create,
+ .aeq_destroy = i40iw_sc_aeq_destroy,
+ .get_next_aeqe = i40iw_sc_get_next_aeqe,
+ .repost_aeq_entries = i40iw_sc_repost_aeq_entries,
+ .aeq_create_done = i40iw_sc_aeq_create_done,
+ .aeq_destroy_done = i40iw_sc_aeq_destroy_done
};
/* iwarp pd ops */
static struct i40iw_pd_ops iw_pd_ops = {
- i40iw_sc_pd_init,
+ .pd_init = i40iw_sc_pd_init,
};
static struct i40iw_priv_qp_ops iw_priv_qp_ops = {
@@ -4909,53 +4904,51 @@ static struct i40iw_priv_qp_ops iw_priv_qp_ops = {
};
static struct i40iw_priv_cq_ops iw_priv_cq_ops = {
- i40iw_sc_cq_init,
- i40iw_sc_cq_create,
- i40iw_sc_cq_destroy,
- i40iw_sc_cq_modify,
+ .cq_init = i40iw_sc_cq_init,
+ .cq_create = i40iw_sc_cq_create,
+ .cq_destroy = i40iw_sc_cq_destroy,
+ .cq_modify = i40iw_sc_cq_modify,
};
static struct i40iw_mr_ops iw_mr_ops = {
- i40iw_sc_alloc_stag,
- i40iw_sc_mr_reg_non_shared,
- i40iw_sc_mr_reg_shared,
- i40iw_sc_dealloc_stag,
- i40iw_sc_query_stag,
- i40iw_sc_mw_alloc
+ .alloc_stag = i40iw_sc_alloc_stag,
+ .mr_reg_non_shared = i40iw_sc_mr_reg_non_shared,
+ .mr_reg_shared = i40iw_sc_mr_reg_shared,
+ .dealloc_stag = i40iw_sc_dealloc_stag,
+ .query_stag = i40iw_sc_query_stag,
+ .mw_alloc = i40iw_sc_mw_alloc
};
static struct i40iw_cqp_misc_ops iw_cqp_misc_ops = {
- i40iw_sc_manage_push_page,
- i40iw_sc_manage_hmc_pm_func_table,
- i40iw_sc_set_hmc_resource_profile,
- i40iw_sc_commit_fpm_values,
- i40iw_sc_query_fpm_values,
- i40iw_sc_static_hmc_pages_allocated,
- i40iw_sc_add_arp_cache_entry,
- i40iw_sc_del_arp_cache_entry,
- i40iw_sc_query_arp_cache_entry,
- i40iw_sc_manage_apbvt_entry,
- i40iw_sc_manage_qhash_table_entry,
- i40iw_sc_alloc_local_mac_ipaddr_entry,
- i40iw_sc_add_local_mac_ipaddr_entry,
- i40iw_sc_del_local_mac_ipaddr_entry,
- i40iw_sc_cqp_nop,
- i40iw_sc_commit_fpm_values_done,
- i40iw_sc_query_fpm_values_done,
- i40iw_sc_manage_hmc_pm_func_table_done,
- i40iw_sc_suspend_qp,
- i40iw_sc_resume_qp
+ .manage_push_page = i40iw_sc_manage_push_page,
+ .manage_hmc_pm_func_table = i40iw_sc_manage_hmc_pm_func_table,
+ .set_hmc_resource_profile = i40iw_sc_set_hmc_resource_profile,
+ .commit_fpm_values = i40iw_sc_commit_fpm_values,
+ .query_fpm_values = i40iw_sc_query_fpm_values,
+ .static_hmc_pages_allocated = i40iw_sc_static_hmc_pages_allocated,
+ .add_arp_cache_entry = i40iw_sc_add_arp_cache_entry,
+ .del_arp_cache_entry = i40iw_sc_del_arp_cache_entry,
+ .query_arp_cache_entry = i40iw_sc_query_arp_cache_entry,
+ .manage_apbvt_entry = i40iw_sc_manage_apbvt_entry,
+ .manage_qhash_table_entry = i40iw_sc_manage_qhash_table_entry,
+ .alloc_local_mac_ipaddr_table_entry = i40iw_sc_alloc_local_mac_ipaddr_entry,
+ .add_local_mac_ipaddr_entry = i40iw_sc_add_local_mac_ipaddr_entry,
+ .del_local_mac_ipaddr_entry = i40iw_sc_del_local_mac_ipaddr_entry,
+ .cqp_nop = i40iw_sc_cqp_nop,
+ .commit_fpm_values_done = i40iw_sc_commit_fpm_values_done,
+ .query_fpm_values_done = i40iw_sc_query_fpm_values_done,
+ .manage_hmc_pm_func_table_done = i40iw_sc_manage_hmc_pm_func_table_done,
+ .update_suspend_qp = i40iw_sc_suspend_qp,
+ .update_resume_qp = i40iw_sc_resume_qp
};
static struct i40iw_hmc_ops iw_hmc_ops = {
- i40iw_sc_init_iw_hmc,
- i40iw_sc_parse_fpm_query_buf,
- i40iw_sc_configure_iw_fpm,
- i40iw_sc_parse_fpm_commit_buf,
- i40iw_sc_create_hmc_obj,
- i40iw_sc_del_hmc_obj,
- NULL,
- NULL
+ .init_iw_hmc = i40iw_sc_init_iw_hmc,
+ .parse_fpm_query_buf = i40iw_sc_parse_fpm_query_buf,
+ .configure_iw_fpm = i40iw_sc_configure_iw_fpm,
+ .parse_fpm_commit_buf = i40iw_sc_parse_fpm_commit_buf,
+ .create_hmc_object = i40iw_sc_create_hmc_obj,
+ .del_hmc_object = i40iw_sc_del_hmc_obj
};
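Editor's note: the hunks above convert the i40iw ops tables from positional to C99 designated initializers, so each callback is bound to a named struct member instead of a slot whose meaning depends on the struct layout (the same conversion appears again in i40iw_uk.c below). A minimal standalone sketch of the pattern; the struct and function names here are illustrative, not from the driver:

/* Illustrative only: positional vs. designated initializers. */
#include <stdio.h>

struct ops {
	int (*open)(void);
	int (*close)(void);
};

static int my_open(void)  { return 0; }
static int my_close(void) { return 1; }

/* Positional form: silently mis-binds if members are reordered or added. */
static struct ops positional_ops = { my_open, my_close };

/* Designated form: each callback is tied to a named member. */
static struct ops designated_ops = {
	.open  = my_open,
	.close = my_close,
};

int main(void)
{
	printf("%d %d\n", positional_ops.open(), designated_ops.close());
	return 0;
}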
/**
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index 2728af3103ce..ae8463ff59a7 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -243,6 +243,8 @@ static void i40iw_destroy_cqp(struct i40iw_device *iwdev, bool free_hwcqp)
if (free_hwcqp)
dev->cqp_ops->cqp_destroy(dev->cqp);
+ i40iw_cleanup_pending_cqp_op(iwdev);
+
i40iw_free_dma_mem(dev->hw, &cqp->sq);
kfree(cqp->scratch_array);
iwdev->cqp.scratch_array = NULL;
@@ -274,13 +276,12 @@ static void i40iw_disable_irq(struct i40iw_sc_dev *dev,
/**
* i40iw_destroy_aeq - destroy aeq
* @iwdev: iwarp device
- * @reset: true if called before reset
*
* Issue a destroy aeq request and
* free the resources associated with the aeq
* The function is called during driver unload
*/
-static void i40iw_destroy_aeq(struct i40iw_device *iwdev, bool reset)
+static void i40iw_destroy_aeq(struct i40iw_device *iwdev)
{
enum i40iw_status_code status = I40IW_ERR_NOT_READY;
struct i40iw_sc_dev *dev = &iwdev->sc_dev;
@@ -288,7 +289,7 @@ static void i40iw_destroy_aeq(struct i40iw_device *iwdev, bool reset)
if (!iwdev->msix_shared)
i40iw_disable_irq(dev, iwdev->iw_msixtbl, (void *)iwdev);
- if (reset)
+ if (iwdev->reset)
goto exit;
if (!dev->aeq_ops->aeq_destroy(&aeq->sc_aeq, 0, 1))
@@ -304,19 +305,17 @@ exit:
* i40iw_destroy_ceq - destroy ceq
* @iwdev: iwarp device
* @iwceq: ceq to be destroyed
- * @reset: true if called before reset
*
* Issue a destroy ceq request and
* free the resources associated with the ceq
*/
static void i40iw_destroy_ceq(struct i40iw_device *iwdev,
- struct i40iw_ceq *iwceq,
- bool reset)
+ struct i40iw_ceq *iwceq)
{
enum i40iw_status_code status;
struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- if (reset)
+ if (iwdev->reset)
goto exit;
status = dev->ceq_ops->ceq_destroy(&iwceq->sc_ceq, 0, 1);
@@ -335,12 +334,11 @@ exit:
/**
* i40iw_dele_ceqs - destroy all ceq's
* @iwdev: iwarp device
- * @reset: true if called before reset
*
* Go through all of the device ceq's and for each ceq
* disable the ceq interrupt and destroy the ceq
*/
-static void i40iw_dele_ceqs(struct i40iw_device *iwdev, bool reset)
+static void i40iw_dele_ceqs(struct i40iw_device *iwdev)
{
u32 i = 0;
struct i40iw_sc_dev *dev = &iwdev->sc_dev;
@@ -349,32 +347,31 @@ static void i40iw_dele_ceqs(struct i40iw_device *iwdev, bool reset)
if (iwdev->msix_shared) {
i40iw_disable_irq(dev, msix_vec, (void *)iwdev);
- i40iw_destroy_ceq(iwdev, iwceq, reset);
+ i40iw_destroy_ceq(iwdev, iwceq);
iwceq++;
i++;
}
for (msix_vec++; i < iwdev->ceqs_count; i++, msix_vec++, iwceq++) {
i40iw_disable_irq(dev, msix_vec, (void *)iwceq);
- i40iw_destroy_ceq(iwdev, iwceq, reset);
+ i40iw_destroy_ceq(iwdev, iwceq);
}
}
/**
* i40iw_destroy_ccq - destroy control cq
* @iwdev: iwarp device
- * @reset: true if called before reset
*
* Issue destroy ccq request and
* free the resources associated with the ccq
*/
-static void i40iw_destroy_ccq(struct i40iw_device *iwdev, bool reset)
+static void i40iw_destroy_ccq(struct i40iw_device *iwdev)
{
struct i40iw_sc_dev *dev = &iwdev->sc_dev;
struct i40iw_ccq *ccq = &iwdev->ccq;
enum i40iw_status_code status = 0;
- if (!reset)
+ if (!iwdev->reset)
status = dev->ccq_ops->ccq_destroy(dev->ccq, 0, true);
if (status)
i40iw_pr_err("ccq destroy failed %d\n", status);
@@ -810,7 +807,7 @@ static enum i40iw_status_code i40iw_setup_ceqs(struct i40iw_device *iwdev,
iwceq->msix_idx = msix_vec->idx;
status = i40iw_configure_ceq_vector(iwdev, iwceq, ceq_id, msix_vec);
if (status) {
- i40iw_destroy_ceq(iwdev, iwceq, false);
+ i40iw_destroy_ceq(iwdev, iwceq);
break;
}
i40iw_enable_intr(&iwdev->sc_dev, msix_vec->idx);
@@ -912,7 +909,7 @@ static enum i40iw_status_code i40iw_setup_aeq(struct i40iw_device *iwdev)
status = i40iw_configure_aeq_vector(iwdev);
if (status) {
- i40iw_destroy_aeq(iwdev, false);
+ i40iw_destroy_aeq(iwdev);
return status;
}
@@ -1319,13 +1316,13 @@ static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev,
status = i40iw_obj_aligned_mem(iwdev, &mem, I40IW_QUERY_FPM_BUF_SIZE,
I40IW_FPM_QUERY_BUF_ALIGNMENT_MASK);
if (status)
- goto exit;
+ goto error;
info.fpm_query_buf_pa = mem.pa;
info.fpm_query_buf = mem.va;
status = i40iw_obj_aligned_mem(iwdev, &mem, I40IW_COMMIT_FPM_BUF_SIZE,
I40IW_FPM_COMMIT_BUF_ALIGNMENT_MASK);
if (status)
- goto exit;
+ goto error;
info.fpm_commit_buf_pa = mem.pa;
info.fpm_commit_buf = mem.va;
info.hmc_fn_id = ldev->fid;
@@ -1347,11 +1344,9 @@ static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev,
info.exception_lan_queue = 1;
info.vchnl_send = i40iw_virtchnl_send;
status = i40iw_device_init(&iwdev->sc_dev, &info);
-exit:
- if (status) {
- kfree(iwdev->hmc_info_mem);
- iwdev->hmc_info_mem = NULL;
- }
+
+ if (status)
+ goto error;
memset(&vsi_info, 0, sizeof(vsi_info));
vsi_info.dev = &iwdev->sc_dev;
vsi_info.back_vsi = (void *)iwdev;
@@ -1362,11 +1357,19 @@ exit:
memset(&stats_info, 0, sizeof(stats_info));
stats_info.fcn_id = ldev->fid;
stats_info.pestat = kzalloc(sizeof(*stats_info.pestat), GFP_KERNEL);
+ if (!stats_info.pestat) {
+ status = I40IW_ERR_NO_MEMORY;
+ goto error;
+ }
stats_info.stats_initialize = true;
if (stats_info.pestat)
i40iw_vsi_stats_init(&iwdev->vsi, &stats_info);
}
return status;
+error:
+ kfree(iwdev->hmc_info_mem);
+ iwdev->hmc_info_mem = NULL;
+ return status;
}
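Editor's note: the hunk above replaces the old exit: label, which freed hmc_info_mem only on some paths, with a single error: label and adds a NULL check for the pestat allocation, so every failure funnels through one cleanup site. A hedged userspace sketch of that shape, with made-up names (ctx, hmc_mem, pestat):

/* Illustrative only: one error label for staged initialization. */
#include <stdlib.h>

struct ctx { void *hmc_mem; void *pestat; };

static int init_ctx(struct ctx *c)
{
	int err = 0;

	c->hmc_mem = malloc(64);
	if (!c->hmc_mem)
		return -1;

	c->pestat = malloc(32);
	if (!c->pestat) {
		err = -1;
		goto error;	/* every failure frees hmc_mem exactly once */
	}
	return 0;

error:
	free(c->hmc_mem);
	c->hmc_mem = NULL;
	return err;
}

int main(void)
{
	struct ctx c = { 0 };

	if (init_ctx(&c))
		return 1;
	free(c.pestat);
	free(c.hmc_mem);
	return 0;
}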
/**
@@ -1436,12 +1439,11 @@ static enum i40iw_status_code i40iw_save_msix_info(struct i40iw_device *iwdev,
/**
* i40iw_deinit_device - clean up the device resources
* @iwdev: iwarp device
- * @reset: true if called before reset
*
* Destroy the ib device interface, remove the mac ip entry and ipv4/ipv6 addresses,
* destroy the device queues and free the pble and the hmc objects
*/
-static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset)
+static void i40iw_deinit_device(struct i40iw_device *iwdev)
{
struct i40e_info *ldev = iwdev->ldev;
@@ -1458,7 +1460,7 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset)
i40iw_destroy_rdma_device(iwdev->iwibdev);
/* fallthrough */
case IP_ADDR_REGISTERED:
- if (!reset)
+ if (!iwdev->reset)
i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx);
/* fallthrough */
case INET_NOTIFIER:
@@ -1468,26 +1470,26 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset)
unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier);
}
/* fallthrough */
+ case PBLE_CHUNK_MEM:
+ i40iw_destroy_pble_pool(dev, iwdev->pble_rsrc);
+ /* fallthrough */
case CEQ_CREATED:
- i40iw_dele_ceqs(iwdev, reset);
+ i40iw_dele_ceqs(iwdev);
/* fallthrough */
case AEQ_CREATED:
- i40iw_destroy_aeq(iwdev, reset);
+ i40iw_destroy_aeq(iwdev);
/* fallthrough */
case IEQ_CREATED:
- i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_IEQ, reset);
+ i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_IEQ, iwdev->reset);
/* fallthrough */
case ILQ_CREATED:
- i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_ILQ, reset);
+ i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_ILQ, iwdev->reset);
/* fallthrough */
case CCQ_CREATED:
- i40iw_destroy_ccq(iwdev, reset);
- /* fallthrough */
- case PBLE_CHUNK_MEM:
- i40iw_destroy_pble_pool(dev, iwdev->pble_rsrc);
+ i40iw_destroy_ccq(iwdev);
/* fallthrough */
case HMC_OBJS_CREATED:
- i40iw_del_hmc_objects(dev, dev->hmc_info, true, reset);
+ i40iw_del_hmc_objects(dev, dev->hmc_info, true, iwdev->reset);
/* fallthrough */
case CQP_CREATED:
i40iw_destroy_cqp(iwdev, true);
@@ -1664,6 +1666,7 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client)
status = i40iw_hmc_init_pble(&iwdev->sc_dev, iwdev->pble_rsrc);
if (status)
break;
+ iwdev->init_state = PBLE_CHUNK_MEM;
iwdev->virtchnl_wq = alloc_ordered_workqueue("iwvch", WQ_MEM_RECLAIM);
i40iw_register_notifiers();
iwdev->init_state = INET_NOTIFIER;
@@ -1687,7 +1690,7 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client)
} while (0);
i40iw_pr_err("status = %d last completion = %d\n", status, iwdev->init_state);
- i40iw_deinit_device(iwdev, false);
+ i40iw_deinit_device(iwdev);
return -ERESTART;
}
@@ -1768,9 +1771,12 @@ static void i40iw_close(struct i40e_info *ldev, struct i40e_client *client, bool
iwdev = &hdl->device;
iwdev->closing = true;
+ if (reset)
+ iwdev->reset = true;
+
i40iw_cm_disconnect_all(iwdev);
destroy_workqueue(iwdev->virtchnl_wq);
- i40iw_deinit_device(iwdev, reset);
+ i40iw_deinit_device(iwdev);
}
/**
@@ -1933,7 +1939,7 @@ static int i40iw_virtchnl_receive(struct i40e_info *ldev,
bool i40iw_vf_clear_to_send(struct i40iw_sc_dev *dev)
{
struct i40iw_device *iwdev;
- wait_queue_t wait;
+ wait_queue_entry_t wait;
iwdev = dev->back_dev;
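Editor's note: across this file's hunks the patch drops the bool reset parameter that was threaded through every destroy helper and latches the state once in iwdev->reset (set in i40iw_close when the core requests a reset), so each teardown routine consults the device rather than its caller. Roughly, with hypothetical names:

/* Illustrative only: latching a teardown mode on the device struct. */
#include <stdbool.h>
#include <stdio.h>

struct dev { bool reset; };

static void destroy_queue(struct dev *d)
{
	if (d->reset)		/* skip HW commands while the HW is resetting */
		return;
	puts("issuing destroy command to HW");
}

static void close_dev(struct dev *d, bool reset)
{
	if (reset)
		d->reset = true;	/* set once here ... */
	destroy_queue(d);		/* ... consumed by every helper */
}

int main(void)
{
	struct dev d = { .reset = false };
	close_dev(&d, true);
	return 0;
}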
diff --git a/drivers/infiniband/hw/i40iw/i40iw_osdep.h b/drivers/infiniband/hw/i40iw/i40iw_osdep.h
index aa66c1c63dfa..f27be3e7830b 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_osdep.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_osdep.h
@@ -199,7 +199,6 @@ void i40iw_cqp_spawn_worker(struct i40iw_sc_dev *dev,
struct i40iw_virtchnl_work_info *work_info, u32 iw_vf_idx);
void *i40iw_remove_head(struct list_head *list);
void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp, bool suspend);
-void i40iw_qp_mss_modify(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
void i40iw_term_modify_qp(struct i40iw_sc_qp *qp, u8 next_state, u8 term, u8 term_len);
void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c
index db41ab40da9c..71050c5d29a0 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c
@@ -408,6 +408,9 @@ enum i40iw_status_code i40iw_puda_send(struct i40iw_sc_qp *qp,
set_64bit_val(wqe, 0, info->paddr);
set_64bit_val(wqe, 8, LS_64(info->len, I40IWQPSQ_FRAG_LEN));
set_64bit_val(wqe, 16, header[0]);
+
+ /* Ensure all data is written before writing valid bit */
+ wmb();
set_64bit_val(wqe, 24, header[1]);
i40iw_debug_buf(qp->dev, I40IW_DEBUG_PUDA, "PUDA SEND WQE", wqe, 32);
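Editor's note: the added wmb() orders the first three 64-bit stores of the WQE ahead of the final store that carries the valid bit, so the hardware can never see a valid descriptor with stale payload fields. A self-contained userspace analogue using a C11 release store in place of the kernel barrier (descriptor layout and names are invented):

/* Illustrative only: publish a "valid" flag after the payload is visible. */
#include <stdatomic.h>
#include <stdint.h>

#define VALID_BIT 1ULL

struct desc {
	uint64_t addr;
	uint64_t len;
	atomic_uint_fast64_t hdr;	/* carries the valid bit */
};

static void post_desc(struct desc *d, uint64_t addr, uint64_t len)
{
	d->addr = addr;
	d->len  = len;
	/* release store: payload writes cannot be reordered past this point */
	atomic_store_explicit(&d->hdr, VALID_BIT, memory_order_release);
}

int main(void)
{
	struct desc d = { 0 };

	post_desc(&d, 0x1000, 64);
	return 0;
}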
@@ -1411,10 +1414,10 @@ static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq,
if (!list_empty(rxlist)) {
tmpbuf = (struct i40iw_puda_buf *)rxlist->next;
- plist = &tmpbuf->list;
while ((struct list_head *)tmpbuf != rxlist) {
if ((int)(buf->seqnum - tmpbuf->seqnum) < 0)
break;
+ plist = &tmpbuf->list;
tmpbuf = (struct i40iw_puda_buf *)plist->next;
}
/* Insert buf before tmpbuf */
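Editor's note: this hunk fixes the insertion-point search for out-of-order exception buffers: plist is now advanced inside the loop, so the new buffer lands immediately before the first queued buffer with a higher sequence number rather than always before the head's successor. The wrap-safe signed comparison and the corrected walk, reduced to a standalone sketch with a hypothetical list type:

/* Illustrative only: ordered insert by sequence number, wrap-safe. */
#include <stdio.h>
#include <stdint.h>

struct buf { uint32_t seqnum; struct buf *next; };

/* true when a comes before b, even across a 32-bit wrap */
static int seq_before(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;
}

static void insert_ordered(struct buf **head, struct buf *nbuf)
{
	struct buf **pos = head;

	/* advance the insertion point while queued buffers are older */
	while (*pos && !seq_before(nbuf->seqnum, (*pos)->seqnum))
		pos = &(*pos)->next;

	nbuf->next = *pos;
	*pos = nbuf;
}

int main(void)
{
	struct buf a = { 10, NULL }, b = { 30, NULL }, c = { 20, NULL };
	struct buf *head = NULL;

	insert_ordered(&head, &a);
	insert_ordered(&head, &b);
	insert_ordered(&head, &c);	/* ends up between 10 and 30 */

	for (struct buf *p = head; p; p = p->next)
		printf("%u ", p->seqnum);
	printf("\n");
	return 0;
}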
diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h
index 7b76259752b0..959ec81fba99 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_type.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_type.h
@@ -541,7 +541,6 @@ struct i40iw_create_qp_info {
struct i40iw_modify_qp_info {
u64 rx_win0;
u64 rx_win1;
- u16 new_mss;
u8 next_iwarp_state;
u8 termlen;
bool ord_valid;
@@ -554,7 +553,6 @@ struct i40iw_modify_qp_info {
bool dont_send_term;
bool dont_send_fin;
bool cached_var_valid;
- bool mss_change;
bool force_loopback;
};
diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c
index 2800f796271c..b0d3a0e8a9b5 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_uk.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c
@@ -913,29 +913,29 @@ enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u32 sge, u32 inline_data
}
static struct i40iw_qp_uk_ops iw_qp_uk_ops = {
- i40iw_qp_post_wr,
- i40iw_qp_ring_push_db,
- i40iw_rdma_write,
- i40iw_rdma_read,
- i40iw_send,
- i40iw_inline_rdma_write,
- i40iw_inline_send,
- i40iw_stag_local_invalidate,
- i40iw_mw_bind,
- i40iw_post_receive,
- i40iw_nop
+ .iw_qp_post_wr = i40iw_qp_post_wr,
+ .iw_qp_ring_push_db = i40iw_qp_ring_push_db,
+ .iw_rdma_write = i40iw_rdma_write,
+ .iw_rdma_read = i40iw_rdma_read,
+ .iw_send = i40iw_send,
+ .iw_inline_rdma_write = i40iw_inline_rdma_write,
+ .iw_inline_send = i40iw_inline_send,
+ .iw_stag_local_invalidate = i40iw_stag_local_invalidate,
+ .iw_mw_bind = i40iw_mw_bind,
+ .iw_post_receive = i40iw_post_receive,
+ .iw_post_nop = i40iw_nop
};
static struct i40iw_cq_ops iw_cq_ops = {
- i40iw_cq_request_notification,
- i40iw_cq_poll_completion,
- i40iw_cq_post_entries,
- i40iw_clean_cq
+ .iw_cq_request_notification = i40iw_cq_request_notification,
+ .iw_cq_poll_completion = i40iw_cq_poll_completion,
+ .iw_cq_post_entries = i40iw_cq_post_entries,
+ .iw_cq_clean = i40iw_clean_cq
};
static struct i40iw_device_uk_ops iw_device_uk_ops = {
- i40iw_cq_uk_init,
- i40iw_qp_uk_init,
+ .iwarp_cq_uk_init = i40iw_cq_uk_init,
+ .iwarp_qp_uk_init = i40iw_qp_uk_init,
};
/**
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
index 0f5d43d1f5fc..e311ec559f4e 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -160,6 +160,9 @@ int i40iw_inetaddr_event(struct notifier_block *notifier,
return NOTIFY_DONE;
iwdev = &hdl->device;
+ if (iwdev->init_state < INET_NOTIFIER)
+ return NOTIFY_DONE;
+
netdev = iwdev->ldev->netdev;
upper_dev = netdev_master_upper_dev_get(netdev);
if (netdev != event_netdev)
@@ -214,6 +217,9 @@ int i40iw_inet6addr_event(struct notifier_block *notifier,
return NOTIFY_DONE;
iwdev = &hdl->device;
+ if (iwdev->init_state < INET_NOTIFIER)
+ return NOTIFY_DONE;
+
netdev = iwdev->ldev->netdev;
if (netdev != event_netdev)
return NOTIFY_DONE;
@@ -260,6 +266,8 @@ int i40iw_net_event(struct notifier_block *notifier, unsigned long event, void *
if (!iwhdl)
return NOTIFY_DONE;
iwdev = &iwhdl->device;
+ if (iwdev->init_state < INET_NOTIFIER)
+ return NOTIFY_DONE;
p = (__be32 *)neigh->primary_key;
i40iw_copy_ip_ntohl(local_ipaddr, p);
if (neigh->nud_state & NUD_VALID) {
@@ -329,6 +337,7 @@ struct i40iw_cqp_request *i40iw_get_cqp_request(struct i40iw_cqp *cqp, bool wait
*/
void i40iw_free_cqp_request(struct i40iw_cqp *cqp, struct i40iw_cqp_request *cqp_request)
{
+ struct i40iw_device *iwdev = container_of(cqp, struct i40iw_device, cqp);
unsigned long flags;
if (cqp_request->dynamic) {
@@ -342,6 +351,7 @@ void i40iw_free_cqp_request(struct i40iw_cqp *cqp, struct i40iw_cqp_request *cqp
list_add_tail(&cqp_request->list, &cqp->cqp_avail_reqs);
spin_unlock_irqrestore(&cqp->req_lock, flags);
}
+ wake_up(&iwdev->close_wq);
}
/**
@@ -357,6 +367,56 @@ void i40iw_put_cqp_request(struct i40iw_cqp *cqp,
}
/**
+ * i40iw_free_pending_cqp_request - free pending cqp request objs
+ * @cqp: cqp ptr
+ * @cqp_request: to be put back in cqp list
+ */
+static void i40iw_free_pending_cqp_request(struct i40iw_cqp *cqp,
+ struct i40iw_cqp_request *cqp_request)
+{
+ struct i40iw_device *iwdev = container_of(cqp, struct i40iw_device, cqp);
+
+ if (cqp_request->waiting) {
+ cqp_request->compl_info.error = true;
+ cqp_request->request_done = true;
+ wake_up(&cqp_request->waitq);
+ }
+ i40iw_put_cqp_request(cqp, cqp_request);
+ wait_event_timeout(iwdev->close_wq,
+ !atomic_read(&cqp_request->refcount),
+ 1000);
+}
+
+/**
+ * i40iw_cleanup_pending_cqp_op - clean-up cqp with no completions
+ * @iwdev: iwarp device
+ */
+void i40iw_cleanup_pending_cqp_op(struct i40iw_device *iwdev)
+{
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ struct i40iw_cqp *cqp = &iwdev->cqp;
+ struct i40iw_cqp_request *cqp_request = NULL;
+ struct cqp_commands_info *pcmdinfo = NULL;
+ u32 i, pending_work, wqe_idx;
+
+ pending_work = I40IW_RING_WORK_AVAILABLE(cqp->sc_cqp.sq_ring);
+ wqe_idx = I40IW_RING_GETCURRENT_TAIL(cqp->sc_cqp.sq_ring);
+ for (i = 0; i < pending_work; i++) {
+ cqp_request = (struct i40iw_cqp_request *)(unsigned long)cqp->scratch_array[wqe_idx];
+ if (cqp_request)
+ i40iw_free_pending_cqp_request(cqp, cqp_request);
+ wqe_idx = (wqe_idx + 1) % I40IW_RING_GETSIZE(cqp->sc_cqp.sq_ring);
+ }
+
+ while (!list_empty(&dev->cqp_cmd_head)) {
+ pcmdinfo = (struct cqp_commands_info *)i40iw_remove_head(&dev->cqp_cmd_head);
+ cqp_request = container_of(pcmdinfo, struct i40iw_cqp_request, info);
+ if (cqp_request)
+ i40iw_free_pending_cqp_request(cqp, cqp_request);
+ }
+}
+
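Editor's note: the new i40iw_cleanup_pending_cqp_op() above walks the CQP send-queue ring from its current tail for as many slots as still hold work, completes each outstanding request with an error so any waiters wake up, and then drains the software backlog list. The ring walk (tail plus pending count, modulo ring size) in isolation, with invented names:

/* Illustrative only: visiting the occupied slots of a ring buffer. */
#include <stdio.h>

#define RING_SIZE 8

struct req { int pending; };

static void fail_pending(struct req *slots[RING_SIZE],
			 unsigned tail, unsigned in_flight)
{
	for (unsigned i = 0; i < in_flight; i++) {
		unsigned idx = (tail + i) % RING_SIZE;

		if (slots[idx])			/* complete it with an error */
			slots[idx]->pending = 0;
	}
}

int main(void)
{
	struct req r = { 1 };
	struct req *slots[RING_SIZE] = { [5] = &r };

	fail_pending(slots, 5, 2);	/* tail = 5, two entries in flight */
	printf("%d\n", r.pending);	/* prints 0 */
	return 0;
}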
+/**
* i40iw_free_qp - callback after destroy cqp completes
* @cqp_request: cqp request for destroy qp
* @num: not used
@@ -538,8 +598,12 @@ void i40iw_rem_ref(struct ib_qp *ibqp)
cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request;
cqp_info->in.u.qp_destroy.remove_hash_idx = true;
status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Destroy QP fail");
+ if (!status)
+ return;
+
+ i40iw_rem_pdusecount(iwqp->iwpd, iwdev);
+ i40iw_free_qp_resources(iwdev, iwqp, qp_num);
+ i40iw_rem_devusecount(iwdev);
}
/**
@@ -749,23 +813,6 @@ void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp, b
}
/**
- * i40iw_qp_mss_modify - modify mss for qp
- * @dev: hardware control device structure
- * @qp: hardware control qp
- */
-void i40iw_qp_mss_modify(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
-{
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
- struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp;
- struct i40iw_modify_qp_info info;
-
- memset(&info, 0, sizeof(info));
- info.mss_change = true;
- info.new_mss = qp->vsi->mss;
- i40iw_hw_modify_qp(iwdev, iwqp, &info, false);
-}
-
-/**
* i40iw_term_modify_qp - modify qp for term message
* @qp: hardware control qp
* @next_state: qp's next state
@@ -836,10 +883,9 @@ void i40iw_terminate_start_timer(struct i40iw_sc_qp *qp)
iwqp = (struct i40iw_qp *)qp->back_qp;
i40iw_add_ref(&iwqp->ibqp);
- init_timer(&iwqp->terminate_timer);
- iwqp->terminate_timer.function = i40iw_terminate_timeout;
+ setup_timer(&iwqp->terminate_timer, i40iw_terminate_timeout,
+ (unsigned long)iwqp);
iwqp->terminate_timer.expires = jiffies + HZ;
- iwqp->terminate_timer.data = (unsigned long)iwqp;
add_timer(&iwqp->terminate_timer);
}
@@ -1428,9 +1474,8 @@ void i40iw_hw_stats_start_timer(struct i40iw_sc_vsi *vsi)
{
struct i40iw_vsi_pestat *devstat = vsi->pestat;
- init_timer(&devstat->stats_timer);
- devstat->stats_timer.function = i40iw_hw_stats_timeout;
- devstat->stats_timer.data = (unsigned long)vsi;
+ setup_timer(&devstat->stats_timer, i40iw_hw_stats_timeout,
+ (unsigned long)vsi);
mod_timer(&devstat->stats_timer,
jiffies + msecs_to_jiffies(STATS_TIMER_DELAY));
}
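Editor's note: both hunks above replace open-coded init_timer() plus manual .function/.data assignment with setup_timer(), which folds the same three steps into one call; in this kernel generation the callback still receives an unsigned long cookie. A hedged kernel-style sketch, not the driver's code, using a hypothetical my_ctx type:

/* Sketch only (pre-4.15 timer API). */
#include <linux/types.h>
#include <linux/timer.h>
#include <linux/jiffies.h>

struct my_ctx {			/* hypothetical context */
	struct timer_list timer;
	bool fired;
};

static void my_timeout(unsigned long data)
{
	struct my_ctx *ctx = (struct my_ctx *)data;

	ctx->fired = true;	/* handle expiry */
}

static void my_start_timer(struct my_ctx *ctx)
{
	/* one call replaces init_timer() + .function + .data */
	setup_timer(&ctx->timer, my_timeout, (unsigned long)ctx);
	mod_timer(&ctx->timer, jiffies + HZ);	/* fire in about one second */
}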
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 4c000d60d5c6..02d871db7ca5 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -97,8 +97,7 @@ static int i40iw_query_port(struct ib_device *ibdev,
struct i40iw_device *iwdev = to_iwdev(ibdev);
struct net_device *netdev = iwdev->netdev;
- memset(props, 0, sizeof(*props));
-
+ /* props being zeroed by the caller, avoid zeroing it here */
props->max_mtu = IB_MTU_4096;
props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
@@ -427,9 +426,13 @@ void i40iw_free_qp_resources(struct i40iw_device *iwdev,
struct i40iw_qp *iwqp,
u32 qp_num)
{
+ struct i40iw_pbl *iwpbl = &iwqp->iwpbl;
+
i40iw_dealloc_push_page(iwdev, &iwqp->sc_qp);
if (qp_num)
i40iw_free_resource(iwdev, iwdev->allocated_qps, qp_num);
+ if (iwpbl->pbl_allocated)
+ i40iw_free_pble(iwdev->pble_rsrc, &iwpbl->pble_alloc);
i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->q2_ctx_mem);
i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->kqp.dma_mem);
kfree(iwqp->kqp.wrid_mem);
@@ -484,7 +487,7 @@ static int i40iw_setup_virt_qp(struct i40iw_device *iwdev,
struct i40iw_qp *iwqp,
struct i40iw_qp_init_info *init_info)
{
- struct i40iw_pbl *iwpbl = iwqp->iwpbl;
+ struct i40iw_pbl *iwpbl = &iwqp->iwpbl;
struct i40iw_qp_mr *qpmr = &iwpbl->qp_mr;
iwqp->page = qpmr->sq_page;
@@ -689,19 +692,22 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
ucontext = to_ucontext(ibpd->uobject->context);
if (req.user_wqe_buffers) {
+ struct i40iw_pbl *iwpbl;
+
spin_lock_irqsave(
&ucontext->qp_reg_mem_list_lock, flags);
- iwqp->iwpbl = i40iw_get_pbl(
+ iwpbl = i40iw_get_pbl(
(unsigned long)req.user_wqe_buffers,
&ucontext->qp_reg_mem_list);
spin_unlock_irqrestore(
&ucontext->qp_reg_mem_list_lock, flags);
- if (!iwqp->iwpbl) {
+ if (!iwpbl) {
err_code = -ENODATA;
i40iw_pr_err("no pbl info\n");
goto error;
}
+ memcpy(&iwqp->iwpbl, iwpbl, sizeof(iwqp->iwpbl));
}
}
err_code = i40iw_setup_virt_qp(iwdev, iwqp, &init_info);
@@ -1162,8 +1168,10 @@ static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev,
memset(&req, 0, sizeof(req));
iwcq->user_mode = true;
ucontext = to_ucontext(context);
- if (ib_copy_from_udata(&req, udata, sizeof(struct i40iw_create_cq_req)))
+ if (ib_copy_from_udata(&req, udata, sizeof(struct i40iw_create_cq_req))) {
+ err_code = -EFAULT;
goto cq_free_resources;
+ }
spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
iwpbl = i40iw_get_pbl((unsigned long)req.user_cq_buffer,
@@ -1346,7 +1354,7 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr,
{
struct ib_umem *region = iwmr->region;
struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
- int chunk_pages, entry, pg_shift, i;
+ int chunk_pages, entry, i;
struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
struct i40iw_pble_info *pinfo;
struct scatterlist *sg;
@@ -1355,14 +1363,14 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr,
pinfo = (level == I40IW_LEVEL_1) ? NULL : palloc->level2.leaf;
- pg_shift = ffs(region->page_size) - 1;
for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) {
- chunk_pages = sg_dma_len(sg) >> pg_shift;
+ chunk_pages = sg_dma_len(sg) >> region->page_shift;
if ((iwmr->type == IW_MEMREG_TYPE_QP) &&
!iwpbl->qp_mr.sq_page)
iwpbl->qp_mr.sq_page = sg_page(sg);
for (i = 0; i < chunk_pages; i++) {
- pg_addr = sg_dma_address(sg) + region->page_size * i;
+ pg_addr = sg_dma_address(sg) +
+ (i << region->page_shift);
if ((entry + i) == 0)
*pbl = cpu_to_le64(pg_addr & iwmr->page_msk);
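Editor's note: this hunk (and the matching one in mlx4/mr.c further down) follows the ib_umem API change from a byte-sized page_size to a page_shift, so per-page DMA addresses are derived with a shift instead of a multiply. A trivial standalone illustration of the equivalence:

/* Illustrative only: page_size * i versus i << page_shift. */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	unsigned int page_shift = 12;			/* 4 KiB pages */
	uint64_t page_size = 1ULL << page_shift;
	uint64_t base = 0x100000;

	for (uint64_t i = 0; i < 8; i++)
		assert(base + page_size * i == base + (i << page_shift));
	return 0;
}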
@@ -1848,7 +1856,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
iwmr->ibmr.device = pd->device;
ucontext = to_ucontext(pd->uobject->context);
- iwmr->page_size = region->page_size;
+ iwmr->page_size = PAGE_SIZE;
iwmr->page_msk = PAGE_MASK;
if (region->hugetlb && (req.reg_type == IW_MEMREG_TYPE_MEM))
@@ -2064,7 +2072,7 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr)
ucontext = to_ucontext(ibpd->uobject->context);
i40iw_del_memlist(iwmr, ucontext);
}
- if (iwpbl->pbl_allocated)
+ if (iwpbl->pbl_allocated && iwmr->type != IW_MEMREG_TYPE_QP)
i40iw_free_pble(iwdev->pble_rsrc, palloc);
kfree(iwmr);
return 0;
@@ -2497,14 +2505,15 @@ static int i40iw_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
- err = i40iw_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
return 0;
}
@@ -2696,7 +2705,7 @@ static int i40iw_query_pkey(struct ib_device *ibdev,
* @ah_attr: address handle attributes
*/
static struct ib_ah *i40iw_create_ah(struct ib_pd *ibpd,
- struct ib_ah_attr *attr,
+ struct rdma_ah_attr *attr,
struct ib_udata *udata)
{
@@ -2758,7 +2767,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
(1ull << IB_USER_VERBS_CMD_POST_SEND);
iwibdev->ibdev.phys_port_cnt = 1;
iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count;
- iwibdev->ibdev.dma_device = &pcidev->dev;
iwibdev->ibdev.dev.parent = &pcidev->dev;
iwibdev->ibdev.query_port = i40iw_query_port;
iwibdev->ibdev.modify_port = i40iw_modify_port;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
index 07c3fec77de6..9067443cd311 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
@@ -170,7 +170,7 @@ struct i40iw_qp {
struct i40iw_qp_kmode kqp;
struct i40iw_dma_mem host_ctx;
struct timer_list terminate_timer;
- struct i40iw_pbl *iwpbl;
+ struct i40iw_pbl iwpbl;
struct i40iw_dma_mem q2_ctx_mem;
struct i40iw_dma_mem ietf_mem;
struct completion sq_drained;
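Editor's note: struct i40iw_qp now embeds a struct i40iw_pbl rather than holding a pointer into the ucontext's registration list; create_qp copies the looked-up entry into the QP, so teardown (see the new i40iw_free_pble() call in i40iw_free_qp_resources above) works from the QP's own copy even if the original list entry is gone. In miniature, with made-up types:

/* Illustrative only: copy a looked-up record instead of caching a pointer. */
#include <string.h>
#include <stdio.h>

struct pbl { unsigned long user_base; int allocated; };
struct qp  { struct pbl iwpbl; };	/* embedded copy, not a pointer */

static void qp_adopt_pbl(struct qp *qp, const struct pbl *found)
{
	/* after this, the QP no longer depends on 'found' staying alive */
	memcpy(&qp->iwpbl, found, sizeof(qp->iwpbl));
}

int main(void)
{
	struct pbl entry = { 0x7f0000000000UL, 1 };
	struct qp qp = { { 0, 0 } };

	qp_adopt_pbl(&qp, &entry);
	memset(&entry, 0, sizeof(entry));	/* original may be freed or reused */
	printf("%d\n", qp.iwpbl.allocated);	/* still 1 */
	return 0;
}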
diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
index f4d13683a403..48fd327f876b 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
@@ -443,10 +443,7 @@ enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev,
if (!dev->vchnl_up)
return I40IW_ERR_NOT_READY;
if (vchnl_msg->iw_op_code == I40IW_VCHNL_OP_GET_VER) {
- if (vchnl_msg->iw_op_ver != I40IW_VCHNL_OP_GET_VER_V0)
- vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg);
- else
- vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg);
+ vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg);
return I40IW_SUCCESS;
}
for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT; iw_vf_idx++) {
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 077c33d2dc75..538c46a73248 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -40,36 +40,45 @@
#include "mlx4_ib.h"
-static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+static struct ib_ah *create_ib_ah(struct ib_pd *pd,
+ struct rdma_ah_attr *ah_attr,
struct mlx4_ib_ah *ah)
{
struct mlx4_dev *dev = to_mdev(pd->device)->dev;
- ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
- ah->av.ib.g_slid = ah_attr->src_path_bits;
- ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
- if (ah_attr->ah_flags & IB_AH_GRH) {
+ ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn |
+ (rdma_ah_get_port_num(ah_attr) << 24));
+ ah->av.ib.g_slid = rdma_ah_get_path_bits(ah_attr);
+ ah->av.ib.sl_tclass_flowlabel =
+ cpu_to_be32(rdma_ah_get_sl(ah_attr) << 28);
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
+
ah->av.ib.g_slid |= 0x80;
- ah->av.ib.gid_index = ah_attr->grh.sgid_index;
- ah->av.ib.hop_limit = ah_attr->grh.hop_limit;
+ ah->av.ib.gid_index = grh->sgid_index;
+ ah->av.ib.hop_limit = grh->hop_limit;
ah->av.ib.sl_tclass_flowlabel |=
- cpu_to_be32((ah_attr->grh.traffic_class << 20) |
- ah_attr->grh.flow_label);
- memcpy(ah->av.ib.dgid, ah_attr->grh.dgid.raw, 16);
+ cpu_to_be32((grh->traffic_class << 20) |
+ grh->flow_label);
+ memcpy(ah->av.ib.dgid, grh->dgid.raw, 16);
}
- ah->av.ib.dlid = cpu_to_be16(ah_attr->dlid);
- if (ah_attr->static_rate) {
- ah->av.ib.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
- while (ah->av.ib.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
- !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support))
- --ah->av.ib.stat_rate;
+ ah->av.ib.dlid = cpu_to_be16(rdma_ah_get_dlid(ah_attr));
+ if (rdma_ah_get_static_rate(ah_attr)) {
+ u8 static_rate = rdma_ah_get_static_rate(ah_attr) +
+ MLX4_STAT_RATE_OFFSET;
+
+ while (static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
+ !(1 << static_rate & dev->caps.stat_rate_support))
+ --static_rate;
+ ah->av.ib.stat_rate = static_rate;
}
return &ah->ibah;
}
-static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+static struct ib_ah *create_iboe_ah(struct ib_pd *pd,
+ struct rdma_ah_attr *ah_attr,
struct mlx4_ib_ah *ah)
{
struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
@@ -79,17 +88,18 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
u16 vlan_tag = 0xffff;
union ib_gid sgid;
struct ib_gid_attr gid_attr;
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
int ret;
- memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
+ memcpy(&in6, grh->dgid.raw, sizeof(in6));
if (rdma_is_multicast_addr(&in6)) {
is_mcast = 1;
rdma_get_mcast_mac(&in6, ah->av.eth.mac);
} else {
- memcpy(ah->av.eth.mac, ah_attr->dmac, ETH_ALEN);
+ memcpy(ah->av.eth.mac, ah_attr->roce.dmac, ETH_ALEN);
}
- ret = ib_get_cached_gid(pd->device, ah_attr->port_num,
- ah_attr->grh.sgid_index, &sgid, &gid_attr);
+ ret = ib_get_cached_gid(pd->device, rdma_ah_get_port_num(ah_attr),
+ grh->sgid_index, &sgid, &gid_attr);
if (ret)
return ERR_PTR(ret);
eth_zero_addr(ah->av.eth.s_mac);
@@ -100,36 +110,40 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
dev_put(gid_attr.ndev);
}
if (vlan_tag < 0x1000)
- vlan_tag |= (ah_attr->sl & 7) << 13;
- ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
- ret = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
+ vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13;
+ ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn |
+ (rdma_ah_get_port_num(ah_attr) << 24));
+ ret = mlx4_ib_gid_index_to_real_index(ibdev,
+ rdma_ah_get_port_num(ah_attr),
+ grh->sgid_index);
if (ret < 0)
return ERR_PTR(ret);
ah->av.eth.gid_index = ret;
ah->av.eth.vlan = cpu_to_be16(vlan_tag);
- ah->av.eth.hop_limit = ah_attr->grh.hop_limit;
- if (ah_attr->static_rate) {
- ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
+ ah->av.eth.hop_limit = grh->hop_limit;
+ if (rdma_ah_get_static_rate(ah_attr)) {
+ ah->av.eth.stat_rate = rdma_ah_get_static_rate(ah_attr) +
+ MLX4_STAT_RATE_OFFSET;
while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
!(1 << ah->av.eth.stat_rate & dev->caps.stat_rate_support))
--ah->av.eth.stat_rate;
}
ah->av.eth.sl_tclass_flowlabel |=
- cpu_to_be32((ah_attr->grh.traffic_class << 20) |
- ah_attr->grh.flow_label);
+ cpu_to_be32((grh->traffic_class << 20) |
+ grh->flow_label);
/*
* HW requires multicast LID so we just choose one.
*/
if (is_mcast)
ah->av.ib.dlid = cpu_to_be16(0xc000);
- memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16);
- ah->av.eth.sl_tclass_flowlabel |= cpu_to_be32(ah_attr->sl << 29);
-
+ memcpy(ah->av.eth.dgid, grh->dgid.raw, 16);
+ ah->av.eth.sl_tclass_flowlabel |= cpu_to_be32(rdma_ah_get_sl(ah_attr)
+ << 29);
return &ah->ibah;
}
-struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
struct ib_udata *udata)
{
@@ -140,8 +154,8 @@ struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
if (!ah)
return ERR_PTR(-ENOMEM);
- if (rdma_port_get_link_layer(pd->device, ah_attr->port_num) == IB_LINK_LAYER_ETHERNET) {
- if (!(ah_attr->ah_flags & IB_AH_GRH)) {
+ if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
+ if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) {
ret = ERR_PTR(-EINVAL);
} else {
/*
@@ -163,34 +177,40 @@ struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
return create_ib_ah(pd, ah_attr, ah); /* never fails */
}
-int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
{
struct mlx4_ib_ah *ah = to_mah(ibah);
- enum rdma_link_layer ll;
+ int port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24;
memset(ah_attr, 0, sizeof *ah_attr);
- ah_attr->port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24;
- ll = rdma_port_get_link_layer(ibah->device, ah_attr->port_num);
- if (ll == IB_LINK_LAYER_ETHERNET)
- ah_attr->sl = be32_to_cpu(ah->av.eth.sl_tclass_flowlabel) >> 29;
- else
- ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
-
- ah_attr->dlid = ll == IB_LINK_LAYER_INFINIBAND ? be16_to_cpu(ah->av.ib.dlid) : 0;
- if (ah->av.ib.stat_rate)
- ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET;
- ah_attr->src_path_bits = ah->av.ib.g_slid & 0x7F;
+ ah_attr->type = ibah->type;
+ if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
+ rdma_ah_set_dlid(ah_attr, 0);
+ rdma_ah_set_sl(ah_attr,
+ be32_to_cpu(ah->av.eth.sl_tclass_flowlabel)
+ >> 29);
+ } else {
+ rdma_ah_set_dlid(ah_attr, be16_to_cpu(ah->av.ib.dlid));
+ rdma_ah_set_sl(ah_attr,
+ be32_to_cpu(ah->av.ib.sl_tclass_flowlabel)
+ >> 28);
+ }
+
+ rdma_ah_set_port_num(ah_attr, port_num);
+ if (ah->av.ib.stat_rate)
+ rdma_ah_set_static_rate(ah_attr,
+ ah->av.ib.stat_rate -
+ MLX4_STAT_RATE_OFFSET);
+ rdma_ah_set_path_bits(ah_attr, ah->av.ib.g_slid & 0x7F);
if (mlx4_ib_ah_grh_present(ah)) {
- ah_attr->ah_flags = IB_AH_GRH;
-
- ah_attr->grh.traffic_class =
- be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20;
- ah_attr->grh.flow_label =
- be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) & 0xfffff;
- ah_attr->grh.hop_limit = ah->av.ib.hop_limit;
- ah_attr->grh.sgid_index = ah->av.ib.gid_index;
- memcpy(ah_attr->grh.dgid.raw, ah->av.ib.dgid, 16);
+ u32 tc_fl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel);
+
+ rdma_ah_set_grh(ah_attr, NULL,
+ tc_fl & 0xfffff, ah->av.ib.gid_index,
+ ah->av.ib.hop_limit,
+ tc_fl >> 20);
+ rdma_ah_set_dgid_raw(ah_attr, ah->av.ib.dgid);
}
return 0;
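Editor's note: the mlx4 AH code above moves from poking struct ib_ah_attr fields directly to the rdma_ah_* accessor family (rdma_ah_get_dlid(), rdma_ah_set_sl(), rdma_ah_read_grh(), and so on), with the new attr->type field distinguishing IB from RoCE instead of a per-port link-layer lookup. A hedged sketch of filling an attribute the new way; it only uses calls that appear in this patch, and the function name is invented:

/* Sketch only; mirrors accessor calls used in this patch. */
#include <rdma/ib_verbs.h>

static struct ib_ah *make_sm_ah(struct ib_device *dev, struct ib_pd *pd,
				u8 port_num, u16 lid, u8 sl)
{
	struct rdma_ah_attr attr = { };

	attr.type = rdma_ah_find_type(dev, port_num);	/* IB vs. RoCE */
	rdma_ah_set_dlid(&attr, lid);
	rdma_ah_set_sl(&attr, sl);
	rdma_ah_set_port_num(&attr, port_num);

	return rdma_create_ah(pd, &attr);
}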
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index 06020c54db20..ea24230ea0d4 100644
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -499,6 +499,7 @@ static int set_guid_rec(struct ib_device *ibdev,
struct list_head *head =
&dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
+ memset(&attr, 0, sizeof(attr));
err = __mlx4_ib_query_port(ibdev, port, &attr, 1);
if (err) {
pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n",
diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
index d64845335e87..fedaf8260105 100644
--- a/drivers/infiniband/hw/mlx4/cm.c
+++ b/drivers/infiniband/hw/mlx4/cm.c
@@ -279,14 +279,14 @@ id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id)
}
static struct id_map_entry *
-id_map_get(struct ib_device *ibdev, int *pv_cm_id, int sl_cm_id, int slave_id)
+id_map_get(struct ib_device *ibdev, int *pv_cm_id, int slave_id, int sl_cm_id)
{
struct id_map_entry *ent;
struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
spin_lock(&sriov->id_map_lock);
if (*pv_cm_id == -1) {
- ent = id_map_find_by_sl_id(ibdev, sl_cm_id, slave_id);
+ ent = id_map_find_by_sl_id(ibdev, slave_id, sl_cm_id);
if (ent)
*pv_cm_id = (int) ent->pv_cm_id;
} else
@@ -323,6 +323,9 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id
mad->mad_hdr.attr_id == CM_REP_ATTR_ID ||
mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
sl_cm_id = get_local_comm_id(mad);
+ id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id);
+ if (id)
+ goto cont;
id = id_map_alloc(ibdev, slave_id, sl_cm_id);
if (IS_ERR(id)) {
mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n",
@@ -343,6 +346,7 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id
return -EINVAL;
}
+cont:
set_local_comm_id(mad, id->pv_cm_id);
if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
@@ -414,7 +418,7 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
struct rb_root *sl_id_map = &sriov->sl_id_map;
struct list_head lh;
struct rb_node *nd;
- int need_flush = 1;
+ int need_flush = 0;
struct id_map_entry *map, *tmp_map;
/* cancel all delayed work queue entries */
INIT_LIST_HEAD(&lh);
@@ -422,13 +426,13 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) {
if (slave < 0 || slave == map->slave_id) {
if (map->scheduled_delete)
- need_flush &= !!cancel_delayed_work(&map->timeout);
+ need_flush |= !cancel_delayed_work(&map->timeout);
}
}
spin_unlock(&sriov->id_map_lock);
- if (!need_flush)
+ if (need_flush)
flush_scheduled_work(); /* make sure all timers were flushed */
/* now, remove all leftover entries from databases */
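Editor's note: the paravirt-clean hunk above inverts the flush decision: cancel_delayed_work() returns true only when it dequeued still-pending work, so a flush is needed precisely when some scheduled deletion could not be cancelled (it may already be running). The old code ANDed the results and then flushed on the wrong condition. Reduced to its boolean core as a standalone sketch:

/* Illustrative only: flush when at least one cancel failed. */
#include <stdbool.h>
#include <stdio.h>

/* stand-in for cancel_delayed_work(): true iff the work was still queued */
static bool try_cancel(bool was_queued) { return was_queued; }

int main(void)
{
	bool queued[] = { true, false, true };	/* middle one already running */
	bool need_flush = false;

	for (unsigned i = 0; i < 3; i++)
		need_flush |= !try_cancel(queued[i]);

	if (need_flush)
		puts("flush workqueue so in-flight handlers can finish");
	return 0;
}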
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 6a0fec357dae..ff931c580557 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -102,7 +102,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
int err;
err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size,
- PAGE_SIZE * 2, &buf->buf, GFP_KERNEL);
+ PAGE_SIZE * 2, &buf->buf);
if (err)
goto out;
@@ -113,7 +113,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
if (err)
goto err_buf;
- err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf, GFP_KERNEL);
+ err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf);
if (err)
goto err_mtt;
@@ -147,7 +147,7 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont
return PTR_ERR(*umem);
err = mlx4_mtt_init(dev->dev, ib_umem_page_count(*umem),
- ilog2((*umem)->page_size), &buf->mtt);
+ (*umem)->page_shift, &buf->mtt);
if (err)
goto err_buf;
@@ -219,7 +219,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
uar = &to_mucontext(context)->uar;
} else {
- err = mlx4_db_alloc(dev->dev, &cq->db, 1, GFP_KERNEL);
+ err = mlx4_db_alloc(dev->dev, &cq->db, 1);
if (err)
goto err_cq;
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index db564ccc0f92..21d31cb1325f 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -189,25 +189,26 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
{
struct ib_ah *new_ah;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
unsigned long flags;
if (!dev->send_agent[port_num - 1][0])
return;
memset(&ah_attr, 0, sizeof ah_attr);
- ah_attr.dlid = lid;
- ah_attr.sl = sl;
- ah_attr.port_num = port_num;
+ ah_attr.type = rdma_ah_find_type(&dev->ib_dev, port_num);
+ rdma_ah_set_dlid(&ah_attr, lid);
+ rdma_ah_set_sl(&ah_attr, sl);
+ rdma_ah_set_port_num(&ah_attr, port_num);
- new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
- &ah_attr);
+ new_ah = rdma_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
+ &ah_attr);
if (IS_ERR(new_ah))
return;
spin_lock_irqsave(&dev->sm_lock, flags);
if (dev->sm_ah[port_num - 1])
- ib_destroy_ah(dev->sm_ah[port_num - 1]);
+ rdma_destroy_ah(dev->sm_ah[port_num - 1]);
dev->sm_ah[port_num - 1] = new_ah;
spin_unlock_irqrestore(&dev->sm_lock, flags);
}
@@ -509,7 +510,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
struct mlx4_ib_demux_pv_ctx *tun_ctx;
struct mlx4_ib_demux_pv_qp *tun_qp;
struct mlx4_rcv_tunnel_mad *tun_mad;
- struct ib_ah_attr attr;
+ struct rdma_ah_attr attr;
struct ib_ah *ah;
struct ib_qp *src_qp = NULL;
unsigned tun_tx_ix = 0;
@@ -555,15 +556,18 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
/* create ah. Just need an empty one with the port num for the post send.
* The driver will set the force loopback bit in post_send */
memset(&attr, 0, sizeof attr);
- attr.port_num = port;
+ attr.type = rdma_ah_find_type(&dev->ib_dev, port);
+
+ rdma_ah_set_port_num(&attr, port);
if (is_eth) {
union ib_gid sgid;
+ union ib_gid dgid;
- if (get_gids_from_l3_hdr(grh, &sgid, &attr.grh.dgid))
+ if (get_gids_from_l3_hdr(grh, &sgid, &dgid))
return -EINVAL;
- attr.ah_flags = IB_AH_GRH;
+ rdma_ah_set_grh(&attr, &dgid, 0, 0, 0, 0);
}
- ah = ib_create_ah(tun_ctx->pd, &attr);
+ ah = rdma_create_ah(tun_ctx->pd, &attr);
if (IS_ERR(ah))
return -ENOMEM;
@@ -580,7 +584,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr);
if (tun_qp->tx_ring[tun_tx_ix].ah)
- ib_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah);
+ rdma_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah);
tun_qp->tx_ring[tun_tx_ix].ah = ah;
ib_dma_sync_single_for_cpu(&dev->ib_dev,
tun_qp->tx_ring[tun_tx_ix].buf.map,
@@ -653,7 +657,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
spin_unlock(&tun_qp->tx_lock);
tun_qp->tx_ring[tun_tx_ix].ah = NULL;
end:
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
return ret;
}
@@ -1018,7 +1022,7 @@ static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
if (mad_send_wc->send_buf->context[0])
- ib_destroy_ah(mad_send_wc->send_buf->context[0]);
+ rdma_destroy_ah(mad_send_wc->send_buf->context[0]);
ib_free_send_mad(mad_send_wc->send_buf);
}
@@ -1073,7 +1077,7 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
}
if (dev->sm_ah[p])
- ib_destroy_ah(dev->sm_ah[p]);
+ rdma_destroy_ah(dev->sm_ah[p]);
}
}
@@ -1352,7 +1356,7 @@ static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type dest_qpt, u16 pkey_index,
- u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr,
+ u32 remote_qpn, u32 qkey, struct rdma_ah_attr *attr,
u8 *s_mac, u16 vlan_id, struct ib_mad *mad)
{
struct ib_sge list;
@@ -1363,6 +1367,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
struct mlx4_mad_snd_buf *sqp_mad;
struct ib_ah *ah;
struct ib_qp *send_qp = NULL;
+ struct ib_global_route *grh;
unsigned wire_tx_ix = 0;
int ret = 0;
u16 wire_pkey_ix;
@@ -1389,12 +1394,13 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
send_qp = sqp->qp;
/* create ah */
- sgid_index = attr->grh.sgid_index;
- attr->grh.sgid_index = 0;
- ah = ib_create_ah(sqp_ctx->pd, attr);
+ grh = rdma_ah_retrieve_grh(attr);
+ sgid_index = grh->sgid_index;
+ grh->sgid_index = 0;
+ ah = rdma_create_ah(sqp_ctx->pd, attr);
if (IS_ERR(ah))
return -ENOMEM;
- attr->grh.sgid_index = sgid_index;
+ grh->sgid_index = sgid_index;
to_mah(ah)->av.ib.gid_index = sgid_index;
/* get rid of force-loopback bit */
to_mah(ah)->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF);
@@ -1410,7 +1416,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr);
if (sqp->tx_ring[wire_tx_ix].ah)
- ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah);
+ rdma_destroy_ah(sqp->tx_ring[wire_tx_ix].ah);
sqp->tx_ring[wire_tx_ix].ah = ah;
ib_dma_sync_single_for_cpu(&dev->ib_dev,
sqp->tx_ring[wire_tx_ix].buf.map,
@@ -1442,7 +1448,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
if (s_mac)
memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6);
if (vlan_id < 0x1000)
- vlan_id |= (attr->sl & 7) << 13;
+ vlan_id |= (rdma_ah_get_sl(attr) & 7) << 13;
to_mah(ah)->av.eth.vlan = cpu_to_be16(vlan_id);
@@ -1455,7 +1461,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
spin_unlock(&sqp->tx_lock);
sqp->tx_ring[wire_tx_ix].ah = NULL;
out:
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
return ret;
}
@@ -1467,12 +1473,13 @@ static int get_slave_base_gid_ix(struct mlx4_ib_dev *dev, int slave, int port)
}
static void fill_in_real_sgid_index(struct mlx4_ib_dev *dev, int slave, int port,
- struct ib_ah_attr *ah_attr)
+ struct rdma_ah_attr *ah_attr)
{
+ struct ib_global_route *grh = rdma_ah_retrieve_grh(ah_attr);
if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND)
- ah_attr->grh.sgid_index = slave;
+ grh->sgid_index = slave;
else
- ah_attr->grh.sgid_index += get_slave_base_gid_ix(dev, slave, port);
+ grh->sgid_index += get_slave_base_gid_ix(dev, slave, port);
}
static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc)
@@ -1482,11 +1489,13 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
int wr_ix = wc->wr_id & (MLX4_NUM_TUNNEL_BUFS - 1);
struct mlx4_tunnel_mad *tunnel = tun_qp->ring[wr_ix].addr;
struct mlx4_ib_ah ah;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
u8 *slave_id;
int slave;
int port;
u16 vlan_id;
+ u8 qos;
+ u8 *dmac;
/* Get slave that sent this packet */
if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn ||
@@ -1569,16 +1578,19 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
if (port < 0)
return;
ah.av.ib.port_pd = cpu_to_be32(port << 24 | (be32_to_cpu(ah.av.ib.port_pd) & 0xffffff));
+ ah.ibah.type = rdma_ah_find_type(&dev->ib_dev, port);
mlx4_ib_query_ah(&ah.ibah, &ah_attr);
- if (ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&ah_attr) & IB_AH_GRH)
fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr);
-
- memcpy(ah_attr.dmac, tunnel->hdr.mac, 6);
+ dmac = rdma_ah_retrieve_dmac(&ah_attr);
+ if (dmac)
+ memcpy(dmac, tunnel->hdr.mac, ETH_ALEN);
vlan_id = be16_to_cpu(tunnel->hdr.vlan);
/* if the slave has a default vlan, use it */
- mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave,
- &vlan_id, &ah_attr.sl);
+ if (mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave,
+ &vlan_id, &qos))
+ rdma_ah_set_sl(&ah_attr, qos);
mlx4_ib_send_to_wire(dev, slave, ctx->port,
is_proxy_qp0(dev, wc->src_qp, slave) ?
@@ -1714,7 +1726,7 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
tx_buf_size, DMA_TO_DEVICE);
kfree(tun_qp->tx_ring[i].buf.addr);
if (tun_qp->tx_ring[i].ah)
- ib_destroy_ah(tun_qp->tx_ring[i].ah);
+ rdma_destroy_ah(tun_qp->tx_ring[i].ah);
}
kfree(tun_qp->tx_ring);
kfree(tun_qp->ring);
@@ -1746,7 +1758,7 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
pr_debug("received tunnel send completion:"
"wrid=0x%llx, status=0x%x\n",
wc.wr_id, wc.status);
- ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
+ rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)].ah);
tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
= NULL;
@@ -1763,7 +1775,7 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
" status = %d, wrid = 0x%llx\n",
ctx->slave, wc.status, wc.wr_id);
if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
- ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
+ rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)].ah);
tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
= NULL;
@@ -1900,7 +1912,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
if (wc.status == IB_WC_SUCCESS) {
switch (wc.opcode) {
case IB_WC_SEND:
- ib_destroy_ah(sqp->tx_ring[wc.wr_id &
+ rdma_destroy_ah(sqp->tx_ring[wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)].ah);
sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
= NULL;
@@ -1930,7 +1942,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
" status = %d, wrid = 0x%llx\n",
ctx->slave, wc.status, wc.wr_id);
if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
- ib_destroy_ah(sqp->tx_ring[wc.wr_id &
+ rdma_destroy_ah(sqp->tx_ring[wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)].ah);
sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
= NULL;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 7031a8dd4d14..d1b43cbbfea7 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -39,6 +39,9 @@
#include <linux/inetdevice.h>
#include <linux/rtnetlink.h>
#include <linux/if_vlan.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/task.h>
+
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <net/devlink.h>
@@ -58,8 +61,7 @@
#include <rdma/mlx4-abi.h>
#define DRV_NAME MLX4_IB_DRV_NAME
-#define DRV_VERSION "2.2-1"
-#define DRV_RELDATE "Feb 2014"
+#define DRV_VERSION "4.0-0"
#define MLX4_IB_FLOW_MAX_PRIO 0xFFF
#define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF
@@ -76,7 +78,7 @@ MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_ass
static const char mlx4_ib_version[] =
DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
- DRV_VERSION " (" DRV_RELDATE ")\n";
+ DRV_VERSION "\n";
static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
@@ -678,7 +680,7 @@ static u8 state_to_phys_state(enum ib_port_state state)
}
static int eth_link_query_port(struct ib_device *ibdev, u8 port,
- struct ib_port_attr *props, int netw_view)
+ struct ib_port_attr *props)
{
struct mlx4_ib_dev *mdev = to_mdev(ibdev);
@@ -741,11 +743,11 @@ int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
{
int err;
- memset(props, 0, sizeof *props);
+ /* props being zeroed by the caller, avoid zeroing it here */
err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
ib_link_query_port(ibdev, port, props, netw_view) :
- eth_link_query_port(ibdev, port, props, netw_view);
+ eth_link_query_port(ibdev, port, props);
return err;
}
@@ -1014,7 +1016,7 @@ static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
mutex_lock(&mdev->cap_mask_mutex);
- err = mlx4_ib_query_port(ibdev, port, &attr);
+ err = ib_query_port(ibdev, port, &attr);
if (err)
goto out;
@@ -1153,7 +1155,7 @@ static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
* call to mlx4_ib_vma_close.
*/
put_task_struct(owning_process);
- msleep(1);
+ usleep_range(1000, 2000);
owning_process = get_pid_task(ibcontext->tgid,
PIDTYPE_PID);
if (!owning_process ||
@@ -1170,7 +1172,7 @@ static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
/* need to protect from a race on closing the vma as part of
* mlx4_ib_vma_close().
*/
- down_read(&owning_mm->mmap_sem);
+ down_write(&owning_mm->mmap_sem);
for (i = 0; i < HW_BAR_COUNT; i++) {
vma = context->hw_bar_info[i].vma;
if (!vma)
@@ -1184,11 +1186,13 @@ static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
BUG_ON(1);
}
+ context->hw_bar_info[i].vma->vm_flags &=
+ ~(VM_SHARED | VM_MAYSHARE);
/* context going to be destroyed, should not access ops any more */
context->hw_bar_info[i].vma->vm_ops = NULL;
}
- up_read(&owning_mm->mmap_sem);
+ up_write(&owning_mm->mmap_sem);
mmput(owning_mm);
put_task_struct(owning_process);
}
@@ -2537,24 +2541,27 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
struct mlx4_ib_dev *mdev = to_mdev(ibdev);
int err;
- err = mlx4_ib_query_port(ibdev, port_num, &attr);
- if (err)
- return err;
-
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
- immutable->gid_tbl_len = attr.gid_tbl_len;
-
if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
} else {
if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ immutable->core_cap_flags |= RDMA_CORE_PORT_RAW_PACKET;
+ if (immutable->core_cap_flags & (RDMA_CORE_PORT_IBA_ROCE |
+ RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP))
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
}
- immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
return 0;
}
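Editor's note: get_port_immutable now fills in core_cap_flags (and, for mlx4, the RAW_PACKET capability) before calling back into ib_query_port(), because the core's query path can consult those flags; the i40iw hunk earlier in this patch makes the same reordering. Shape of the callback after the change, as a sketch with a placeholder capability and no driver specifics:

/* Sketch of the reordered callback; error handling trimmed. */
#include <rdma/ib_verbs.h>
#include <rdma/ib_mad.h>

static int example_port_immutable(struct ib_device *ibdev, u8 port_num,
				  struct ib_port_immutable *immutable)
{
	struct ib_port_attr attr;
	int err;

	/* set capabilities first: ib_query_port() may rely on them */
	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;

	err = ib_query_port(ibdev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	return 0;
}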
@@ -2625,7 +2632,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.phys_port_cnt = mlx4_is_bonded(dev) ?
1 : ibdev->num_ports;
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
- ibdev->ib_dev.dma_device = &dev->persist->pdev->dev;
+ ibdev->ib_dev.dev.parent = &dev->persist->pdev->dev;
ibdev->ib_dev.get_netdev = mlx4_ib_get_netdev;
ibdev->ib_dev.add_gid = mlx4_ib_add_gid;
ibdev->ib_dev.del_gid = mlx4_ib_del_gid;
@@ -2861,23 +2868,19 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (mlx4_ib_init_sriov(ibdev))
goto err_mad;
- if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE ||
- dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
- if (!iboe->nb.notifier_call) {
- iboe->nb.notifier_call = mlx4_ib_netdev_event;
- err = register_netdevice_notifier(&iboe->nb);
- if (err) {
- iboe->nb.notifier_call = NULL;
- goto err_notif;
- }
- }
- if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
- err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
- if (err) {
- goto err_notif;
- }
+ if (!iboe->nb.notifier_call) {
+ iboe->nb.notifier_call = mlx4_ib_netdev_event;
+ err = register_netdevice_notifier(&iboe->nb);
+ if (err) {
+ iboe->nb.notifier_call = NULL;
+ goto err_notif;
}
}
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+ err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
+ if (err)
+ goto err_notif;
+ }
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
if (device_create_file(&ibdev->ib_dev.dev,
@@ -2935,6 +2938,7 @@ err_counter:
mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]);
err_map:
+ mlx4_ib_free_eqs(dev, ibdev);
iounmap(ibdev->uar_map);
err_uar:
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index e010fe459e67..b73f89700ef9 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -209,7 +209,7 @@ static struct mcast_group *mcast_insert(struct mlx4_ib_demux_ctx *ctx,
static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad)
{
struct mlx4_ib_dev *dev = ctx->dev;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
unsigned long flags;
spin_lock_irqsave(&dev->sm_lock, flags);
@@ -231,20 +231,20 @@ static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx,
struct mlx4_ib_dev *dev = ctx->dev;
struct ib_mad_agent *agent = dev->send_agent[ctx->port - 1][1];
struct ib_wc wc;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
/* Our agent might not yet be registered when mads start to arrive */
if (!agent)
return -EAGAIN;
- ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
+ rdma_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
if (ib_find_cached_pkey(&dev->ib_dev, ctx->port, IB_DEFAULT_PKEY_FULL, &wc.pkey_index))
return -EINVAL;
wc.sl = 0;
wc.dlid_path_bits = 0;
wc.port_num = ctx->port;
- wc.slid = ah_attr.dlid; /* opensm lid */
+ wc.slid = rdma_ah_get_dlid(&ah_attr); /* opensm lid */
wc.src_qp = 1;
return mlx4_ib_send_to_slave(dev, slave, ctx->port, IB_QPT_GSI, &wc, NULL, mad);
}
@@ -1091,7 +1091,7 @@ static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy
if (!count)
break;
- msleep(1);
+ usleep_range(1000, 2000);
} while (time_after(end, jiffies));
flush_workqueue(ctx->mcg_wq);
@@ -1102,7 +1102,8 @@ static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy
while ((p = rb_first(&ctx->mcg_table)) != NULL) {
group = rb_entry(p, struct mcast_group, node);
if (atomic_read(&group->refcount))
- mcg_warn_group(group, "group refcount %d!!! (pointer %p)\n", atomic_read(&group->refcount), group);
+ mcg_debug_group(group, "group refcount %d!!! (pointer %p)\n",
+ atomic_read(&group->refcount), group);
force_clean_group(group);
}
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 7f3d976d81ed..9db82e67e959 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -55,7 +55,7 @@
#define pr_fmt(fmt) "<" MLX4_IB_DRV_NAME "> %s: " fmt, __func__
#define mlx4_ib_warn(ibdev, format, arg...) \
- dev_warn((ibdev)->dma_device, MLX4_IB_DRV_NAME ": " format, ## arg)
+ dev_warn((ibdev)->dev.parent, MLX4_IB_DRV_NAME ": " format, ## arg)
enum {
MLX4_IB_SQ_MIN_WQE_SHIFT = 6,
@@ -185,7 +185,6 @@ enum mlx4_ib_qp_flags {
MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
- MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO,
/* Mellanox specific flags start from IB_QP_CREATE_RESERVED_START */
MLX4_IB_ROCE_V2_GSI_QP = MLX4_IB_QP_CREATE_ROCE_V2_GSI,
@@ -742,9 +741,9 @@ int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
-struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
struct ib_udata *udata);
-int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
+int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
int mlx4_ib_destroy_ah(struct ib_ah *ah);
struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
@@ -833,7 +832,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
- u32 qkey, struct ib_ah_attr *attr, u8 *s_mac,
+ u32 qkey, struct rdma_ah_attr *attr, u8 *s_mac,
u16 vlan_id, struct ib_mad *mad);
__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 5d73989d9771..e6f77f63da75 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -107,7 +107,7 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
len = sg_dma_len(sg) >> mtt->page_shift;
for (k = 0; k < len; ++k) {
pages[i++] = sg_dma_address(sg) +
- umem->page_size * k;
+ (k << umem->page_shift);
/*
* Be friendly to mlx4_write_mtt() and
* pass it chunks of appropriate size.
@@ -155,7 +155,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
n = ib_umem_page_count(mr->umem);
- shift = ilog2(mr->umem->page_size);
+ shift = mr->umem->page_shift;
err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
convert_access(access_flags), n, shift, &mr->mmr);
@@ -239,7 +239,7 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
goto release_mpt_entry;
}
n = ib_umem_page_count(mmr->umem);
- shift = ilog2(mmr->umem->page_size);
+ shift = mmr->umem->page_shift;
err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
virt_addr, length, n, shift,
@@ -292,10 +292,10 @@ mlx4_alloc_priv_pages(struct ib_device *device,
if (!mr->pages)
return -ENOMEM;
- mr->page_map = dma_map_single(device->dma_device, mr->pages,
+ mr->page_map = dma_map_single(device->dev.parent, mr->pages,
mr->page_map_size, DMA_TO_DEVICE);
- if (dma_mapping_error(device->dma_device, mr->page_map)) {
+ if (dma_mapping_error(device->dev.parent, mr->page_map)) {
ret = -ENOMEM;
goto err;
}
@@ -313,7 +313,7 @@ mlx4_free_priv_pages(struct mlx4_ib_mr *mr)
if (mr->pages) {
struct ib_device *device = mr->ibmr.device;
- dma_unmap_single(device->dma_device, mr->page_map,
+ dma_unmap_single(device->dev.parent, mr->page_map,
mr->page_map_size, DMA_TO_DEVICE);
free_page((unsigned long)mr->pages);
mr->pages = NULL;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index c068add8838b..75c0e6c5dd56 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -76,10 +76,6 @@ enum {
MLX4_IB_LSO_HEADER_SPARE = 128,
};
-enum {
- MLX4_IB_IBOE_ETHERTYPE = 0x8915
-};
-
struct mlx4_ib_sqp {
struct mlx4_ib_qp qp;
int pkey_index;
@@ -638,8 +634,8 @@ static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev,
static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp,
- gfp_t gfp)
+ struct ib_udata *udata, int sqpn,
+ struct mlx4_ib_qp **caller_qp)
{
int qpn;
int err;
@@ -695,14 +691,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
(qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
- sqp = kzalloc(sizeof (struct mlx4_ib_sqp), gfp);
+ sqp = kzalloc(sizeof(struct mlx4_ib_sqp), GFP_KERNEL);
if (!sqp)
return -ENOMEM;
qp = &sqp->qp;
qp->pri.vid = 0xFFFF;
qp->alt.vid = 0xFFFF;
} else {
- qp = kzalloc(sizeof (struct mlx4_ib_qp), gfp);
+ qp = kzalloc(sizeof(struct mlx4_ib_qp), GFP_KERNEL);
if (!qp)
return -ENOMEM;
qp->pri.vid = 0xFFFF;
@@ -749,7 +745,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
}
err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),
- ilog2(qp->umem->page_size), &qp->mtt);
+ qp->umem->page_shift, &qp->mtt);
if (err)
goto err_buf;
@@ -784,7 +780,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
goto err;
if (qp_has_rq(init_attr)) {
- err = mlx4_db_alloc(dev->dev, &qp->db, 0, gfp);
+ err = mlx4_db_alloc(dev->dev, &qp->db, 0);
if (err)
goto err;
@@ -792,7 +788,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
}
if (mlx4_buf_alloc(dev->dev, qp->buf_size, qp->buf_size,
- &qp->buf, gfp)) {
+ &qp->buf)) {
memcpy(&init_attr->cap, &backup_cap,
sizeof(backup_cap));
err = set_kernel_sq_size(dev, &init_attr->cap, qp_type,
@@ -801,7 +797,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
goto err_db;
if (mlx4_buf_alloc(dev->dev, qp->buf_size,
- PAGE_SIZE * 2, &qp->buf, gfp)) {
+ PAGE_SIZE * 2, &qp->buf)) {
err = -ENOMEM;
goto err_db;
}
@@ -812,20 +808,20 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (err)
goto err_buf;
- err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf, gfp);
+ err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
if (err)
goto err_mtt;
qp->sq.wrid = kmalloc_array(qp->sq.wqe_cnt, sizeof(u64),
- gfp | __GFP_NOWARN);
+ GFP_KERNEL | __GFP_NOWARN);
if (!qp->sq.wrid)
qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
- gfp, PAGE_KERNEL);
+ GFP_KERNEL, PAGE_KERNEL);
qp->rq.wrid = kmalloc_array(qp->rq.wqe_cnt, sizeof(u64),
- gfp | __GFP_NOWARN);
+ GFP_KERNEL | __GFP_NOWARN);
if (!qp->rq.wrid)
qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
- gfp, PAGE_KERNEL);
+ GFP_KERNEL, PAGE_KERNEL);
if (!qp->sq.wrid || !qp->rq.wrid) {
err = -ENOMEM;
goto err_wrid;
@@ -863,7 +859,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
- err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp);
+ err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
if (err)
goto err_qpn;
@@ -1131,10 +1127,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
int err;
int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
u16 xrcdn = 0;
- gfp_t gfp;
- gfp = (init_attr->create_flags & MLX4_IB_QP_CREATE_USE_GFP_NOIO) ?
- GFP_NOIO : GFP_KERNEL;
/*
* We only support LSO, vendor flag1, and multicast loopback blocking,
* and only for kernel UD QPs.
@@ -1144,8 +1137,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
MLX4_IB_SRIOV_TUNNEL_QP |
MLX4_IB_SRIOV_SQP |
MLX4_IB_QP_NETIF |
- MLX4_IB_QP_CREATE_ROCE_V2_GSI |
- MLX4_IB_QP_CREATE_USE_GFP_NOIO))
+ MLX4_IB_QP_CREATE_ROCE_V2_GSI))
return ERR_PTR(-EINVAL);
if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
@@ -1158,7 +1150,6 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
return ERR_PTR(-EINVAL);
if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
- MLX4_IB_QP_CREATE_USE_GFP_NOIO |
MLX4_IB_QP_CREATE_ROCE_V2_GSI |
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) &&
init_attr->qp_type != IB_QPT_UD) ||
@@ -1183,7 +1174,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
case IB_QPT_RC:
case IB_QPT_UC:
case IB_QPT_RAW_PACKET:
- qp = kzalloc(sizeof *qp, gfp);
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
qp->pri.vid = 0xFFFF;
@@ -1192,7 +1183,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
case IB_QPT_UD:
{
err = create_qp_common(to_mdev(pd->device), pd, init_attr,
- udata, 0, &qp, gfp);
+ udata, 0, &qp);
if (err) {
kfree(qp);
return ERR_PTR(err);
@@ -1221,8 +1212,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
}
err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
- sqpn,
- &qp, gfp);
+ sqpn, &qp);
if (err)
return ERR_PTR(err);
@@ -1387,31 +1377,31 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
}
-static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
+static int _mlx4_set_path(struct mlx4_ib_dev *dev,
+ const struct rdma_ah_attr *ah,
u64 smac, u16 vlan_tag, struct mlx4_qp_path *path,
struct mlx4_roce_smac_vlan_info *smac_info, u8 port)
{
- int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) ==
- IB_LINK_LAYER_ETHERNET;
int vidx;
int smac_index;
int err;
-
- path->grh_mylmc = ah->src_path_bits & 0x7f;
- path->rlid = cpu_to_be16(ah->dlid);
- if (ah->static_rate) {
- path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET;
+ path->grh_mylmc = rdma_ah_get_path_bits(ah) & 0x7f;
+ path->rlid = cpu_to_be16(rdma_ah_get_dlid(ah));
+ if (rdma_ah_get_static_rate(ah)) {
+ path->static_rate = rdma_ah_get_static_rate(ah) +
+ MLX4_STAT_RATE_OFFSET;
while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
!(1 << path->static_rate & dev->dev->caps.stat_rate_support))
--path->static_rate;
} else
path->static_rate = 0;
- if (ah->ah_flags & IB_AH_GRH) {
- int real_sgid_index = mlx4_ib_gid_index_to_real_index(dev,
- port,
- ah->grh.sgid_index);
+ if (rdma_ah_get_ah_flags(ah) & IB_AH_GRH) {
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah);
+ int real_sgid_index =
+ mlx4_ib_gid_index_to_real_index(dev, port,
+ grh->sgid_index);
if (real_sgid_index >= dev->dev->caps.gid_table_len[port]) {
pr_err("sgid_index (%u) too large. max is %d\n",
@@ -1421,19 +1411,19 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
path->grh_mylmc |= 1 << 7;
path->mgid_index = real_sgid_index;
- path->hop_limit = ah->grh.hop_limit;
+ path->hop_limit = grh->hop_limit;
path->tclass_flowlabel =
- cpu_to_be32((ah->grh.traffic_class << 20) |
- (ah->grh.flow_label));
- memcpy(path->rgid, ah->grh.dgid.raw, 16);
+ cpu_to_be32((grh->traffic_class << 20) |
+ (grh->flow_label));
+ memcpy(path->rgid, grh->dgid.raw, 16);
}
- if (is_eth) {
- if (!(ah->ah_flags & IB_AH_GRH))
+ if (ah->type == RDMA_AH_ATTR_TYPE_ROCE) {
+ if (!(rdma_ah_get_ah_flags(ah) & IB_AH_GRH))
return -1;
path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
- ((port - 1) << 6) | ((ah->sl & 7) << 3);
+ ((port - 1) << 6) | ((rdma_ah_get_sl(ah) & 7) << 3);
path->feup |= MLX4_FEUP_FORCE_ETH_UP;
if (vlan_tag < 0x1000) {
@@ -1492,14 +1482,13 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
} else {
smac_index = smac_info->smac_index;
}
-
- memcpy(path->dmac, ah->dmac, 6);
+ memcpy(path->dmac, ah->roce.dmac, 6);
path->ackto = MLX4_IB_LINK_TYPE_ETH;
/* put MAC table smac index for IBoE */
path->grh_mylmc = (u8) (smac_index) | 0x80;
} else {
path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
- ((port - 1) << 6) | ((ah->sl & 0xf) << 2);
+ ((port - 1) << 6) | ((rdma_ah_get_sl(ah) & 0xf) << 2);
}
return 0;
@@ -1771,11 +1760,13 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
u16 vlan = 0xffff;
u8 smac[ETH_ALEN];
int status = 0;
- int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
- attr->ah_attr.ah_flags & IB_AH_GRH;
+ int is_eth =
+ rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
+ rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH;
- if (is_eth && attr->ah_attr.ah_flags & IB_AH_GRH) {
- int index = attr->ah_attr.grh.sgid_index;
+ if (is_eth) {
+ int index =
+ rdma_ah_read_grh(&attr->ah_attr)->sgid_index;
status = ib_get_cached_gid(ibqp->device, port_num,
index, &gid, &gid_attr);
@@ -2424,11 +2415,31 @@ static u8 sl_to_vl(struct mlx4_ib_dev *dev, u8 sl, int port_num)
return vl;
}
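+/* Read a GID and its type from the per-port HW GID table cache; -ENOENT if the entry is empty. */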
+static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8 port_num,
+ int index, union ib_gid *gid,
+ enum ib_gid_type *gid_type)
+{
+ struct mlx4_ib_iboe *iboe = &ibdev->iboe;
+ struct mlx4_port_gid_table *port_gid_table;
+ unsigned long flags;
+
+ port_gid_table = &iboe->gids[port_num - 1];
+ spin_lock_irqsave(&iboe->lock, flags);
+ memcpy(gid, &port_gid_table->gids[index].gid, sizeof(*gid));
+ *gid_type = port_gid_table->gids[index].gid_type;
+ spin_unlock_irqrestore(&iboe->lock, flags);
+ if (!memcmp(gid, &zgid, sizeof(*gid)))
+ return -ENOENT;
+
+ return 0;
+}
+
#define MLX4_ROCEV2_QP1_SPORT 0xC000
static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
struct ib_device *ib_dev = sqp->qp.ibqp.device;
+ struct mlx4_ib_dev *ibdev = to_mdev(ib_dev);
struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_ctrl_seg *ctrl = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
@@ -2454,8 +2465,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
is_grh = mlx4_ib_ah_grh_present(ah);
if (is_eth) {
- struct ib_gid_attr gid_attr;
-
+ enum ib_gid_type gid_type;
if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
/* When multi-function is enabled, the ib_core gid
* indexes don't necessarily match the hw ones, so
@@ -2466,18 +2476,11 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
if (err)
return err;
} else {
- err = ib_get_cached_gid(ib_dev,
- be32_to_cpu(ah->av.ib.port_pd) >> 24,
- ah->av.ib.gid_index, &sgid,
- &gid_attr);
- if (!err) {
- if (gid_attr.ndev)
- dev_put(gid_attr.ndev);
- if (!memcmp(&sgid, &zgid, sizeof(sgid)))
- err = -ENOENT;
- }
+ err = fill_gid_by_hw_index(ibdev, sqp->qp.port,
+ ah->av.ib.gid_index,
+ &sgid, &gid_type);
if (!err) {
- is_udp = gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
+ is_udp = gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
if (is_udp) {
if (ipv6_addr_v4mapped((struct in6_addr *)&sgid))
ip_version = 4;
@@ -2588,7 +2591,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
u16 ether_type;
u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
- ether_type = (!is_udp) ? MLX4_IB_IBOE_ETHERTYPE :
+ ether_type = (!is_udp) ? ETH_P_IBOE :
(ip_version == 4 ? ETH_P_IP : ETH_P_IPV6);
mlx->sched_prio = cpu_to_be16(pcp);
@@ -2955,21 +2958,17 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
if (sqp->roce_v2_gsi) {
struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah);
- struct ib_gid_attr gid_attr;
+ enum ib_gid_type gid_type;
union ib_gid gid;
- if (!ib_get_cached_gid(ibqp->device,
- be32_to_cpu(ah->av.ib.port_pd) >> 24,
- ah->av.ib.gid_index, &gid,
- &gid_attr)) {
- if (gid_attr.ndev)
- dev_put(gid_attr.ndev);
- qp = (gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
- to_mqp(sqp->roce_v2_gsi) : qp;
- } else {
+ if (!fill_gid_by_hw_index(mdev, sqp->qp.port,
+ ah->av.ib.gid_index,
+ &gid, &gid_type))
+ qp = (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
+ to_mqp(sqp->roce_v2_gsi) : qp;
+ else
pr_err("Failed to get gid at index %d. RoCEv2 will not work properly\n",
ah->av.ib.gid_index);
- }
}
}
@@ -3390,39 +3389,37 @@ static int to_ib_qp_access_flags(int mlx4_flags)
return ib_flags;
}
-static void to_ib_ah_attr(struct mlx4_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr,
- struct mlx4_qp_path *path)
+static void to_rdma_ah_attr(struct mlx4_ib_dev *ibdev,
+ struct rdma_ah_attr *ah_attr,
+ struct mlx4_qp_path *path)
{
struct mlx4_dev *dev = ibdev->dev;
- int is_eth;
-
- memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
- ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1;
+ u8 port_num = path->sched_queue & 0x40 ? 2 : 1;
- if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
+ memset(ah_attr, 0, sizeof(*ah_attr));
+ ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port_num);
+ if (port_num == 0 || port_num > dev->caps.num_ports)
return;
- is_eth = rdma_port_get_link_layer(&ibdev->ib_dev, ib_ah_attr->port_num) ==
- IB_LINK_LAYER_ETHERNET;
- if (is_eth)
- ib_ah_attr->sl = ((path->sched_queue >> 3) & 0x7) |
- ((path->sched_queue & 4) << 1);
+ if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE)
+ rdma_ah_set_sl(ah_attr, ((path->sched_queue >> 3) & 0x7) |
+ ((path->sched_queue & 4) << 1));
else
- ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf;
+ rdma_ah_set_sl(ah_attr, (path->sched_queue >> 2) & 0xf);
+ rdma_ah_set_port_num(ah_attr, port_num);
- ib_ah_attr->dlid = be16_to_cpu(path->rlid);
- ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f;
- ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0;
- ib_ah_attr->ah_flags = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
- if (ib_ah_attr->ah_flags) {
- ib_ah_attr->grh.sgid_index = path->mgid_index;
- ib_ah_attr->grh.hop_limit = path->hop_limit;
- ib_ah_attr->grh.traffic_class =
- (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
- ib_ah_attr->grh.flow_label =
- be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
- memcpy(ib_ah_attr->grh.dgid.raw,
- path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
+ rdma_ah_set_dlid(ah_attr, be16_to_cpu(path->rlid));
+ rdma_ah_set_path_bits(ah_attr, path->grh_mylmc & 0x7f);
+ rdma_ah_set_static_rate(ah_attr,
+ path->static_rate ? path->static_rate - 5 : 0);
+ if (path->grh_mylmc & (1 << 7)) {
+ rdma_ah_set_grh(ah_attr, NULL,
+ be32_to_cpu(path->tclass_flowlabel) & 0xfffff,
+ path->mgid_index,
+ path->hop_limit,
+ (be32_to_cpu(path->tclass_flowlabel)
+ >> 20) & 0xff);
+ rdma_ah_set_dgid_raw(ah_attr, path->rgid);
}
}
@@ -3463,10 +3460,11 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
to_ib_qp_access_flags(be32_to_cpu(context.params2));
if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
- to_ib_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path);
- to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path);
+ to_rdma_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path);
+ to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path);
qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
- qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
+ qp_attr->alt_port_num =
+ rdma_ah_get_port_num(&qp_attr->alt_ah_attr);
}
qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 7dd3f267f06b..0facaf5f6d23 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -122,7 +122,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
}
err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem),
- ilog2(srq->umem->page_size), &srq->mtt);
+ srq->umem->page_shift, &srq->mtt);
if (err)
goto err_buf;
@@ -135,14 +135,14 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
if (err)
goto err_mtt;
} else {
- err = mlx4_db_alloc(dev->dev, &srq->db, 0, GFP_KERNEL);
+ err = mlx4_db_alloc(dev->dev, &srq->db, 0);
if (err)
goto err_srq;
*srq->db.db = 0;
- if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf,
- GFP_KERNEL)) {
+ if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2,
+ &srq->buf)) {
err = -ENOMEM;
goto err_db;
}
@@ -167,7 +167,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
if (err)
goto err_buf;
- err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf, GFP_KERNEL);
+ err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf);
if (err)
goto err_mtt;
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index 69fb5ba94d0f..0ba5ba7540c8 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -226,6 +226,7 @@ static int add_port_entries(struct mlx4_ib_dev *device, int port_num)
int ret = 0 ;
struct ib_port_attr attr;
+ memset(&attr, 0, sizeof(attr));
/* get the physical gid and pkey table sizes.*/
ret = __mlx4_ib_query_port(&device->ib_dev, port_num, &attr, 1);
if (ret)
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 7493a83acd28..90ad2adc752f 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -1,4 +1,4 @@
obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
-mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o
+mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index d090e96f6f01..3363e29157f6 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -34,50 +34,51 @@
static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
struct mlx5_ib_ah *ah,
- struct ib_ah_attr *ah_attr,
- enum rdma_link_layer ll)
+ struct rdma_ah_attr *ah_attr)
{
- if (ah_attr->ah_flags & IB_AH_GRH) {
- memcpy(ah->av.rgid, &ah_attr->grh.dgid, 16);
- ah->av.grh_gid_fl = cpu_to_be32(ah_attr->grh.flow_label |
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
+
+ memcpy(ah->av.rgid, &grh->dgid, 16);
+ ah->av.grh_gid_fl = cpu_to_be32(grh->flow_label |
(1 << 30) |
- ah_attr->grh.sgid_index << 20);
- ah->av.hop_limit = ah_attr->grh.hop_limit;
- ah->av.tclass = ah_attr->grh.traffic_class;
+ grh->sgid_index << 20);
+ ah->av.hop_limit = grh->hop_limit;
+ ah->av.tclass = grh->traffic_class;
}
- ah->av.stat_rate_sl = (ah_attr->static_rate << 4);
+ ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4);
- if (ll == IB_LINK_LAYER_ETHERNET) {
- memcpy(ah->av.rmac, ah_attr->dmac, sizeof(ah_attr->dmac));
+ if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
+ memcpy(ah->av.rmac, ah_attr->roce.dmac,
+ sizeof(ah_attr->roce.dmac));
ah->av.udp_sport =
- mlx5_get_roce_udp_sport(dev,
- ah_attr->port_num,
- ah_attr->grh.sgid_index);
- ah->av.stat_rate_sl |= (ah_attr->sl & 0x7) << 1;
+ mlx5_get_roce_udp_sport(dev,
+ rdma_ah_get_port_num(ah_attr),
+ rdma_ah_read_grh(ah_attr)->sgid_index);
+ ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0x7) << 1;
} else {
- ah->av.rlid = cpu_to_be16(ah_attr->dlid);
- ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
- ah->av.stat_rate_sl |= (ah_attr->sl & 0xf);
+ ah->av.rlid = cpu_to_be16(rdma_ah_get_dlid(ah_attr));
+ ah->av.fl_mlid = rdma_ah_get_path_bits(ah_attr) & 0x7f;
+ ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0xf);
}
return &ah->ibah;
}
-struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
struct ib_udata *udata)
{
struct mlx5_ib_ah *ah;
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- enum rdma_link_layer ll;
-
- ll = pd->device->get_link_layer(pd->device, ah_attr->port_num);
+ enum rdma_ah_attr_type ah_type = ah_attr->type;
- if (ll == IB_LINK_LAYER_ETHERNET && !(ah_attr->ah_flags & IB_AH_GRH))
+ if ((ah_type == RDMA_AH_ATTR_TYPE_ROCE) &&
+ !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
return ERR_PTR(-EINVAL);
- if (ll == IB_LINK_LAYER_ETHERNET && udata) {
+ if (ah_type == RDMA_AH_ATTR_TYPE_ROCE && udata) {
int err;
struct mlx5_ib_create_ah_resp resp = {};
u32 min_resp_len = offsetof(typeof(resp), dmac) +
@@ -92,7 +93,7 @@ struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
if (err)
return ERR_PTR(err);
- memcpy(resp.dmac, ah_attr->dmac, ETH_ALEN);
+ memcpy(resp.dmac, ah_attr->roce.dmac, ETH_ALEN);
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
return ERR_PTR(err);
@@ -102,28 +103,29 @@ struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
if (!ah)
return ERR_PTR(-ENOMEM);
- return create_ib_ah(dev, ah, ah_attr, ll); /* never fails */
+ return create_ib_ah(dev, ah, ah_attr); /* never fails */
}
-int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
{
struct mlx5_ib_ah *ah = to_mah(ibah);
u32 tmp;
memset(ah_attr, 0, sizeof(*ah_attr));
+ ah_attr->type = ibah->type;
tmp = be32_to_cpu(ah->av.grh_gid_fl);
if (tmp & (1 << 30)) {
- ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->grh.sgid_index = (tmp >> 20) & 0xff;
- ah_attr->grh.flow_label = tmp & 0xfffff;
- memcpy(&ah_attr->grh.dgid, ah->av.rgid, 16);
- ah_attr->grh.hop_limit = ah->av.hop_limit;
- ah_attr->grh.traffic_class = ah->av.tclass;
+ rdma_ah_set_grh(ah_attr, NULL,
+ tmp & 0xfffff,
+ (tmp >> 20) & 0xff,
+ ah->av.hop_limit,
+ ah->av.tclass);
+ rdma_ah_set_dgid_raw(ah_attr, ah->av.rgid);
}
- ah_attr->dlid = be16_to_cpu(ah->av.rlid);
- ah_attr->static_rate = ah->av.stat_rate_sl >> 4;
- ah_attr->sl = ah->av.stat_rate_sl & 0xf;
+ rdma_ah_set_dlid(ah_attr, be16_to_cpu(ah->av.rlid));
+ rdma_ah_set_static_rate(ah_attr, ah->av.stat_rate_sl >> 4);
+ rdma_ah_set_sl(ah_attr, ah->av.stat_rate_sl & 0xf);
return 0;
}
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
new file mode 100644
index 000000000000..18d5e1db93ed
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "cmd.h"
+
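+/* Fetch the device's null mkey via the QUERY_SPECIAL_CONTEXTS command. */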
+int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey)
+{
+ u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {};
+ int err;
+
+ MLX5_SET(query_special_contexts_in, in, opcode,
+ MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *null_mkey = MLX5_GET(query_special_contexts_out, out,
+ null_mkey);
+ return err;
+}
+
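+/* Read the device congestion statistics, optionally clearing them. */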
+int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
+ bool reset, void *out, int out_size)
+{
+ u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { };
+
+ MLX5_SET(query_cong_statistics_in, in, opcode,
+ MLX5_CMD_OP_QUERY_CONG_STATISTICS);
+ MLX5_SET(query_cong_statistics_in, in, clear, reset);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
+}
diff --git a/drivers/infiniband/hw/qedr/qedr_hsi.h b/drivers/infiniband/hw/mlx5/cmd.h
index 66d27521373f..fa09228193a6 100644
--- a/drivers/infiniband/hw/qedr/qedr_hsi.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -1,5 +1,5 @@
-/* QLogic qedr NIC Driver
- * Copyright (c) 2015-2016 QLogic Corporation
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -17,7 +17,7 @@
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
- * disclaimer in the documentation and /or other materials
+ * disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
@@ -29,28 +29,14 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef __QED_HSI_ROCE__
-#define __QED_HSI_ROCE__
-#include <linux/qed/common_hsi.h>
-#include <linux/qed/roce_common.h>
-#include "qedr_hsi_rdma.h"
+#ifndef MLX5_IB_CMD_H
+#define MLX5_IB_CMD_H
-/* Affiliated asynchronous events / errors enumeration */
-enum roce_async_events_type {
- ROCE_ASYNC_EVENT_NONE = 0,
- ROCE_ASYNC_EVENT_COMM_EST = 1,
- ROCE_ASYNC_EVENT_SQ_DRAINED,
- ROCE_ASYNC_EVENT_SRQ_LIMIT,
- ROCE_ASYNC_EVENT_LAST_WQE_REACHED,
- ROCE_ASYNC_EVENT_CQ_ERR,
- ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR,
- ROCE_ASYNC_EVENT_LOCAL_CATASTROPHIC_ERR,
- ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR,
- ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR,
- ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR,
- ROCE_ASYNC_EVENT_SRQ_EMPTY,
- MAX_ROCE_ASYNC_EVENTS_TYPE
-};
+#include <linux/kernel.h>
+#include <linux/mlx5/driver.h>
-#endif /* __QED_HSI_ROCE__ */
+int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
+int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
+ bool reset, void *out, int out_size);
+#endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index b3ef47c3ab73..a384d72ea3cd 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -172,6 +172,8 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
struct mlx5_ib_srq *srq;
struct mlx5_ib_wq *wq;
u16 wqe_ctr;
+ u8 roce_packet_type;
+ bool vlan_present;
u8 g;
if (qp->ibqp.srq || qp->ibqp.xrcd) {
@@ -223,7 +225,6 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
break;
}
wc->slid = be16_to_cpu(cqe->slid);
- wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
wc->src_qp = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
wc->dlid_path_bits = cqe->ml_path;
g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
@@ -237,10 +238,22 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
wc->pkey_index = 0;
}
- if (ll != IB_LINK_LAYER_ETHERNET)
+ if (ll != IB_LINK_LAYER_ETHERNET) {
+ wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
return;
+ }
+
+ vlan_present = cqe->l4_l3_hdr_type & 0x1;
+ roce_packet_type = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0x3;
+ if (vlan_present) {
+ wc->vlan_id = (be16_to_cpu(cqe->vlan_info)) & 0xfff;
+ wc->sl = (be16_to_cpu(cqe->vlan_info) >> 13) & 0x7;
+ wc->wc_flags |= IB_WC_WITH_VLAN;
+ } else {
+ wc->sl = 0;
+ }
- switch (wc->sl & 0x3) {
+ switch (roce_packet_type) {
case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH:
wc->network_hdr_type = RDMA_NETWORK_IB;
break;
@@ -689,7 +702,7 @@ int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev;
struct mlx5_ib_cq *cq = to_mcq(ibcq);
- void __iomem *uar_page = mdev->priv.uuari.uars[0].map;
+ void __iomem *uar_page = mdev->priv.uar->map;
unsigned long irq_flags;
int ret = 0;
@@ -704,9 +717,7 @@ int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
mlx5_cq_arm(&cq->mcq,
(flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
- uar_page,
- MLX5_GET_DOORBELL_LOCK(&mdev->priv.cq_uar_lock),
- to_mcq(ibcq)->mcq.cons_index);
+ uar_page, to_mcq(ibcq)->mcq.cons_index);
return ret;
}
@@ -777,7 +788,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont;
- *cqb = mlx5_vzalloc(*inlen);
+ *cqb = kvzalloc(*inlen, GFP_KERNEL);
if (!*cqb) {
err = -ENOMEM;
goto err_db;
@@ -790,7 +801,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
MLX5_SET(cqc, cqc, log_page_size,
page_shift - MLX5_ADAPTER_PAGE_SHIFT);
- *index = to_mucontext(context)->uuari.uars[0].index;
+ *index = to_mucontext(context)->bfregi.sys_pages[0];
if (ucmd.cqe_comp_en == 1) {
if (unlikely((*cqe_size != 64) ||
@@ -820,7 +831,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
return 0;
err_cqb:
- kfree(cqb);
+ kfree(*cqb);
err_db:
mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
@@ -873,7 +884,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * cq->buf.buf.npages;
- *cqb = mlx5_vzalloc(*inlen);
+ *cqb = kvzalloc(*inlen, GFP_KERNEL);
if (!*cqb) {
err = -ENOMEM;
goto err_buf;
@@ -886,7 +897,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
MLX5_SET(cqc, cqc, log_page_size,
cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
- *index = dev->mdev->priv.uuari.uars[0].index;
+ *index = dev->mdev->priv.uar->index;
return 0;
@@ -1303,7 +1314,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
inlen = MLX5_ST_SZ_BYTES(modify_cq_in) +
MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas;
- in = mlx5_vzalloc(inlen);
+ in = kvzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
goto ex_resize;
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 39e58489dcc2..95db929bdc34 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -42,12 +42,24 @@ enum {
MLX5_IB_VENDOR_CLASS2 = 0xa
};
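+/* SMP-class MADs may only go through MAD_IFC on ports that expose an SMI. */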
+static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num,
+ struct ib_mad *in_mad)
+{
+ if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED &&
+ in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ return true;
+ return dev->mdev->port_caps[port_num - 1].has_smi;
+}
+
int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const void *in_mad, void *response_mad)
{
u8 op_modifier = 0;
+ if (!can_do_mad_ifc(dev, port, (struct ib_mad *)in_mad))
+ return -EPERM;
+
/* Key check traps can't be generated unless we have in_wc to
* tell us where to send the trap.
*/
@@ -175,6 +187,8 @@ static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt,
port_xmit_discards);
MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_constraint_errors,
port_xmit_constraint_errors);
+ MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_wait,
+ port_xmit_wait);
MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_constraint_errors,
port_rcv_constraint_errors);
MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_overrun_errors,
@@ -204,7 +218,7 @@ static int process_pma_cmd(struct ib_device *ibdev, u8 port_num,
(struct ib_pma_portcounters_ext *)(out_mad->data + 40);
int sz = MLX5_ST_SZ_BYTES(query_vport_counter_out);
- out_cnt = mlx5_vzalloc(sz);
+ out_cnt = kvzalloc(sz, GFP_KERNEL);
if (!out_cnt)
return IB_MAD_RESULT_FAILURE;
@@ -217,7 +231,7 @@ static int process_pma_cmd(struct ib_device *ibdev, u8 port_num,
(struct ib_pma_portcounters *)(out_mad->data + 40);
int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
- out_cnt = mlx5_vzalloc(sz);
+ out_cnt = kvzalloc(sz, GFP_KERNEL);
if (!out_cnt)
return IB_MAD_RESULT_FAILURE;
@@ -515,7 +529,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
if (!in_mad || !out_mad)
goto out;
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index d566f6738833..a7f2e60085c4 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -41,6 +41,8 @@
#include <asm/pat.h>
#endif
#include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/task.h>
#include <linux/delay.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_addr.h>
@@ -53,24 +55,21 @@
#include <linux/in.h>
#include <linux/etherdevice.h>
#include <linux/mlx5/fs.h>
+#include <linux/mlx5/vport.h>
#include "mlx5_ib.h"
+#include "cmd.h"
#define DRIVER_NAME "mlx5_ib"
-#define DRIVER_VERSION "2.2-1"
-#define DRIVER_RELDATE "Feb 2014"
+#define DRIVER_VERSION "5.0-0"
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRIVER_VERSION);
-static int deprecated_prof_sel = 2;
-module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
-MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");
-
static char mlx5_version[] =
DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
- DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
+ DRIVER_VERSION "\n";
enum {
MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3,
@@ -166,15 +165,84 @@ static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
return ndev;
}
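+/* Map the active Ethernet protocol bitmask to an equivalent IB speed/width pair. */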
+static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed,
+ u8 *active_width)
+{
+ switch (eth_proto_oper) {
+ case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII):
+ case MLX5E_PROT_MASK(MLX5E_1000BASE_KX):
+ case MLX5E_PROT_MASK(MLX5E_100BASE_TX):
+ case MLX5E_PROT_MASK(MLX5E_1000BASE_T):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_SDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_T):
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_CX4):
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_KX4):
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_KR):
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_CR):
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_SR):
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_ER):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_QDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_25GBASE_CR):
+ case MLX5E_PROT_MASK(MLX5E_25GBASE_KR):
+ case MLX5E_PROT_MASK(MLX5E_25GBASE_SR):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_EDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_40GBASE_CR4):
+ case MLX5E_PROT_MASK(MLX5E_40GBASE_KR4):
+ case MLX5E_PROT_MASK(MLX5E_40GBASE_SR4):
+ case MLX5E_PROT_MASK(MLX5E_40GBASE_LR4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_QDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_50GBASE_CR2):
+ case MLX5E_PROT_MASK(MLX5E_50GBASE_KR2):
+ case MLX5E_PROT_MASK(MLX5E_50GBASE_SR2):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_HDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_56GBASE_R4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_FDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_100GBASE_CR4):
+ case MLX5E_PROT_MASK(MLX5E_100GBASE_SR4):
+ case MLX5E_PROT_MASK(MLX5E_100GBASE_KR4):
+ case MLX5E_PROT_MASK(MLX5E_100GBASE_LR4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_EDR;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
struct ib_port_attr *props)
{
struct mlx5_ib_dev *dev = to_mdev(device);
+ struct mlx5_core_dev *mdev = dev->mdev;
struct net_device *ndev, *upper;
enum ib_mtu ndev_ib_mtu;
u16 qkey_viol_cntr;
+ u32 eth_prot_oper;
+ int err;
- memset(props, 0, sizeof(*props));
+ /* Possible bad flows are checked before filling out props so in case
+ * of an error it will still be zeroed out.
+ */
+ err = mlx5_query_port_eth_proto_oper(mdev, &eth_prot_oper, port_num);
+ if (err)
+ return err;
+
+ translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
+ &props->active_width);
props->port_cap_flags |= IB_PORT_CM_SUP;
props->port_cap_flags |= IB_PORT_IP_BASED_GIDS;
@@ -215,80 +283,49 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
dev_put(ndev);
props->active_mtu = min(props->max_mtu, ndev_ib_mtu);
-
- props->active_width = IB_WIDTH_4X; /* TODO */
- props->active_speed = IB_SPEED_QDR; /* TODO */
-
return 0;
}
-static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- void *mlx5_addr)
+static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
+ unsigned int index, const union ib_gid *gid,
+ const struct ib_gid_attr *attr)
{
-#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
- char *mlx5_addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
- source_l3_address);
- void *mlx5_addr_mac = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
- source_mac_47_32);
-
- if (!gid)
- return;
+ enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+ u8 roce_version = 0;
+ u8 roce_l3_type = 0;
+ bool vlan = false;
+ u8 mac[ETH_ALEN];
+ u16 vlan_id = 0;
- ether_addr_copy(mlx5_addr_mac, attr->ndev->dev_addr);
+ if (gid) {
+ gid_type = attr->gid_type;
+ ether_addr_copy(mac, attr->ndev->dev_addr);
- if (is_vlan_dev(attr->ndev)) {
- MLX5_SET_RA(mlx5_addr, vlan_valid, 1);
- MLX5_SET_RA(mlx5_addr, vlan_id, vlan_dev_vlan_id(attr->ndev));
+ if (is_vlan_dev(attr->ndev)) {
+ vlan = true;
+ vlan_id = vlan_dev_vlan_id(attr->ndev);
+ }
}
- switch (attr->gid_type) {
+ switch (gid_type) {
case IB_GID_TYPE_IB:
- MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1);
+ roce_version = MLX5_ROCE_VERSION_1;
break;
case IB_GID_TYPE_ROCE_UDP_ENCAP:
- MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2);
+ roce_version = MLX5_ROCE_VERSION_2;
+ if (ipv6_addr_v4mapped((void *)gid))
+ roce_l3_type = MLX5_ROCE_L3_TYPE_IPV4;
+ else
+ roce_l3_type = MLX5_ROCE_L3_TYPE_IPV6;
break;
default:
- WARN_ON(true);
- }
-
- if (attr->gid_type != IB_GID_TYPE_IB) {
- if (ipv6_addr_v4mapped((void *)gid))
- MLX5_SET_RA(mlx5_addr, roce_l3_type,
- MLX5_ROCE_L3_TYPE_IPV4);
- else
- MLX5_SET_RA(mlx5_addr, roce_l3_type,
- MLX5_ROCE_L3_TYPE_IPV6);
+ mlx5_ib_warn(dev, "Unexpected GID type %u\n", gid_type);
}
- if ((attr->gid_type == IB_GID_TYPE_IB) ||
- !ipv6_addr_v4mapped((void *)gid))
- memcpy(mlx5_addr_l3_addr, gid, sizeof(*gid));
- else
- memcpy(&mlx5_addr_l3_addr[12], &gid->raw[12], 4);
-}
-
-static int set_roce_addr(struct ib_device *device, u8 port_num,
- unsigned int index,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr)
-{
- struct mlx5_ib_dev *dev = to_mdev(device);
- u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0};
- void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
- enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num);
-
- if (ll != IB_LINK_LAYER_ETHERNET)
- return -EINVAL;
-
- ib_gid_to_mlx5_roce_addr(gid, attr, in_addr);
-
- MLX5_SET(set_roce_address_in, in, roce_address_index, index);
- MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
- return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+ return mlx5_core_roce_gid_set(dev->mdev, index, roce_version,
+ roce_l3_type, gid->raw, mac, vlan,
+ vlan_id);
}
static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
@@ -296,13 +333,13 @@ static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
const struct ib_gid_attr *attr,
__always_unused void **context)
{
- return set_roce_addr(device, port_num, index, gid, attr);
+ return set_roce_addr(to_mdev(device), port_num, index, gid, attr);
}
static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num,
unsigned int index, __always_unused void **context)
{
- return set_roce_addr(device, port_num, index, NULL, NULL);
+ return set_roce_addr(to_mdev(device), port_num, index, NULL, NULL);
}
__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
@@ -325,6 +362,27 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
}
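+/* Resolve the RoCE GID type (v1 vs v2) of a cached GID table entry. */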
+int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
+ int index, enum ib_gid_type *gid_type)
+{
+ struct ib_gid_attr attr;
+ union ib_gid gid;
+ int ret;
+
+ ret = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr);
+ if (ret)
+ return ret;
+
+ if (!attr.ndev)
+ return -ENODEV;
+
+ dev_put(attr.ndev);
+
+ *gid_type = attr.gid_type;
+
+ return 0;
+}
+
static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
@@ -357,7 +415,7 @@ static void get_atomic_caps(struct mlx5_ib_dev *dev,
u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
u8 atomic_req_8B_endianness_mode =
- MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianess_mode);
+ MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianness_mode);
/* Check if HW supports 8 bytes standard atomic operations and capable
* of host endianness respond
@@ -564,8 +622,15 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) {
- if (MLX5_CAP_ETH(mdev, csum_cap))
+ if (MLX5_CAP_ETH(mdev, csum_cap)) {
+ /* Legacy bit to support old userspace libraries */
props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
+ props->raw_packet_caps |= IB_RAW_PACKET_CAP_IP_CSUM;
+ }
+
+ if (MLX5_CAP_ETH(dev->mdev, vlan_cap))
+ props->raw_packet_caps |=
+ IB_RAW_PACKET_CAP_CVLAN_STRIPPING;
if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
max_tso = MLX5_CAP_ETH(mdev, max_lso_cap);
@@ -604,8 +669,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
- MLX5_CAP_ETH(dev->mdev, scatter_fcs))
+ MLX5_CAP_ETH(dev->mdev, scatter_fcs)) {
+ /* Legacy bit to support old userspace libraries */
props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS;
+ props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
+ }
if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS))
props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
@@ -672,17 +740,6 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
1 << MLX5_CAP_GEN(dev->mdev, log_max_rq);
}
- if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
- uhw->outlen)) {
- resp.mlx5_ib_support_multi_pkt_send_wqes =
- MLX5_CAP_ETH(mdev, multi_pkt_send_wqe);
- resp.response_length +=
- sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
- }
-
- if (field_avail(typeof(resp), reserved, uhw->outlen))
- resp.response_length += sizeof(resp.reserved);
-
if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
resp.cqe_comp_caps.max_num =
MLX5_CAP_GEN(dev->mdev, cqe_compression) ?
@@ -706,6 +763,17 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
resp.response_length += sizeof(resp.packet_pacing_caps);
}
+ if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
+ uhw->outlen)) {
+ resp.mlx5_ib_support_multi_pkt_send_wqes =
+ MLX5_CAP_ETH(mdev, multi_pkt_send_wqe);
+ resp.response_length +=
+ sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
+ }
+
+ if (field_avail(typeof(resp), reserved, uhw->outlen))
+ resp.response_length += sizeof(resp.reserved);
+
if (uhw->outlen) {
err = ib_copy_to_udata(uhw, &resp, resp.response_length);
@@ -830,7 +898,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
goto out;
}
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep);
if (err)
@@ -886,19 +954,31 @@ out:
int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props)
{
+ unsigned int count;
+ int ret;
+
switch (mlx5_get_vport_access_method(ibdev)) {
case MLX5_VPORT_ACCESS_METHOD_MAD:
- return mlx5_query_mad_ifc_port(ibdev, port, props);
+ ret = mlx5_query_mad_ifc_port(ibdev, port, props);
+ break;
case MLX5_VPORT_ACCESS_METHOD_HCA:
- return mlx5_query_hca_port(ibdev, port, props);
+ ret = mlx5_query_hca_port(ibdev, port, props);
+ break;
case MLX5_VPORT_ACCESS_METHOD_NIC:
- return mlx5_query_port_roce(ibdev, port, props);
+ ret = mlx5_query_port_roce(ibdev, port, props);
+ break;
default:
- return -EINVAL;
+ ret = -EINVAL;
+ }
+
+ if (!ret && props) {
+ count = mlx5_core_reserved_gids_count(to_mdev(ibdev)->mdev);
+ props->gid_tbl_len -= count;
}
+ return ret;
}
static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
@@ -968,6 +1048,31 @@ static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
return err;
}
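+/* Apply a masked update of the port capability bits via the HCA vport context. */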
+static int set_port_caps_atomic(struct mlx5_ib_dev *dev, u8 port_num, u32 mask,
+ u32 value)
+{
+ struct mlx5_hca_vport_context ctx = {};
+ int err;
+
+ err = mlx5_query_hca_vport_context(dev->mdev, 0,
+ port_num, 0, &ctx);
+ if (err)
+ return err;
+
+ if (~ctx.cap_mask1_perm & mask) {
+ mlx5_ib_warn(dev, "trying to change bitmask 0x%X but change supported 0x%X\n",
+ mask, ctx.cap_mask1_perm);
+ return -EINVAL;
+ }
+
+ ctx.cap_mask1 = value;
+ ctx.cap_mask1_perm = mask;
+ err = mlx5_core_modify_hca_vport_context(dev->mdev, 0,
+ port_num, 0, &ctx);
+
+ return err;
+}
+
static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
struct ib_port_modify *props)
{
@@ -975,10 +1080,20 @@ static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
struct ib_port_attr attr;
u32 tmp;
int err;
+ u32 change_mask;
+ u32 value;
+ bool is_ib = (mlx5_ib_port_link_layer(ibdev, port) ==
+ IB_LINK_LAYER_INFINIBAND);
+
+ if (MLX5_CAP_GEN(dev->mdev, ib_virt) && is_ib) {
+ change_mask = props->clr_port_cap_mask | props->set_port_cap_mask;
+ value = ~props->clr_port_cap_mask | props->set_port_cap_mask;
+ return set_port_caps_atomic(dev, port, change_mask, value);
+ }
mutex_lock(&dev->cap_mask_mutex);
- err = mlx5_ib_query_port(ibdev, port, &attr);
+ err = ib_query_port(ibdev, port, &attr);
if (err)
goto out;
@@ -992,6 +1107,86 @@ out:
return err;
}
+static void print_lib_caps(struct mlx5_ib_dev *dev, u64 caps)
+{
+ mlx5_ib_dbg(dev, "MLX5_LIB_CAP_4K_UAR = %s\n",
+ caps & MLX5_LIB_CAP_4K_UAR ? "y" : "n");
+}
+
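+/* Validate the requested bfreg count and round it up to whole UAR system pages. */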
+static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k,
+ struct mlx5_ib_alloc_ucontext_req_v2 *req,
+ u32 *num_sys_pages)
+{
+ int uars_per_sys_page;
+ int bfregs_per_sys_page;
+ int ref_bfregs = req->total_num_bfregs;
+
+ if (req->total_num_bfregs == 0)
+ return -EINVAL;
+
+ BUILD_BUG_ON(MLX5_MAX_BFREGS % MLX5_NON_FP_BFREGS_IN_PAGE);
+ BUILD_BUG_ON(MLX5_MAX_BFREGS < MLX5_NON_FP_BFREGS_IN_PAGE);
+
+ if (req->total_num_bfregs > MLX5_MAX_BFREGS)
+ return -ENOMEM;
+
+ uars_per_sys_page = get_uars_per_sys_page(dev, lib_uar_4k);
+ bfregs_per_sys_page = uars_per_sys_page * MLX5_NON_FP_BFREGS_PER_UAR;
+ req->total_num_bfregs = ALIGN(req->total_num_bfregs, bfregs_per_sys_page);
+ *num_sys_pages = req->total_num_bfregs / bfregs_per_sys_page;
+
+ if (req->num_low_latency_bfregs > req->total_num_bfregs - 1)
+ return -EINVAL;
+
+ mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, alloated %d, using %d sys pages\n",
+ MLX5_CAP_GEN(dev->mdev, uar_4k) ? "yes" : "no",
+ lib_uar_4k ? "yes" : "no", ref_bfregs,
+ req->total_num_bfregs, *num_sys_pages);
+
+ return 0;
+}
+
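+/* Allocate a firmware UAR for each system page in the context's bfreg table. */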
+static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
+{
+ struct mlx5_bfreg_info *bfregi;
+ int err;
+ int i;
+
+ bfregi = &context->bfregi;
+ for (i = 0; i < bfregi->num_sys_pages; i++) {
+ err = mlx5_cmd_alloc_uar(dev->mdev, &bfregi->sys_pages[i]);
+ if (err)
+ goto error;
+
+ mlx5_ib_dbg(dev, "allocated uar %d\n", bfregi->sys_pages[i]);
+ }
+ return 0;
+
+error:
+ for (--i; i >= 0; i--)
+ if (mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]))
+ mlx5_ib_warn(dev, "failed to free uar %d\n", i);
+
+ return err;
+}
+
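+/* Free the firmware UARs acquired by allocate_uars(). */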
+static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
+{
+ struct mlx5_bfreg_info *bfregi;
+ int err;
+ int i;
+
+ bfregi = &context->bfregi;
+ for (i = 0; i < bfregi->num_sys_pages; i++) {
+ err = mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
+ if (err) {
+ mlx5_ib_warn(dev, "failed to free uar %d\n", i);
+ return err;
+ }
+ }
+ return 0;
+}
+
static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
@@ -999,17 +1194,13 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
struct mlx5_ib_alloc_ucontext_req_v2 req = {};
struct mlx5_ib_alloc_ucontext_resp resp = {};
struct mlx5_ib_ucontext *context;
- struct mlx5_uuar_info *uuari;
- struct mlx5_uar *uars;
- int gross_uuars;
- int num_uars;
+ struct mlx5_bfreg_info *bfregi;
int ver;
- int uuarn;
int err;
- int i;
size_t reqlen;
size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
max_cqe_version);
+ bool lib_uar_4k;
if (!dev->ib_active)
return ERR_PTR(-EAGAIN);
@@ -1032,27 +1223,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
if (req.flags)
return ERR_PTR(-EINVAL);
- if (req.total_num_uuars > MLX5_MAX_UUARS)
- return ERR_PTR(-ENOMEM);
-
- if (req.total_num_uuars == 0)
- return ERR_PTR(-EINVAL);
-
if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
return ERR_PTR(-EOPNOTSUPP);
- if (reqlen > sizeof(req) &&
- !ib_is_udata_cleared(udata, sizeof(req),
- reqlen - sizeof(req)))
- return ERR_PTR(-EOPNOTSUPP);
-
- req.total_num_uuars = ALIGN(req.total_num_uuars,
- MLX5_NON_FP_BF_REGS_PER_PAGE);
- if (req.num_low_latency_uuars > req.total_num_uuars - 1)
+ req.total_num_bfregs = ALIGN(req.total_num_bfregs,
+ MLX5_NON_FP_BFREGS_PER_UAR);
+ if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
return ERR_PTR(-EINVAL);
- num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
- gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
@@ -1065,6 +1243,10 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
resp.cqe_version = min_t(__u8,
(__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
req.max_cqe_version);
+ resp.log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+ MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT;
+ resp.num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+ MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1;
resp.response_length = min(offsetof(typeof(resp), response_length) +
sizeof(resp.response_length), udata->outlen);
@@ -1072,58 +1254,58 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
if (!context)
return ERR_PTR(-ENOMEM);
- uuari = &context->uuari;
- mutex_init(&uuari->lock);
- uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
- if (!uars) {
- err = -ENOMEM;
+ lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
+ bfregi = &context->bfregi;
+
+ /* updates req->total_num_bfregs */
+ err = calc_total_bfregs(dev, lib_uar_4k, &req, &bfregi->num_sys_pages);
+ if (err)
goto out_ctx;
- }
- uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
- sizeof(*uuari->bitmap),
+ mutex_init(&bfregi->lock);
+ bfregi->lib_uar_4k = lib_uar_4k;
+ bfregi->count = kcalloc(req.total_num_bfregs, sizeof(*bfregi->count),
GFP_KERNEL);
- if (!uuari->bitmap) {
+ if (!bfregi->count) {
err = -ENOMEM;
- goto out_uar_ctx;
- }
- /*
- * clear all fast path uuars
- */
- for (i = 0; i < gross_uuars; i++) {
- uuarn = i & 3;
- if (uuarn == 2 || uuarn == 3)
- set_bit(i, uuari->bitmap);
+ goto out_ctx;
}
- uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
- if (!uuari->count) {
+ bfregi->sys_pages = kcalloc(bfregi->num_sys_pages,
+ sizeof(*bfregi->sys_pages),
+ GFP_KERNEL);
+ if (!bfregi->sys_pages) {
err = -ENOMEM;
- goto out_bitmap;
+ goto out_count;
}
- for (i = 0; i < num_uars; i++) {
- err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index);
- if (err)
- goto out_count;
- }
+ err = allocate_uars(dev, context);
+ if (err)
+ goto out_sys_pages;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
#endif
+ context->upd_xlt_page = __get_free_page(GFP_KERNEL);
+ if (!context->upd_xlt_page) {
+ err = -ENOMEM;
+ goto out_uars;
+ }
+ mutex_init(&context->upd_xlt_page_mutex);
+
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
err = mlx5_core_alloc_transport_domain(dev->mdev,
&context->tdn);
if (err)
- goto out_uars;
+ goto out_page;
}
INIT_LIST_HEAD(&context->vma_private_list);
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
- resp.tot_uuars = req.total_num_uuars;
+ resp.tot_bfregs = req.total_num_bfregs;
resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
if (field_avail(typeof(resp), cqe_version, udata->outlen))
@@ -1135,32 +1317,46 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
resp.response_length += sizeof(resp.cmds_supp_uhw);
}
+ if (field_avail(typeof(resp), eth_min_inline, udata->outlen)) {
+ if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) {
+ mlx5_query_min_inline(dev->mdev, &resp.eth_min_inline);
+ resp.eth_min_inline++;
+ }
+ resp.response_length += sizeof(resp.eth_min_inline);
+ }
+
/*
* We don't want to expose information from the PCI bar that is located
* after 4096 bytes, so if the arch only supports larger pages, let's
* pretend we don't support reading the HCA's core clock. This is also
* forced by mmap function.
*/
- if (PAGE_SIZE <= 4096 &&
- field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
- resp.comp_mask |=
- MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
- resp.hca_core_clock_offset =
- offsetof(struct mlx5_init_seg, internal_timer_h) %
- PAGE_SIZE;
+ if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
+ if (PAGE_SIZE <= 4096) {
+ resp.comp_mask |=
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
+ resp.hca_core_clock_offset =
+ offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE;
+ }
resp.response_length += sizeof(resp.hca_core_clock_offset) +
sizeof(resp.reserved2);
}
+ if (field_avail(typeof(resp), log_uar_size, udata->outlen))
+ resp.response_length += sizeof(resp.log_uar_size);
+
+ if (field_avail(typeof(resp), num_uars_per_page, udata->outlen))
+ resp.response_length += sizeof(resp.num_uars_per_page);
+
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
goto out_td;
- uuari->ver = ver;
- uuari->num_low_latency_uuars = req.num_low_latency_uuars;
- uuari->uars = uars;
- uuari->num_uars = num_uars;
+ bfregi->ver = ver;
+ bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs;
context->cqe_version = resp.cqe_version;
+ context->lib_caps = req.lib_caps;
+ print_lib_caps(dev, context->lib_caps);
return &context->ibucontext;
@@ -1168,20 +1364,21 @@ out_td:
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+out_page:
+ free_page(context->upd_xlt_page);
+
out_uars:
- for (i--; i >= 0; i--)
- mlx5_cmd_free_uar(dev->mdev, uars[i].index);
-out_count:
- kfree(uuari->count);
+ deallocate_uars(dev, context);
-out_bitmap:
- kfree(uuari->bitmap);
+out_sys_pages:
+ kfree(bfregi->sys_pages);
-out_uar_ctx:
- kfree(uars);
+out_count:
+ kfree(bfregi->count);
out_ctx:
kfree(context);
+
return ERR_PTR(err);
}
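For orientation, a minimal sketch (not part of the patch) of the bfreg accounting the reworked ucontext path relies on; the constant and helper names below are assumptions standing in for the real mlx5 definitions such as MLX5_NON_FP_BFREGS_PER_UAR and calc_total_bfregs().
/* Sketch only -- not part of the patch. Requested bfregs -> "system pages":
 * four regular (non fast-path) bfregs share one UAR, and with 4K-UAR support
 * several 4KB UARs share one CPU page; otherwise each UAR gets its own page.
 */
static int sketch_num_sys_pages(bool lib_uar_4k, bool hw_uar_4k,
				int uars_per_cpu_page, int total_num_bfregs)
{
	const int bfregs_per_uar = 4;	/* assumed value of the mlx5 constant */
	int uars_per_sys_page = (lib_uar_4k && hw_uar_4k) ? uars_per_cpu_page : 1;
	int bfregs_per_sys_page = uars_per_sys_page * bfregs_per_uar;

	return DIV_ROUND_UP(total_num_bfregs, bfregs_per_sys_page);
}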
@@ -1189,28 +1386,31 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
- struct mlx5_uuar_info *uuari = &context->uuari;
- int i;
+ struct mlx5_bfreg_info *bfregi;
+ bfregi = &context->bfregi;
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
- for (i = 0; i < uuari->num_uars; i++) {
- if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
- mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
- }
-
- kfree(uuari->count);
- kfree(uuari->bitmap);
- kfree(uuari->uars);
+ free_page(context->upd_xlt_page);
+ deallocate_uars(dev, context);
+ kfree(bfregi->sys_pages);
+ kfree(bfregi->count);
kfree(context);
return 0;
}
-static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
+static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi,
+ int idx)
{
- return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index;
+ int fw_uars_per_page;
+
+ fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? MLX5_UARS_IN_PAGE : 1;
+
+ return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) +
+ bfregi->sys_pages[idx] / fw_uars_per_page;
}
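A worked example of the new pfn calculation (sketch only; the exact number of firmware UARs per CPU page is an assumption that depends on the architecture's page size):
/* Sketch, not part of the patch: with 4KB CPU pages, fw_uars_per_page is 1
 * and each sys page index resolves to its own pfn; on a 64KB-page machine
 * the firmware is assumed to pack 16 4KB UARs per page, so sys_pages[]
 * entries 0..15 would all resolve to the first pfn of BAR 0.
 */
static phys_addr_t sketch_uar_pfn(phys_addr_t bar0_start_pfn,
				  u32 fw_uar_index, int fw_uars_per_page)
{
	return bar0_start_pfn + fw_uar_index / fw_uars_per_page;
}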
static int get_command(unsigned long offset)
@@ -1328,7 +1528,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
/* need to protect from a race on closing the vma as part of
* mlx5_ib_vma_close.
*/
- down_read(&owning_mm->mmap_sem);
+ down_write(&owning_mm->mmap_sem);
list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
list) {
vma = vma_private->vma;
@@ -1338,11 +1538,12 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
/* context going to be destroyed, should
* not access ops any more.
*/
+ vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
vma->vm_ops = NULL;
list_del(&vma_private->list);
kfree(vma_private);
}
- up_read(&owning_mm->mmap_sem);
+ up_write(&owning_mm->mmap_sem);
mmput(owning_mm);
put_task_struct(owning_process);
}
@@ -1365,11 +1566,23 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
struct vm_area_struct *vma,
struct mlx5_ib_ucontext *context)
{
- struct mlx5_uuar_info *uuari = &context->uuari;
+ struct mlx5_bfreg_info *bfregi = &context->bfregi;
int err;
unsigned long idx;
phys_addr_t pfn, pa;
pgprot_t prot;
+ int uars_per_page;
+
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ uars_per_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k);
+ idx = get_index(vma->vm_pgoff);
+ if (idx % uars_per_page ||
+ idx * uars_per_page >= bfregi->num_sys_pages) {
+ mlx5_ib_warn(dev, "invalid uar index %lu\n", idx);
+ return -EINVAL;
+ }
switch (cmd) {
case MLX5_IB_MMAP_WC_PAGE:
@@ -1392,14 +1605,7 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
return -EINVAL;
}
- if (vma->vm_end - vma->vm_start != PAGE_SIZE)
- return -EINVAL;
-
- idx = get_index(vma->vm_pgoff);
- if (idx >= uuari->num_uars)
- return -EINVAL;
-
- pfn = uar_index2pfn(dev, uuari->uars[idx].index);
+ pfn = uar_index2pfn(dev, bfregi, idx);
mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
vma->vm_page_prot = prot;
@@ -1570,6 +1776,8 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
#define LAST_IPV6_FIELD traffic_class
#define LAST_TCP_UDP_FIELD src_port
#define LAST_TUNNEL_FIELD tunnel_id
+#define LAST_FLOW_TAG_FIELD tag_id
+#define LAST_DROP_FIELD size
/* Field is the last supported field */
#define FIELDS_NOT_SUPPORTED(filter, field)\
@@ -1579,8 +1787,11 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
offsetof(typeof(filter), field) -\
sizeof(filter.field))
-static int parse_flow_attr(u32 *match_c, u32 *match_v,
- const union ib_flow_spec *ib_spec)
+#define IPV4_VERSION 4
+#define IPV6_VERSION 6
+static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
+ u32 *match_v, const union ib_flow_spec *ib_spec,
+ u32 *tag_id, bool *is_drop)
{
void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
misc_parameters);
@@ -1588,23 +1799,28 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
misc_parameters);
void *headers_c;
void *headers_v;
+ int match_ipv;
if (ib_spec->type & IB_FLOW_SPEC_INNER) {
headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
inner_headers);
headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
inner_headers);
+ match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_ip_version);
} else {
headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
outer_headers);
headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
outer_headers);
+ match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_ip_version);
}
switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
case IB_FLOW_SPEC_ETH:
if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
dmac_47_16),
@@ -1622,9 +1838,9 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
if (ib_spec->eth.mask.vlan_tag) {
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- vlan_tag, 1);
+ cvlan_tag, 1);
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- vlan_tag, 1);
+ cvlan_tag, 1);
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
@@ -1652,12 +1868,19 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
break;
case IB_FLOW_SPEC_IPV4:
if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ethertype, 0xffff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ethertype, ETH_P_IP);
+ if (match_ipv) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ip_version, 0xf);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ip_version, IPV4_VERSION);
+ } else {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ethertype, 0xffff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ethertype, ETH_P_IP);
+ }
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
src_ipv4_src_ipv6.ipv4_layout.ipv4),
@@ -1684,12 +1907,19 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
break;
case IB_FLOW_SPEC_IPV6:
if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ethertype, 0xffff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ethertype, ETH_P_IPV6);
+ if (match_ipv) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ip_version, 0xf);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ip_version, IPV6_VERSION);
+ } else {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ethertype, 0xffff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ethertype, ETH_P_IPV6);
+ }
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
src_ipv4_src_ipv6.ipv6_layout.ipv6),
@@ -1725,7 +1955,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
case IB_FLOW_SPEC_TCP:
if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
LAST_TCP_UDP_FIELD))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
0xff);
@@ -1745,7 +1975,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
case IB_FLOW_SPEC_UDP:
if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
LAST_TCP_UDP_FIELD))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
0xff);
@@ -1765,13 +1995,28 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
case IB_FLOW_SPEC_VXLAN_TUNNEL:
if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
LAST_TUNNEL_FIELD))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
ntohl(ib_spec->tunnel.mask.tunnel_id));
MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
ntohl(ib_spec->tunnel.val.tunnel_id));
break;
+ case IB_FLOW_SPEC_ACTION_TAG:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
+ LAST_FLOW_TAG_FIELD))
+ return -EOPNOTSUPP;
+ if (ib_spec->flow_tag.tag_id >= BIT(24))
+ return -EINVAL;
+
+ *tag_id = ib_spec->flow_tag.tag_id;
+ break;
+ case IB_FLOW_SPEC_ACTION_DROP:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
+ LAST_DROP_FIELD))
+ return -EOPNOTSUPP;
+ *is_drop = true;
+ break;
default:
return -EINVAL;
}
@@ -1802,26 +2047,60 @@ static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr)
is_multicast_ether_addr(eth_spec->val.dst_mac);
}
-static bool is_valid_attr(const struct ib_flow_attr *flow_attr)
+static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
+ const struct ib_flow_attr *flow_attr,
+ bool check_inner)
{
union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
- bool has_ipv4_spec = false;
- bool eth_type_ipv4 = true;
+ int match_ipv = check_inner ?
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_ip_version) :
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_ip_version);
+ int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
+ bool ipv4_spec_valid, ipv6_spec_valid;
+ unsigned int ip_spec_type = 0;
+ bool has_ethertype = false;
unsigned int spec_index;
+ bool mask_valid = true;
+ u16 eth_type = 0;
+ bool type_valid;
/* Validate that ethertype is correct */
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
- if (ib_spec->type == IB_FLOW_SPEC_ETH &&
+ if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
ib_spec->eth.mask.ether_type) {
- if (!((ib_spec->eth.mask.ether_type == htons(0xffff)) &&
- ib_spec->eth.val.ether_type == htons(ETH_P_IP)))
- eth_type_ipv4 = false;
- } else if (ib_spec->type == IB_FLOW_SPEC_IPV4) {
- has_ipv4_spec = true;
+ mask_valid = (ib_spec->eth.mask.ether_type ==
+ htons(0xffff));
+ has_ethertype = true;
+ eth_type = ntohs(ib_spec->eth.val.ether_type);
+ } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
+ (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
+ ip_spec_type = ib_spec->type;
}
ib_spec = (void *)ib_spec + ib_spec->size;
}
- return !has_ipv4_spec || eth_type_ipv4;
+
+ type_valid = (!has_ethertype) || (!ip_spec_type);
+ if (!type_valid && mask_valid) {
+ ipv4_spec_valid = (eth_type == ETH_P_IP) &&
+ (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
+ ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
+ (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));
+
+ type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
+ (((eth_type == ETH_P_MPLS_UC) ||
+ (eth_type == ETH_P_MPLS_MC)) && match_ipv);
+ }
+
+ return type_valid;
+}
+
+static bool is_valid_attr(struct mlx5_core_dev *mdev,
+ const struct ib_flow_attr *flow_attr)
+{
+ return is_valid_ethertype(mdev, flow_attr, false) &&
+ is_valid_ethertype(mdev, flow_attr, true);
}
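The combinations accepted by is_valid_ethertype() can be summarized with the following predicate (a sketch that assumes the ethertype mask is the full 0xffff; it mirrors the logic above rather than any real helper):
/* Sketch, not part of the patch: which L2 ethertype / L3 spec pairs pass. */
static bool sketch_l2_l3_pair_valid(bool has_ethertype, u16 eth_type,
				    bool has_ipv4_spec, bool has_ipv6_spec,
				    bool match_ipv)
{
	if (!has_ethertype || (!has_ipv4_spec && !has_ipv6_spec))
		return true;		/* nothing to cross-check */
	if (eth_type == ETH_P_IP)
		return has_ipv4_spec;	/* 0x0800 needs an IPv4 spec */
	if (eth_type == ETH_P_IPV6)
		return has_ipv6_spec;	/* 0x86DD needs an IPv6 spec */
	if (eth_type == ETH_P_MPLS_UC || eth_type == ETH_P_MPLS_MC)
		return match_ipv;	/* MPLS only if HW can match ip_version */
	return false;
}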
static void put_flow_table(struct mlx5_ib_dev *dev,
@@ -1873,8 +2152,8 @@ enum flow_table_type {
MLX5_IB_FT_TX
};
-#define MLX5_FS_MAX_TYPES 10
-#define MLX5_FS_MAX_ENTRIES 32000UL
+#define MLX5_FS_MAX_TYPES 6
+#define MLX5_FS_MAX_ENTRIES BIT(16)
static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
struct ib_flow_attr *flow_attr,
enum flow_table_type ft_type)
@@ -1883,11 +2162,14 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
struct mlx5_flow_namespace *ns = NULL;
struct mlx5_ib_flow_prio *prio;
struct mlx5_flow_table *ft;
+ int max_table_size;
int num_entries;
int num_groups;
int priority;
int err = 0;
+ max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ log_max_ft_size));
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
if (flow_is_multicast_only(flow_attr) &&
!dont_trap)
@@ -1926,6 +2208,9 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
if (!ns)
return ERR_PTR(-ENOTSUPP);
+ if (num_entries > max_table_size)
+ return ERR_PTR(-ENOMEM);
+
ft = prio->flow_table;
if (!ft) {
ft = mlx5_create_auto_grouped_flow_table(ns, priority,
@@ -1953,14 +2238,18 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_handler *handler;
struct mlx5_flow_act flow_act = {0};
struct mlx5_flow_spec *spec;
+ struct mlx5_flow_destination *rule_dst = dst;
const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
unsigned int spec_index;
+ u32 flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+ bool is_drop = false;
int err = 0;
+ int dest_num = 1;
- if (!is_valid_attr(flow_attr))
+ if (!is_valid_attr(dev->mdev, flow_attr))
return ERR_PTR(-EINVAL);
- spec = mlx5_vzalloc(sizeof(*spec));
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
handler = kzalloc(sizeof(*handler), GFP_KERNEL);
if (!handler || !spec) {
err = -ENOMEM;
@@ -1970,8 +2259,9 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
INIT_LIST_HEAD(&handler->list);
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
- err = parse_flow_attr(spec->match_criteria,
- spec->match_value, ib_flow);
+ err = parse_flow_attr(dev->mdev, spec->match_criteria,
+ spec->match_value,
+ ib_flow, &flow_tag, &is_drop);
if (err < 0)
goto free;
@@ -1979,12 +2269,27 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
}
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
- flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
- MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
- flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+ if (is_drop) {
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+ rule_dst = NULL;
+ dest_num = 0;
+ } else {
+ flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
+ MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+ }
+
+ if (flow_tag != MLX5_FS_DEFAULT_FLOW_TAG &&
+ (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
+ mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
+ flow_tag, flow_attr->type);
+ err = -EINVAL;
+ goto free;
+ }
+ flow_act.flow_tag = flow_tag;
handler->rule = mlx5_add_flow_rules(ft, spec,
&flow_act,
- dst, 1);
+ rule_dst, dest_num);
if (IS_ERR(handler->rule)) {
err = PTR_ERR(handler->rule);
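The leftover-rule restriction introduced above reduces to the following predicate (sketch only; the attribute types come from the uverbs flow API and MLX5_FS_DEFAULT_FLOW_TAG is the default already used in this function):
/* Sketch, not part of the patch: a non-default flow tag may only be set on
 * rules the user created explicitly, never on the leftover catch-all rules.
 */
static bool sketch_flow_tag_allowed(u32 flow_tag, enum ib_flow_attr_type type)
{
	if (flow_tag == MLX5_FS_DEFAULT_FLOW_TAG)
		return true;
	return type != IB_FLOW_ATTR_ALL_DEFAULT &&
	       type != IB_FLOW_ATTR_MC_DEFAULT;
}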
@@ -2140,7 +2445,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
int err;
if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
- return ERR_PTR(-ENOSPC);
+ return ERR_PTR(-ENOMEM);
if (domain != IB_FLOW_DOMAIN_USER ||
flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) ||
@@ -2451,6 +2756,35 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
ibdev->ib_active = false;
}
+static int set_has_smi_cap(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_hca_vport_context vport_ctx;
+ int err;
+ int port;
+
+ for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
+ dev->mdev->port_caps[port - 1].has_smi = false;
+ if (MLX5_CAP_GEN(dev->mdev, port_type) ==
+ MLX5_CAP_PORT_TYPE_IB) {
+ if (MLX5_CAP_GEN(dev->mdev, ib_virt)) {
+ err = mlx5_query_hca_vport_context(dev->mdev, 0,
+ port, 0,
+ &vport_ctx);
+ if (err) {
+ mlx5_ib_err(dev, "query_hca_vport_context for port=%d failed %d\n",
+ port, err);
+ return err;
+ }
+ dev->mdev->port_caps[port - 1].has_smi =
+ vport_ctx.has_smi;
+ } else {
+ dev->mdev->port_caps[port - 1].has_smi = true;
+ }
+ }
+ }
+ return 0;
+}
+
static void get_ext_port_caps(struct mlx5_ib_dev *dev)
{
int port;
@@ -2475,6 +2809,10 @@ static int get_port_caps(struct mlx5_ib_dev *dev)
if (!dprops)
goto out;
+ err = set_has_smi_cap(dev);
+ if (err)
+ goto out;
+
err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw);
if (err) {
mlx5_ib_warn(dev, "query_device failed %d\n", err);
@@ -2482,6 +2820,7 @@ static int get_port_caps(struct mlx5_ib_dev *dev)
}
for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
+ memset(pprops, 0, sizeof(*pprops));
err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
if (err) {
mlx5_ib_warn(dev, "query_port %d failed %d\n",
@@ -2626,6 +2965,18 @@ error_0:
return ret;
}
+static u8 mlx5_get_umr_fence(u8 umr_fence_cap)
+{
+ switch (umr_fence_cap) {
+ case MLX5_CAP_UMR_FENCE_NONE:
+ return MLX5_FENCE_MODE_NONE;
+ case MLX5_CAP_UMR_FENCE_SMALL:
+ return MLX5_FENCE_MODE_INITIATOR_SMALL;
+ default:
+ return MLX5_FENCE_MODE_STRONG_ORDERING;
+ }
+}
+
static int create_dev_resources(struct mlx5_ib_resources *devr)
{
struct ib_srq_init_attr attr;
@@ -2776,11 +3127,13 @@ static u32 get_core_cap_flags(struct ib_device *ibdev)
if (ll == IB_LINK_LAYER_INFINIBAND)
return RDMA_CORE_PORT_IBA_IB;
+ ret = RDMA_CORE_PORT_RAW_PACKET;
+
if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
- return 0;
+ return ret;
if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
- return 0;
+ return ret;
if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
ret |= RDMA_CORE_PORT_IBA_ROCE;
@@ -2799,7 +3152,9 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
int err;
- err = mlx5_ib_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = get_core_cap_flags(ibdev);
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
@@ -2920,29 +3275,148 @@ static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
mlx5_nic_vport_disable_roce(dev->mdev);
}
-static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
+struct mlx5_ib_counter {
+ const char *name;
+ size_t offset;
+};
+
+#define INIT_Q_COUNTER(_name) \
+ { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
+
+static const struct mlx5_ib_counter basic_q_cnts[] = {
+ INIT_Q_COUNTER(rx_write_requests),
+ INIT_Q_COUNTER(rx_read_requests),
+ INIT_Q_COUNTER(rx_atomic_requests),
+ INIT_Q_COUNTER(out_of_buffer),
+};
+
+static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
+ INIT_Q_COUNTER(out_of_sequence),
+};
+
+static const struct mlx5_ib_counter retrans_q_cnts[] = {
+ INIT_Q_COUNTER(duplicate_request),
+ INIT_Q_COUNTER(rnr_nak_retry_err),
+ INIT_Q_COUNTER(packet_seq_err),
+ INIT_Q_COUNTER(implied_nak_seq_err),
+ INIT_Q_COUNTER(local_ack_timeout_err),
+};
+
+#define INIT_CONG_COUNTER(_name) \
+ { .name = #_name, .offset = \
+ MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
+
+static const struct mlx5_ib_counter cong_cnts[] = {
+ INIT_CONG_COUNTER(rp_cnp_ignored),
+ INIT_CONG_COUNTER(rp_cnp_handled),
+ INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
+ INIT_CONG_COUNTER(np_cnp_sent),
+};
+
+static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
unsigned int i;
- for (i = 0; i < dev->num_ports; i++)
+ for (i = 0; i < dev->num_ports; i++) {
mlx5_core_dealloc_q_counter(dev->mdev,
- dev->port[i].q_cnt_id);
+ dev->port[i].cnts.set_id);
+ kfree(dev->port[i].cnts.names);
+ kfree(dev->port[i].cnts.offsets);
+ }
}
-static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
+static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_counters *cnts)
+{
+ u32 num_counters;
+
+ num_counters = ARRAY_SIZE(basic_q_cnts);
+
+ if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
+ num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
+
+ if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
+ num_counters += ARRAY_SIZE(retrans_q_cnts);
+ cnts->num_q_counters = num_counters;
+
+ if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+ cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
+ num_counters += ARRAY_SIZE(cong_cnts);
+ }
+
+ cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL);
+ if (!cnts->names)
+ return -ENOMEM;
+
+ cnts->offsets = kcalloc(num_counters,
+ sizeof(cnts->offsets), GFP_KERNEL);
+ if (!cnts->offsets)
+ goto err_names;
+
+ return 0;
+
+err_names:
+ kfree(cnts->names);
+ return -ENOMEM;
+}
+
+static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
+ const char **names,
+ size_t *offsets)
+{
+ int i;
+ int j = 0;
+
+ for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
+ names[j] = basic_q_cnts[i].name;
+ offsets[j] = basic_q_cnts[i].offset;
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
+ for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
+ names[j] = out_of_seq_q_cnts[i].name;
+ offsets[j] = out_of_seq_q_cnts[i].offset;
+ }
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
+ for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
+ names[j] = retrans_q_cnts[i].name;
+ offsets[j] = retrans_q_cnts[i].offset;
+ }
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+ for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
+ names[j] = cong_cnts[i].name;
+ offsets[j] = cong_cnts[i].offset;
+ }
+ }
+}
+
+static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
{
int i;
int ret;
for (i = 0; i < dev->num_ports; i++) {
+ struct mlx5_ib_port *port = &dev->port[i];
+
ret = mlx5_core_alloc_q_counter(dev->mdev,
- &dev->port[i].q_cnt_id);
+ &port->cnts.set_id);
if (ret) {
mlx5_ib_warn(dev,
"couldn't allocate queue counter for port %d, err %d\n",
i + 1, ret);
goto dealloc_counters;
}
+
+ ret = __mlx5_ib_alloc_counters(dev, &port->cnts);
+ if (ret)
+ goto dealloc_counters;
+
+ mlx5_ib_fill_counters(dev, port->cnts.names,
+ port->cnts.offsets);
}
return 0;
@@ -2950,81 +3424,136 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
dealloc_counters:
while (--i >= 0)
mlx5_core_dealloc_q_counter(dev->mdev,
- dev->port[i].q_cnt_id);
+ dev->port[i].cnts.set_id);
return ret;
}
-static const char * const names[] = {
- "rx_write_requests",
- "rx_read_requests",
- "rx_atomic_requests",
- "out_of_buffer",
- "out_of_sequence",
- "duplicate_request",
- "rnr_nak_retry_err",
- "packet_seq_err",
- "implied_nak_seq_err",
- "local_ack_timeout_err",
-};
-
-static const size_t stats_offsets[] = {
- MLX5_BYTE_OFF(query_q_counter_out, rx_write_requests),
- MLX5_BYTE_OFF(query_q_counter_out, rx_read_requests),
- MLX5_BYTE_OFF(query_q_counter_out, rx_atomic_requests),
- MLX5_BYTE_OFF(query_q_counter_out, out_of_buffer),
- MLX5_BYTE_OFF(query_q_counter_out, out_of_sequence),
- MLX5_BYTE_OFF(query_q_counter_out, duplicate_request),
- MLX5_BYTE_OFF(query_q_counter_out, rnr_nak_retry_err),
- MLX5_BYTE_OFF(query_q_counter_out, packet_seq_err),
- MLX5_BYTE_OFF(query_q_counter_out, implied_nak_seq_err),
- MLX5_BYTE_OFF(query_q_counter_out, local_ack_timeout_err),
-};
-
static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
u8 port_num)
{
- BUILD_BUG_ON(ARRAY_SIZE(names) != ARRAY_SIZE(stats_offsets));
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_ib_port *port = &dev->port[port_num - 1];
/* We support only per port stats */
if (port_num == 0)
return NULL;
- return rdma_alloc_hw_stats_struct(names, ARRAY_SIZE(names),
+ return rdma_alloc_hw_stats_struct(port->cnts.names,
+ port->cnts.num_q_counters +
+ port->cnts.num_cong_counters,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
-static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
- struct rdma_hw_stats *stats,
- u8 port, int index)
+static int mlx5_ib_query_q_counters(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_port *port,
+ struct rdma_hw_stats *stats)
{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
void *out;
__be32 val;
- int ret;
- int i;
+ int ret, i;
- if (!port || !stats)
- return -ENOSYS;
-
- out = mlx5_vzalloc(outlen);
+ out = kvzalloc(outlen, GFP_KERNEL);
if (!out)
return -ENOMEM;
ret = mlx5_core_query_q_counter(dev->mdev,
- dev->port[port - 1].q_cnt_id, 0,
+ port->cnts.set_id, 0,
out, outlen);
if (ret)
goto free;
- for (i = 0; i < ARRAY_SIZE(names); i++) {
- val = *(__be32 *)(out + stats_offsets[i]);
+ for (i = 0; i < port->cnts.num_q_counters; i++) {
+ val = *(__be32 *)(out + port->cnts.offsets[i]);
stats->value[i] = (u64)be32_to_cpu(val);
}
+
free:
kvfree(out);
- return ARRAY_SIZE(names);
+ return ret;
+}
+
+static int mlx5_ib_query_cong_counters(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_port *port,
+ struct rdma_hw_stats *stats)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
+ void *out;
+ int ret, i;
+ int offset = port->cnts.num_q_counters;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ ret = mlx5_cmd_query_cong_counter(dev->mdev, false, out, outlen);
+ if (ret)
+ goto free;
+
+ for (i = 0; i < port->cnts.num_cong_counters; i++) {
+ stats->value[i + offset] =
+ be64_to_cpup((__be64 *)(out +
+ port->cnts.offsets[i + offset]));
+ }
+
+free:
+ kvfree(out);
+ return ret;
+}
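For reference, a sketch (not part of the patch) of how a single stats slot is decoded from the two firmware queries: Q counters are 32-bit big-endian fields, while congestion counters are 64-bit big-endian and fill the slots after cnts->num_q_counters.
/* Sketch, not part of the patch. */
static u64 sketch_decode_counter(void *out, size_t offset, bool is_cong_counter)
{
	if (is_cong_counter)
		return be64_to_cpup((__be64 *)(out + offset));
	return be32_to_cpu(*(__be32 *)(out + offset));
}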
+
+static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u8 port_num, int index)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_ib_port *port = &dev->port[port_num - 1];
+ int ret, num_counters;
+
+ if (!stats)
+ return -EINVAL;
+
+ ret = mlx5_ib_query_q_counters(dev, port, stats);
+ if (ret)
+ return ret;
+ num_counters = port->cnts.num_q_counters;
+
+ if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+ ret = mlx5_ib_query_cong_counters(dev, port, stats);
+ if (ret)
+ return ret;
+ num_counters += port->cnts.num_cong_counters;
+ }
+
+ return num_counters;
+}
+
+static void mlx5_ib_free_rdma_netdev(struct net_device *netdev)
+{
+ return mlx5_rdma_netdev_free(netdev);
+}
+
+static struct net_device*
+mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
+ u8 port_num,
+ enum rdma_netdev_t type,
+ const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *))
+{
+ struct net_device *netdev;
+ struct rdma_netdev *rn;
+
+ if (type != RDMA_NETDEV_IPOIB)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ netdev = mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca,
+ name, setup);
+ if (likely(!IS_ERR_OR_NULL(netdev))) {
+ rn = netdev_priv(netdev);
+ rn->free_rdma_netdev = mlx5_ib_free_rdma_netdev;
+ }
+ return netdev;
}
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
@@ -3060,8 +3589,6 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
if (mlx5_use_mad_ifc(dev))
get_ext_port_caps(dev);
- MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
-
if (!mlx5_lag_is_active(mdev))
name = "mlx5_%d";
else
@@ -3075,7 +3602,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.phys_port_cnt = dev->num_ports;
dev->ib_dev.num_comp_vectors =
dev->mdev->priv.eq_table.num_comp_vectors;
- dev->ib_dev.dma_device = &mdev->pdev->dev;
+ dev->ib_dev.dev.parent = &mdev->pdev->dev;
dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
dev->ib_dev.uverbs_cmd_mask =
@@ -3159,6 +3686,9 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
dev->ib_dev.get_port_immutable = mlx5_port_immutable;
dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
+ if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads))
+ dev->ib_dev.alloc_rdma_netdev = mlx5_ib_alloc_rdma_netdev;
+
if (mlx5_core_is_pf(mdev)) {
dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config;
dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state;
@@ -3170,6 +3700,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
mlx5_ib_internal_fill_odp_caps(dev);
+ dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence));
+
if (MLX5_CAP_GEN(mdev, imaicl)) {
dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw;
dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw;
@@ -3178,8 +3710,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
}
- if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
- MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
+ if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats;
}
@@ -3233,13 +3764,27 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
if (err)
goto err_rsrc;
- err = mlx5_ib_alloc_q_counters(dev);
+ if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
+ err = mlx5_ib_alloc_counters(dev);
+ if (err)
+ goto err_odp;
+ }
+
+ dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
+ if (!dev->mdev->priv.uar)
+ goto err_cnt;
+
+ err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false);
+ if (err)
+ goto err_uar_page;
+
+ err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true);
if (err)
- goto err_odp;
+ goto err_bfreg;
err = ib_register_device(&dev->ib_dev, NULL);
if (err)
- goto err_q_cnt;
+ goto err_fp_bfreg;
err = create_umr_res(dev);
if (err)
@@ -3262,8 +3807,18 @@ err_umrc:
err_dev:
ib_unregister_device(&dev->ib_dev);
-err_q_cnt:
- mlx5_ib_dealloc_q_counters(dev);
+err_fp_bfreg:
+ mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
+
+err_bfreg:
+ mlx5_free_bfreg(dev->mdev, &dev->bfreg);
+
+err_uar_page:
+ mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
+
+err_cnt:
+ if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
+ mlx5_ib_dealloc_counters(dev);
err_odp:
mlx5_ib_odp_remove_one(dev);
@@ -3293,7 +3848,11 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
mlx5_remove_netdev_notifier(dev);
ib_unregister_device(&dev->ib_dev);
- mlx5_ib_dealloc_q_counters(dev);
+ mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
+ mlx5_free_bfreg(dev->mdev, &dev->bfreg);
+ mlx5_put_uars_page(dev->mdev, mdev->priv.uar);
+ if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
+ mlx5_ib_dealloc_counters(dev);
destroy_umrc_res(dev);
mlx5_ib_odp_remove_one(dev);
destroy_dev_resources(&dev->devr);
@@ -3307,6 +3866,9 @@ static struct mlx5_interface mlx5_ib_interface = {
.add = mlx5_ib_add,
.remove = mlx5_ib_remove,
.event = mlx5_ib_event,
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ .pfault = mlx5_ib_pfault,
+#endif
.protocol = MLX5_INTERFACE_PROTOCOL_IB,
};
@@ -3314,28 +3876,16 @@ static int __init mlx5_ib_init(void)
{
int err;
- if (deprecated_prof_sel != 2)
- pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
-
- err = mlx5_ib_odp_init();
- if (err)
- return err;
+ mlx5_ib_odp_init();
err = mlx5_register_interface(&mlx5_ib_interface);
- if (err)
- goto clean_odp;
-
- return err;
-clean_odp:
- mlx5_ib_odp_cleanup();
return err;
}
static void __exit mlx5_ib_cleanup(void)
{
mlx5_unregister_interface(&mlx5_ib_interface);
- mlx5_ib_odp_cleanup();
}
module_init(mlx5_ib_init);
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 6851357c16f4..914f212e7ef6 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -59,15 +59,14 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
u64 pfn;
struct scatterlist *sg;
int entry;
- unsigned long page_shift = ilog2(umem->page_size);
+ unsigned long page_shift = umem->page_shift;
- /* With ODP we must always match OS page size. */
if (umem->odp_data) {
- *count = ib_umem_page_count(umem);
- *shift = PAGE_SHIFT;
- *ncont = *count;
+ *ncont = ib_umem_page_count(umem);
+ *count = *ncont << (page_shift - PAGE_SHIFT);
+ *shift = page_shift;
if (order)
- *order = ilog2(roundup_pow_of_two(*count));
+ *order = ilog2(roundup_pow_of_two(*ncont));
return;
}
@@ -156,10 +155,10 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
int page_shift, size_t offset, size_t num_pages,
__be64 *pas, int access_flags)
{
- unsigned long umem_page_shift = ilog2(umem->page_size);
+ unsigned long umem_page_shift = umem->page_shift;
int shift = page_shift - umem_page_shift;
int mask = (1 << shift) - 1;
- int i, k;
+ int i, k, idx;
u64 cur = 0;
u64 base;
int len;
@@ -185,18 +184,36 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
len = sg_dma_len(sg) >> umem_page_shift;
base = sg_dma_address(sg);
- for (k = 0; k < len; k++) {
+
+ /* Skip elements below offset */
+ if (i + len < offset << shift) {
+ i += len;
+ continue;
+ }
+
+ /* Skip pages below offset */
+ if (i < offset << shift) {
+ k = (offset << shift) - i;
+ i = offset << shift;
+ } else {
+ k = 0;
+ }
+
+ for (; k < len; k++) {
if (!(i & mask)) {
cur = base + (k << umem_page_shift);
cur |= access_flags;
+ idx = (i >> shift) - offset;
- pas[i >> shift] = cpu_to_be64(cur);
+ pas[idx] = cpu_to_be64(cur);
mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
- i >> shift, be64_to_cpu(pas[i >> shift]));
- } else
- mlx5_ib_dbg(dev, "=====> 0x%llx\n",
- base + (k << umem_page_shift));
+ i >> shift, be64_to_cpu(pas[idx]));
+ }
i++;
+
+ /* Stop after num_pages reached */
+ if (i >> shift >= offset + num_pages)
+ return;
}
}
}
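A sketch (not part of the patch) of the windowing the reworked loop implements: i counts umem pages, each output slot aggregates 2^shift of them, and only slots in [offset, offset + num_pages) are written.
/* Sketch, not part of the patch. */
static bool sketch_slot_in_window(int umem_page_idx, int shift,
				  size_t offset, size_t num_pages)
{
	size_t slot = umem_page_idx >> shift;

	return slot >= offset && slot < offset + num_pages;
}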
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 6c6057eb60ea..bdcf25410c99 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -90,7 +90,6 @@ enum mlx5_ib_latency_class {
MLX5_IB_LATENCY_CLASS_LOW,
MLX5_IB_LATENCY_CLASS_MEDIUM,
MLX5_IB_LATENCY_CLASS_HIGH,
- MLX5_IB_LATENCY_CLASS_FAST_PATH
};
enum mlx5_ib_mad_ifc_flags {
@@ -100,7 +99,7 @@ enum mlx5_ib_mad_ifc_flags {
};
enum {
- MLX5_CROSS_CHANNEL_UUAR = 0,
+ MLX5_CROSS_CHANNEL_BFREG = 0,
};
enum {
@@ -120,11 +119,16 @@ struct mlx5_ib_ucontext {
/* protect doorbell record alloc/free
*/
struct mutex db_page_mutex;
- struct mlx5_uuar_info uuari;
+ struct mlx5_bfreg_info bfregi;
u8 cqe_version;
/* Transport Domain number */
u32 tdn;
struct list_head vma_private_list;
+
+ unsigned long upd_xlt_page;
+ /* protect ODP/KSM */
+ struct mutex upd_xlt_page_mutex;
+ u64 lib_caps;
};
static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -174,13 +178,12 @@ struct mlx5_ib_flow_db {
* enum ib_send_flags and enum ib_qp_type for low-level driver
*/
-#define MLX5_IB_SEND_UMR_UNREG IB_SEND_RESERVED_START
-#define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 1)
-#define MLX5_IB_SEND_UMR_UPDATE_MTT (IB_SEND_RESERVED_START << 2)
-
-#define MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (IB_SEND_RESERVED_START << 3)
-#define MLX5_IB_SEND_UMR_UPDATE_PD (IB_SEND_RESERVED_START << 4)
-#define MLX5_IB_SEND_UMR_UPDATE_ACCESS IB_SEND_RESERVED_END
+#define MLX5_IB_SEND_UMR_ENABLE_MR (IB_SEND_RESERVED_START << 0)
+#define MLX5_IB_SEND_UMR_DISABLE_MR (IB_SEND_RESERVED_START << 1)
+#define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 2)
+#define MLX5_IB_SEND_UMR_UPDATE_XLT (IB_SEND_RESERVED_START << 3)
+#define MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (IB_SEND_RESERVED_START << 4)
+#define MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS IB_SEND_RESERVED_END
#define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1
/*
@@ -190,6 +193,17 @@ struct mlx5_ib_flow_db {
#define MLX5_IB_QPT_HW_GSI IB_QPT_RESERVED2
#define MLX5_IB_WR_UMR IB_WR_RESERVED1
+#define MLX5_IB_UMR_OCTOWORD 16
+#define MLX5_IB_UMR_XLT_ALIGNMENT 64
+
+#define MLX5_IB_UPD_XLT_ZAP BIT(0)
+#define MLX5_IB_UPD_XLT_ENABLE BIT(1)
+#define MLX5_IB_UPD_XLT_ATOMIC BIT(2)
+#define MLX5_IB_UPD_XLT_ADDR BIT(3)
+#define MLX5_IB_UPD_XLT_PD BIT(4)
+#define MLX5_IB_UPD_XLT_ACCESS BIT(5)
+#define MLX5_IB_UPD_XLT_INDIRECT BIT(6)
+
/* Private QP creation flags to be passed in ib_qp_init_attr.create_flags.
*
* These flags are intended for internal use by the mlx5_ib driver, and they
@@ -207,6 +221,10 @@ struct wr_list {
u16 next;
};
+enum mlx5_ib_rq_flags {
+ MLX5_IB_RQ_CVLAN_STRIPPING = 1 << 0,
+};
+
struct mlx5_ib_wq {
u64 *wrid;
u32 *wr_data;
@@ -264,29 +282,6 @@ struct mlx5_ib_rwq_ind_table {
u32 rqtn;
};
-/*
- * Connect-IB can trigger up to four concurrent pagefaults
- * per-QP.
- */
-enum mlx5_ib_pagefault_context {
- MLX5_IB_PAGEFAULT_RESPONDER_READ,
- MLX5_IB_PAGEFAULT_REQUESTOR_READ,
- MLX5_IB_PAGEFAULT_RESPONDER_WRITE,
- MLX5_IB_PAGEFAULT_REQUESTOR_WRITE,
- MLX5_IB_PAGEFAULT_CONTEXTS
-};
-
-static inline enum mlx5_ib_pagefault_context
- mlx5_ib_get_pagefault_context(struct mlx5_pagefault *pagefault)
-{
- return pagefault->flags & (MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE);
-}
-
-struct mlx5_ib_pfault {
- struct work_struct work;
- struct mlx5_pagefault mpfault;
-};
-
struct mlx5_ib_ubuffer {
struct ib_umem *umem;
int buf_size;
@@ -318,6 +313,7 @@ struct mlx5_ib_rq {
struct mlx5_db *doorbell;
u32 tirn;
u8 state;
+ u32 flags;
};
struct mlx5_ib_sq {
@@ -334,6 +330,12 @@ struct mlx5_ib_raw_packet_qp {
struct mlx5_ib_rq rq;
};
+struct mlx5_bf {
+ int buf_size;
+ unsigned long offset;
+ struct mlx5_sq_bfreg *bfreg;
+};
+
struct mlx5_ib_qp {
struct ib_qp ibqp;
union {
@@ -347,7 +349,7 @@ struct mlx5_ib_qp {
struct mlx5_ib_wq rq;
u8 sq_signal_bits;
- u8 fm_cache;
+ u8 next_fence;
struct mlx5_ib_wq sq;
/* serialize qp state modifications
@@ -359,33 +361,19 @@ struct mlx5_ib_qp {
int wq_sig;
int scat_cqe;
int max_inline_data;
- struct mlx5_bf *bf;
+ struct mlx5_bf bf;
int has_rq;
/* only for user space QPs. For kernel
* we have it from the bf object
*/
- int uuarn;
+ int bfregn;
int create_type;
/* Store signature errors */
bool signature_en;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- /*
- * A flag that is true for QP's that are in a state that doesn't
- * allow page faults, and shouldn't schedule any more faults.
- */
- int disable_page_faults;
- /*
- * The disable_page_faults_lock protects a QP's disable_page_faults
- * field, allowing for a thread to atomically check whether the QP
- * allows page faults, and if so schedule a page fault.
- */
- spinlock_t disable_page_faults_lock;
- struct mlx5_ib_pfault pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS];
-#endif
struct list_head qps_list;
struct list_head cq_recv_list;
struct list_head cq_send_list;
@@ -410,17 +398,16 @@ enum mlx5_ib_qp_flags {
MLX5_IB_QP_SQPN_QP1 = 1 << 6,
MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7,
MLX5_IB_QP_RSS = 1 << 8,
+ MLX5_IB_QP_CVLAN_STRIPPING = 1 << 9,
};
struct mlx5_umr_wr {
struct ib_send_wr wr;
- union {
- u64 virt_addr;
- u64 offset;
- } target;
+ u64 virt_addr;
+ u64 offset;
struct ib_pd *pd;
unsigned int page_shift;
- unsigned int npages;
+ unsigned int xlt_size;
u64 length;
int access_flags;
u32 mkey;
@@ -517,11 +504,16 @@ struct mlx5_ib_mr {
int live;
void *descs_alloc;
int access_flags; /* Needed for rereg MR */
+
+ struct mlx5_ib_mr *parent;
+ atomic_t num_leaf_free;
+ wait_queue_head_t q_leaf_free;
};
struct mlx5_ib_mw {
struct ib_mw ibmw;
struct mlx5_core_mkey mmkey;
+ int ndescs;
};
struct mlx5_ib_umr_context {
@@ -555,6 +547,10 @@ struct mlx5_cache_ent {
struct dentry *dir;
char name[4];
u32 order;
+ u32 xlt;
+ u32 access_mode;
+ u32 page;
+
u32 size;
u32 cur;
u32 miss;
@@ -569,6 +565,7 @@ struct mlx5_cache_ent {
struct work_struct work;
struct delayed_work dwork;
int pending;
+ struct completion compl;
};
struct mlx5_mr_cache {
@@ -599,8 +596,16 @@ struct mlx5_ib_resources {
struct mutex mutex;
};
+struct mlx5_ib_counters {
+ const char **names;
+ size_t *offsets;
+ u32 num_q_counters;
+ u32 num_cong_counters;
+ u16 set_id;
+};
+
struct mlx5_ib_port {
- u16 q_cnt_id;
+ struct mlx5_ib_counters cnts;
};
struct mlx5_roce {
@@ -617,7 +622,6 @@ struct mlx5_ib_dev {
struct ib_device ib_dev;
struct mlx5_core_dev *mdev;
struct mlx5_roce roce;
- MLX5_DECLARE_DOORBELL_LOCK(uar_lock);
int num_ports;
/* serialize update of capability mask
*/
@@ -634,11 +638,13 @@ struct mlx5_ib_dev {
int fill_delay;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
struct ib_odp_caps odp_caps;
+ u64 odp_max_size;
/*
* Sleepable RCU that prevents destruction of MRs while they are still
* being used by a page fault handler.
*/
struct srcu_struct mr_srcu;
+ u32 null_mkey;
#endif
struct mlx5_ib_flow_db flow_db;
/* protect resources needed as part of reset flow */
@@ -646,6 +652,9 @@ struct mlx5_ib_dev {
struct list_head qp_list;
/* Array with num_ports elements */
struct mlx5_ib_port *port;
+ struct mlx5_sq_bfreg bfreg;
+ struct mlx5_sq_bfreg fp_bfreg;
+ u8 umr_fence;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -723,16 +732,6 @@ static inline struct mlx5_ib_mw *to_mmw(struct ib_mw *ibmw)
return container_of(ibmw, struct mlx5_ib_mw, ibmw);
}
-struct mlx5_ib_ah {
- struct ib_ah ibah;
- struct mlx5_av av;
-};
-
-static inline struct mlx5_ib_ah *to_mah(struct ib_ah *ibah)
-{
- return container_of(ibah, struct mlx5_ib_ah, ibah);
-}
-
int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
struct mlx5_db *db);
void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db);
@@ -742,9 +741,9 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const void *in_mad, void *response_mad);
-struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
struct ib_udata *udata);
-int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
+int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
int mlx5_ib_destroy_ah(struct ib_ah *ah);
struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
struct ib_srq_init_attr *init_attr,
@@ -787,8 +786,11 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
struct ib_udata *udata);
int mlx5_ib_dealloc_mw(struct ib_mw *mw);
-int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index,
- int npages, int zap);
+int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
+ int page_shift, int flags);
+struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
+ int access_flags);
+void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
u64 length, u64 virt_addr, int access_flags,
struct ib_pd *pd, struct ib_udata *udata);
@@ -842,7 +844,9 @@ void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
-int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
+
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry);
+void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
struct ib_mr_status *mr_status);
struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
@@ -857,33 +861,32 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-extern struct workqueue_struct *mlx5_ib_page_fault_wq;
-
void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev);
-void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
- struct mlx5_ib_pfault *pfault);
-void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp);
+void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
+ struct mlx5_pagefault *pfault);
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev);
int __init mlx5_ib_odp_init(void);
void mlx5_ib_odp_cleanup(void);
-void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp);
-void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp);
void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
unsigned long end);
+void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent);
+void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
+ size_t nentries, struct mlx5_ib_mr *mr, int flags);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
{
return;
}
-static inline void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp) {}
static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
-static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {}
+static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {}
static inline int mlx5_ib_odp_init(void) { return 0; }
-static inline void mlx5_ib_odp_cleanup(void) {}
-static inline void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp) {}
-static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) {}
+static inline void mlx5_ib_odp_cleanup(void) {}
+static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {}
+static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
+ size_t nentries, struct mlx5_ib_mr *mr,
+ int flags) {}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
@@ -898,6 +901,8 @@ int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
int index);
+int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
+ int index, enum ib_gid_type *gid_type);
/* GSI QP helper functions */
struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
@@ -1001,4 +1006,17 @@ static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext,
return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
}
+
+static inline int get_uars_per_sys_page(struct mlx5_ib_dev *dev, bool lib_support)
+{
+ return lib_support && MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+ MLX5_UARS_IN_PAGE : 1;
+}
+
+static inline int get_num_uars(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi)
+{
+ return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_sys_pages;
+}
+
#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 8f608debe141..2c40a2e989d2 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -46,14 +46,10 @@ enum {
};
#define MLX5_UMR_ALIGN 2048
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-static __be64 mlx5_ib_update_mtt_emergency_buffer[
- MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
- __aligned(MLX5_UMR_ALIGN);
-static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
-#endif
static int clean_mr(struct mlx5_ib_mr *mr);
+static int use_umr(struct mlx5_ib_dev *dev, int order);
+static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
@@ -134,6 +130,7 @@ static void reg_mr_callback(int status, void *context)
return;
}
+ mr->mmkey.type = MLX5_MKEY_MR;
spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
key = dev->mdev->priv.mkey_key++;
spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
@@ -153,6 +150,9 @@ static void reg_mr_callback(int status, void *context)
if (err)
pr_err("Error inserting to mkey tree. 0x%x\n", -err);
write_unlock_irqrestore(&table->lock, flags);
+
+ if (!completion_done(&ent->compl))
+ complete(&ent->compl);
}
static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
@@ -161,7 +161,6 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
struct mlx5_cache_ent *ent = &cache->ent[c];
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
struct mlx5_ib_mr *mr;
- int npages = 1 << ent->order;
void *mkc;
u32 *in;
int err = 0;
@@ -189,11 +188,11 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
MLX5_SET(mkc, mkc, free, 1);
MLX5_SET(mkc, mkc, umr_en, 1);
- MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
+ MLX5_SET(mkc, mkc, access_mode, ent->access_mode);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2);
- MLX5_SET(mkc, mkc, log_page_size, 12);
+ MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
+ MLX5_SET(mkc, mkc, log_page_size, ent->page);
spin_lock_irq(&ent->lock);
ent->pending++;
@@ -451,6 +450,42 @@ static void cache_work_func(struct work_struct *work)
__cache_work_func(ent);
}
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
+{
+ struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_cache_ent *ent;
+ struct mlx5_ib_mr *mr;
+ int err;
+
+ if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
+ mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
+ return NULL;
+ }
+
+ ent = &cache->ent[entry];
+ while (1) {
+ spin_lock_irq(&ent->lock);
+ if (list_empty(&ent->head)) {
+ spin_unlock_irq(&ent->lock);
+
+ err = add_keys(dev, entry, 1);
+ if (err && err != -EAGAIN)
+ return ERR_PTR(err);
+
+ wait_for_completion(&ent->compl);
+ } else {
+ mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
+ list);
+ list_del(&mr->list);
+ ent->cur--;
+ spin_unlock_irq(&ent->lock);
+ if (ent->cur < ent->limit)
+ queue_work(cache->wq, &ent->work);
+ return mr;
+ }
+ }
+}
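A hedged sketch of how a caller is expected to use the new cache entry points; the intended users are the implicit-ODP MR paths added elsewhere in this series, and the entry index is simply whichever cache bucket the caller sized for its descriptors.
/* Sketch, not part of the patch: allocate an mkey from a cache bucket and
 * hand it back when done instead of destroying it.
 */
static void sketch_cache_roundtrip(struct mlx5_ib_dev *dev, int entry)
{
	struct mlx5_ib_mr *mr = mlx5_mr_cache_alloc(dev, entry);

	if (IS_ERR_OR_NULL(mr))
		return;
	/* ... program the mkey with a UMR WQE and use it ... */
	mlx5_mr_cache_free(dev, mr);
}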
+
static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
struct mlx5_mr_cache *cache = &dev->cache;
@@ -460,12 +495,12 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
int i;
c = order2idx(dev, order);
- if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
+ if (c < 0 || c > MAX_UMR_CACHE_ENTRY) {
mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
return NULL;
}
- for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
+ for (i = c; i < MAX_UMR_CACHE_ENTRY; i++) {
ent = &cache->ent[i];
mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
@@ -492,7 +527,7 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
return mr;
}
-static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
@@ -504,6 +539,10 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
return;
}
+
+ if (unreg_umr(dev, mr))
+ return;
+
ent = &cache->ent[c];
spin_lock_irq(&ent->lock);
list_add_tail(&mr->list, &ent->head);
@@ -543,6 +582,15 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
}
}
+static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
+{
+ if (!mlx5_debugfs_root)
+ return;
+
+ debugfs_remove_recursive(dev->cache.root);
+ dev->cache.root = NULL;
+}
+
static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
struct mlx5_mr_cache *cache = &dev->cache;
@@ -561,38 +609,34 @@ static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
sprintf(ent->name, "%d", ent->order);
ent->dir = debugfs_create_dir(ent->name, cache->root);
if (!ent->dir)
- return -ENOMEM;
+ goto err;
ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
&size_fops);
if (!ent->fsize)
- return -ENOMEM;
+ goto err;
ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
&limit_fops);
if (!ent->flimit)
- return -ENOMEM;
+ goto err;
ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
&ent->cur);
if (!ent->fcur)
- return -ENOMEM;
+ goto err;
ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
&ent->miss);
if (!ent->fmiss)
- return -ENOMEM;
+ goto err;
}
return 0;
-}
-
-static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
-{
- if (!mlx5_debugfs_root)
- return;
+err:
+ mlx5_mr_cache_debugfs_cleanup(dev);
- debugfs_remove_recursive(dev->cache.root);
+ return -ENOMEM;
}
static void delay_time_func(unsigned long ctx)
@@ -606,7 +650,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
- int limit;
int err;
int i;
@@ -619,31 +662,46 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
- INIT_LIST_HEAD(&cache->ent[i].head);
- spin_lock_init(&cache->ent[i].lock);
-
ent = &cache->ent[i];
INIT_LIST_HEAD(&ent->head);
spin_lock_init(&ent->lock);
ent->order = i + 2;
ent->dev = dev;
+ ent->limit = 0;
- if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
- (mlx5_core_is_pf(dev->mdev)))
- limit = dev->mdev->profile->mr_cache[i].limit;
- else
- limit = 0;
-
+ init_completion(&ent->compl);
INIT_WORK(&ent->work, cache_work_func);
INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
- ent->limit = limit;
queue_work(cache->wq, &ent->work);
+
+ if (i > MAX_UMR_CACHE_ENTRY) {
+ mlx5_odp_init_mr_cache_entry(ent);
+ continue;
+ }
+
+ if (!use_umr(dev, ent->order))
+ continue;
+
+ ent->page = PAGE_SHIFT;
+ ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
+ MLX5_IB_UMR_OCTOWORD;
+ ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
+ if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
+ mlx5_core_is_pf(dev->mdev))
+ ent->limit = dev->mdev->profile->mr_cache[i].limit;
+ else
+ ent->limit = 0;
}
err = mlx5_mr_cache_debugfs_init(dev);
if (err)
mlx5_ib_warn(dev, "cache debugfs failure\n");
+ /*
+ * We don't want to fail driver if debugfs failed to initialize,
+ * so we are not forwarding error to the user.
+ */
+
return 0;
}
@@ -732,6 +790,7 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
goto err_in;
kfree(in);
+ mr->mmkey.type = MLX5_MKEY_MR;
mr->ibmr.lkey = mr->mmkey.key;
mr->ibmr.rkey = mr->mmkey.key;
mr->umem = NULL;
@@ -757,94 +816,13 @@ static int get_octo_len(u64 addr, u64 len, int page_size)
return (npages + 1) / 2;
}
-static int use_umr(int order)
+static int use_umr(struct mlx5_ib_dev *dev, int order)
{
+ if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
+ return order <= MAX_UMR_CACHE_ENTRY + 2;
return order <= MLX5_MAX_UMR_SHIFT;
}
-static int dma_map_mr_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
- int npages, int page_shift, int *size,
- __be64 **mr_pas, dma_addr_t *dma)
-{
- __be64 *pas;
- struct device *ddev = dev->ib_dev.dma_device;
-
- /*
- * UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
- * To avoid copying garbage after the pas array, we allocate
- * a little more.
- */
- *size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
- *mr_pas = kmalloc(*size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
- if (!(*mr_pas))
- return -ENOMEM;
-
- pas = PTR_ALIGN(*mr_pas, MLX5_UMR_ALIGN);
- mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
- /* Clear padding after the actual pages. */
- memset(pas + npages, 0, *size - npages * sizeof(u64));
-
- *dma = dma_map_single(ddev, pas, *size, DMA_TO_DEVICE);
- if (dma_mapping_error(ddev, *dma)) {
- kfree(*mr_pas);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-static void prep_umr_wqe_common(struct ib_pd *pd, struct ib_send_wr *wr,
- struct ib_sge *sg, u64 dma, int n, u32 key,
- int page_shift)
-{
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct mlx5_umr_wr *umrwr = umr_wr(wr);
-
- sg->addr = dma;
- sg->length = ALIGN(sizeof(u64) * n, 64);
- sg->lkey = dev->umrc.pd->local_dma_lkey;
-
- wr->next = NULL;
- wr->sg_list = sg;
- if (n)
- wr->num_sge = 1;
- else
- wr->num_sge = 0;
-
- wr->opcode = MLX5_IB_WR_UMR;
-
- umrwr->npages = n;
- umrwr->page_shift = page_shift;
- umrwr->mkey = key;
-}
-
-static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
- struct ib_sge *sg, u64 dma, int n, u32 key,
- int page_shift, u64 virt_addr, u64 len,
- int access_flags)
-{
- struct mlx5_umr_wr *umrwr = umr_wr(wr);
-
- prep_umr_wqe_common(pd, wr, sg, dma, n, key, page_shift);
-
- wr->send_flags = 0;
-
- umrwr->target.virt_addr = virt_addr;
- umrwr->length = len;
- umrwr->access_flags = access_flags;
- umrwr->pd = pd;
-}
-
-static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
- struct ib_send_wr *wr, u32 key)
-{
- struct mlx5_umr_wr *umrwr = umr_wr(wr);
-
- wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
- wr->opcode = MLX5_IB_WR_UMR;
- umrwr->mkey = key;
-}
-
static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
int access_flags, struct ib_umem **umem,
int *npages, int *page_shift, int *ncont,
@@ -857,7 +835,7 @@ static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
access_flags, 0);
err = PTR_ERR_OR_ZERO(*umem);
if (err < 0) {
- mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
+ mlx5_ib_err(dev, "umem get failed (%d)\n", err);
return err;
}
@@ -891,21 +869,39 @@ static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
init_completion(&context->done);
}
+static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
+ struct mlx5_umr_wr *umrwr)
+{
+ struct umr_common *umrc = &dev->umrc;
+ struct ib_send_wr *bad;
+ int err;
+ struct mlx5_ib_umr_context umr_context;
+
+ mlx5_ib_init_umr_context(&umr_context);
+ umrwr->wr.wr_cqe = &umr_context.cqe;
+
+ down(&umrc->sem);
+ err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
+ if (err) {
+ mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
+ } else {
+ wait_for_completion(&umr_context.done);
+ if (umr_context.status != IB_WC_SUCCESS) {
+ mlx5_ib_warn(dev, "reg umr failed (%u)\n",
+ umr_context.status);
+ err = -EFAULT;
+ }
+ }
+ up(&umrc->sem);
+ return err;
+}
+
static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
u64 virt_addr, u64 len, int npages,
int page_shift, int order, int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct device *ddev = dev->ib_dev.dma_device;
- struct umr_common *umrc = &dev->umrc;
- struct mlx5_ib_umr_context umr_context;
- struct mlx5_umr_wr umrwr = {};
- struct ib_send_wr *bad;
struct mlx5_ib_mr *mr;
- struct ib_sge sg;
- int size;
- __be64 *mr_pas;
- dma_addr_t dma;
int err = 0;
int i;
@@ -924,173 +920,181 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
if (!mr)
return ERR_PTR(-EAGAIN);
- err = dma_map_mr_pas(dev, umem, npages, page_shift, &size, &mr_pas,
- &dma);
- if (err)
- goto free_mr;
-
- mlx5_ib_init_umr_context(&umr_context);
-
- umrwr.wr.wr_cqe = &umr_context.cqe;
- prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
- page_shift, virt_addr, len, access_flags);
-
- down(&umrc->sem);
- err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
- if (err) {
- mlx5_ib_warn(dev, "post send failed, err %d\n", err);
- goto unmap_dma;
- } else {
- wait_for_completion(&umr_context.done);
- if (umr_context.status != IB_WC_SUCCESS) {
- mlx5_ib_warn(dev, "reg umr failed\n");
- err = -EFAULT;
- }
- }
-
+ mr->ibmr.pd = pd;
+ mr->umem = umem;
+ mr->access_flags = access_flags;
+ mr->desc_size = sizeof(struct mlx5_mtt);
mr->mmkey.iova = virt_addr;
mr->mmkey.size = len;
mr->mmkey.pd = to_mpd(pd)->pdn;
- mr->live = 1;
-
-unmap_dma:
- up(&umrc->sem);
- dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
-
- kfree(mr_pas);
+ err = mlx5_ib_update_xlt(mr, 0, npages, page_shift,
+ MLX5_IB_UPD_XLT_ENABLE);
-free_mr:
if (err) {
- free_cached_mr(dev, mr);
+ mlx5_mr_cache_free(dev, mr);
return ERR_PTR(err);
}
+ mr->live = 1;
+
return mr;
}
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
- int zap)
+static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,
+ void *xlt, int page_shift, size_t size,
+ int flags)
{
struct mlx5_ib_dev *dev = mr->dev;
- struct device *ddev = dev->ib_dev.dma_device;
- struct umr_common *umrc = &dev->umrc;
- struct mlx5_ib_umr_context umr_context;
struct ib_umem *umem = mr->umem;
+ if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
+ mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);
+ return npages;
+ }
+
+ npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx);
+
+ if (!(flags & MLX5_IB_UPD_XLT_ZAP)) {
+ __mlx5_ib_populate_pas(dev, umem, page_shift,
+ idx, npages, xlt,
+ MLX5_IB_MTT_PRESENT);
+ /* Clear padding after the pages
+ * brought from the umem.
+ */
+ memset(xlt + (npages * sizeof(struct mlx5_mtt)), 0,
+ size - npages * sizeof(struct mlx5_mtt));
+ }
+
+ return npages;
+}
+
+#define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
+ MLX5_UMR_MTT_ALIGNMENT)
+#define MLX5_SPARE_UMR_CHUNK 0x10000
+
+int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
+ int page_shift, int flags)
+{
+ struct mlx5_ib_dev *dev = mr->dev;
+ struct device *ddev = dev->ib_dev.dev.parent;
+ struct mlx5_ib_ucontext *uctx = NULL;
int size;
- __be64 *pas;
+ void *xlt;
dma_addr_t dma;
- struct ib_send_wr *bad;
struct mlx5_umr_wr wr;
struct ib_sge sg;
int err = 0;
- const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
- const int page_index_mask = page_index_alignment - 1;
+ int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
+ ? sizeof(struct mlx5_klm)
+ : sizeof(struct mlx5_mtt);
+ const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
+ const int page_mask = page_align - 1;
size_t pages_mapped = 0;
size_t pages_to_map = 0;
size_t pages_iter = 0;
- int use_emergency_buf = 0;
+ gfp_t gfp;
/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
- * so we need to align the offset and length accordingly */
- if (start_page_index & page_index_mask) {
- npages += start_page_index & page_index_mask;
- start_page_index &= ~page_index_mask;
+ * so we need to align the offset and length accordingly
+ */
+ if (idx & page_mask) {
+ npages += idx & page_mask;
+ idx &= ~page_mask;
}
- pages_to_map = ALIGN(npages, page_index_alignment);
+ gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL;
+ gfp |= __GFP_ZERO | __GFP_NOWARN;
- if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
- return -EINVAL;
+ pages_to_map = ALIGN(npages, page_align);
+ size = desc_size * pages_to_map;
+ size = min_t(int, size, MLX5_MAX_UMR_CHUNK);
- size = sizeof(u64) * pages_to_map;
- size = min_t(int, PAGE_SIZE, size);
- /* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim
- * code, when we are called from an invalidation. The pas buffer must
- * be 2k-aligned for Connect-IB. */
- pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
- if (!pas) {
- mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
- pas = mlx5_ib_update_mtt_emergency_buffer;
- size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
- use_emergency_buf = 1;
- mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
- memset(pas, 0, size);
+ xlt = (void *)__get_free_pages(gfp, get_order(size));
+ if (!xlt && size > MLX5_SPARE_UMR_CHUNK) {
+ mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. fallback to spare UMR allocation od %d bytes\n",
+ size, get_order(size), MLX5_SPARE_UMR_CHUNK);
+
+ size = MLX5_SPARE_UMR_CHUNK;
+ xlt = (void *)__get_free_pages(gfp, get_order(size));
+ }
+
+ if (!xlt) {
+ uctx = to_mucontext(mr->ibmr.pd->uobject->context);
+ mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
+ size = PAGE_SIZE;
+ xlt = (void *)uctx->upd_xlt_page;
+ mutex_lock(&uctx->upd_xlt_page_mutex);
+ memset(xlt, 0, size);
}
- pages_iter = size / sizeof(u64);
- dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
+ pages_iter = size / desc_size;
+ dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE);
if (dma_mapping_error(ddev, dma)) {
- mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
+ mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
err = -ENOMEM;
- goto free_pas;
+ goto free_xlt;
}
+ sg.addr = dma;
+ sg.lkey = dev->umrc.pd->local_dma_lkey;
+
+ memset(&wr, 0, sizeof(wr));
+ wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
+ if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
+ wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
+ wr.wr.sg_list = &sg;
+ wr.wr.num_sge = 1;
+ wr.wr.opcode = MLX5_IB_WR_UMR;
+
+ wr.pd = mr->ibmr.pd;
+ wr.mkey = mr->mmkey.key;
+ wr.length = mr->mmkey.size;
+ wr.virt_addr = mr->mmkey.iova;
+ wr.access_flags = mr->access_flags;
+ wr.page_shift = page_shift;
+
for (pages_mapped = 0;
pages_mapped < pages_to_map && !err;
- pages_mapped += pages_iter, start_page_index += pages_iter) {
+ pages_mapped += pages_iter, idx += pages_iter) {
+ npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
-
- npages = min_t(size_t,
- pages_iter,
- ib_umem_num_pages(umem) - start_page_index);
-
- if (!zap) {
- __mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
- start_page_index, npages, pas,
- MLX5_IB_MTT_PRESENT);
- /* Clear padding after the pages brought from the
- * umem. */
- memset(pas + npages, 0, size - npages * sizeof(u64));
- }
+ npages = populate_xlt(mr, idx, npages, xlt,
+ page_shift, size, flags);
dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
- mlx5_ib_init_umr_context(&umr_context);
-
- memset(&wr, 0, sizeof(wr));
- wr.wr.wr_cqe = &umr_context.cqe;
-
- sg.addr = dma;
- sg.length = ALIGN(npages * sizeof(u64),
- MLX5_UMR_MTT_ALIGNMENT);
- sg.lkey = dev->umrc.pd->local_dma_lkey;
+ sg.length = ALIGN(npages * desc_size,
+ MLX5_UMR_MTT_ALIGNMENT);
+
+ if (pages_mapped + pages_iter >= pages_to_map) {
+ if (flags & MLX5_IB_UPD_XLT_ENABLE)
+ wr.wr.send_flags |=
+ MLX5_IB_SEND_UMR_ENABLE_MR |
+ MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
+ MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
+ if (flags & MLX5_IB_UPD_XLT_PD ||
+ flags & MLX5_IB_UPD_XLT_ACCESS)
+ wr.wr.send_flags |=
+ MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
+ if (flags & MLX5_IB_UPD_XLT_ADDR)
+ wr.wr.send_flags |=
+ MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
+ }
- wr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
- MLX5_IB_SEND_UMR_UPDATE_MTT;
- wr.wr.sg_list = &sg;
- wr.wr.num_sge = 1;
- wr.wr.opcode = MLX5_IB_WR_UMR;
- wr.npages = sg.length / sizeof(u64);
- wr.page_shift = PAGE_SHIFT;
- wr.mkey = mr->mmkey.key;
- wr.target.offset = start_page_index;
+ wr.offset = idx * desc_size;
+ wr.xlt_size = sg.length;
- down(&umrc->sem);
- err = ib_post_send(umrc->qp, &wr.wr, &bad);
- if (err) {
- mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
- } else {
- wait_for_completion(&umr_context.done);
- if (umr_context.status != IB_WC_SUCCESS) {
- mlx5_ib_err(dev, "UMR completion failed, code %d\n",
- umr_context.status);
- err = -EFAULT;
- }
- }
- up(&umrc->sem);
+ err = mlx5_ib_post_send_wait(dev, &wr);
}
dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
-free_pas:
- if (!use_emergency_buf)
- free_page((unsigned long)pas);
+free_xlt:
+ if (uctx)
+ mutex_unlock(&uctx->upd_xlt_page_mutex);
else
- mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
+ free_pages((unsigned long)xlt, get_order(size));
return err;
}
-#endif
/*
* If ibmr is NULL it will be allocated by reg_create.
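
mlx5_ib_update_xlt above splits a large translation update into UMR posts of at most MLX5_MAX_UMR_CHUNK bytes. A minimal sketch of that chunking math, with the hardware constants filled in by assumption (64-byte MLX5_UMR_MTT_ALIGNMENT, MLX5_MAX_UMR_SHIFT of 16, 8-byte MTT descriptors):

#include <stdio.h>

#define UMR_MTT_ALIGNMENT 64                        /* assumed MLX5_UMR_MTT_ALIGNMENT */
#define MAX_UMR_CHUNK ((1 << (16 + 4)) - UMR_MTT_ALIGNMENT)
#define ALIGN_UP(x, a) (((x) + (a) - 1) / (a) * (a))

static void xlt_chunks(int npages, int desc_size)
{
	int page_align = UMR_MTT_ALIGNMENT / desc_size;
	int pages_to_map = ALIGN_UP(npages, page_align);
	int size = desc_size * pages_to_map;
	int pages_iter, posts;

	if (size > MAX_UMR_CHUNK)
		size = MAX_UMR_CHUNK;
	pages_iter = size / desc_size;
	posts = (pages_to_map + pages_iter - 1) / pages_iter;

	printf("npages=%-8d desc=%d -> %d pages per post, %d UMR post(s)\n",
	       npages, desc_size, pages_iter, posts);
}

int main(void)
{
	xlt_chunks(100, 8);       /* small MR: a single post */
	xlt_chunks(1 << 20, 8);   /* 4 GB of 4K pages: several posts */
	return 0;
}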
@@ -1116,14 +1120,15 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
sizeof(*pas) * ((npages + 1) / 2) * 2;
- in = mlx5_vzalloc(inlen);
+ in = kvzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
goto err_1;
}
pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
- mlx5_ib_populate_pas(dev, umem, page_shift, pas,
- pg_cap ? MLX5_IB_MTT_PRESENT : 0);
+ if (!(access_flags & IB_ACCESS_ON_DEMAND))
+ mlx5_ib_populate_pas(dev, umem, page_shift, pas,
+ pg_cap ? MLX5_IB_MTT_PRESENT : 0);
/* The pg_access bit allows setting the access flags
* in the page list submitted with the command. */
@@ -1153,6 +1158,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
mlx5_ib_warn(dev, "create mkey failed\n");
goto err_2;
}
+ mr->mmkey.type = MLX5_MKEY_MR;
+ mr->desc_size = sizeof(struct mlx5_mtt);
mr->umem = umem;
mr->dev = dev;
mr->live = 1;
@@ -1198,20 +1205,33 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
start, virt_addr, length, access_flags);
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (!start && length == U64_MAX) {
+ if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
+ !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+ return ERR_PTR(-EINVAL);
+
+ mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
+ return &mr->ibmr;
+ }
+#endif
+
err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
&page_shift, &ncont, &order);
if (err < 0)
return ERR_PTR(err);
- if (use_umr(order)) {
+ if (use_umr(dev, order)) {
mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
order, access_flags);
if (PTR_ERR(mr) == -EAGAIN) {
mlx5_ib_dbg(dev, "cache empty for order %d", order);
mr = NULL;
}
- } else if (access_flags & IB_ACCESS_ON_DEMAND) {
+ } else if (access_flags & IB_ACCESS_ON_DEMAND &&
+ !MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) {
err = -EINVAL;
pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB");
goto error;
@@ -1248,106 +1268,39 @@ error:
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
struct mlx5_core_dev *mdev = dev->mdev;
- struct umr_common *umrc = &dev->umrc;
- struct mlx5_ib_umr_context umr_context;
struct mlx5_umr_wr umrwr = {};
- struct ib_send_wr *bad;
- int err;
if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
return 0;
- mlx5_ib_init_umr_context(&umr_context);
-
- umrwr.wr.wr_cqe = &umr_context.cqe;
- prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmkey.key);
-
- down(&umrc->sem);
- err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
- if (err) {
- up(&umrc->sem);
- mlx5_ib_dbg(dev, "err %d\n", err);
- goto error;
- } else {
- wait_for_completion(&umr_context.done);
- up(&umrc->sem);
- }
- if (umr_context.status != IB_WC_SUCCESS) {
- mlx5_ib_warn(dev, "unreg umr failed\n");
- err = -EFAULT;
- goto error;
- }
- return 0;
+ umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
+ MLX5_IB_SEND_UMR_FAIL_IF_FREE;
+ umrwr.wr.opcode = MLX5_IB_WR_UMR;
+ umrwr.mkey = mr->mmkey.key;
-error:
- return err;
+ return mlx5_ib_post_send_wait(dev, &umrwr);
}
-static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr,
- u64 length, int npages, int page_shift, int order,
+static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr,
int access_flags, int flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct device *ddev = dev->ib_dev.dma_device;
- struct mlx5_ib_umr_context umr_context;
- struct ib_send_wr *bad;
struct mlx5_umr_wr umrwr = {};
- struct ib_sge sg;
- struct umr_common *umrc = &dev->umrc;
- dma_addr_t dma = 0;
- __be64 *mr_pas = NULL;
- int size;
int err;
- mlx5_ib_init_umr_context(&umr_context);
-
- umrwr.wr.wr_cqe = &umr_context.cqe;
umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;
- if (flags & IB_MR_REREG_TRANS) {
- err = dma_map_mr_pas(dev, mr->umem, npages, page_shift, &size,
- &mr_pas, &dma);
- if (err)
- return err;
-
- umrwr.target.virt_addr = virt_addr;
- umrwr.length = length;
- umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
- }
-
- prep_umr_wqe_common(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
- page_shift);
+ umrwr.wr.opcode = MLX5_IB_WR_UMR;
+ umrwr.mkey = mr->mmkey.key;
- if (flags & IB_MR_REREG_PD) {
+ if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) {
umrwr.pd = pd;
- umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD;
- }
-
- if (flags & IB_MR_REREG_ACCESS) {
umrwr.access_flags = access_flags;
- umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_ACCESS;
+ umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
}
- /* post send request to UMR QP */
- down(&umrc->sem);
- err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
-
- if (err) {
- mlx5_ib_warn(dev, "post send failed, err %d\n", err);
- } else {
- wait_for_completion(&umr_context.done);
- if (umr_context.status != IB_WC_SUCCESS) {
- mlx5_ib_warn(dev, "reg umr failed (%u)\n",
- umr_context.status);
- err = -EFAULT;
- }
- }
+ err = mlx5_ib_post_send_wait(dev, &umrwr);
- up(&umrc->sem);
- if (flags & IB_MR_REREG_TRANS) {
- dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
- kfree(mr_pas);
- }
return err;
}
@@ -1364,6 +1317,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address;
u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length;
int page_shift = 0;
+ int upd_flags = 0;
int npages = 0;
int ncont = 0;
int order = 0;
@@ -1372,6 +1326,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
start, virt_addr, length, access_flags);
+ atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
+
if (flags != IB_MR_REREG_PD) {
/*
* Replace umem. This needs to be done whether or not UMR is
@@ -1382,7 +1338,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
&npages, &page_shift, &ncont, &order);
if (err < 0) {
- mr->umem = NULL;
+ clean_mr(mr);
return err;
}
}
@@ -1414,32 +1370,37 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
/*
* Send a UMR WQE
*/
- err = rereg_umr(pd, mr, addr, len, npages, page_shift,
- order, access_flags, flags);
+ mr->ibmr.pd = pd;
+ mr->access_flags = access_flags;
+ mr->mmkey.iova = addr;
+ mr->mmkey.size = len;
+ mr->mmkey.pd = to_mpd(pd)->pdn;
+
+ if (flags & IB_MR_REREG_TRANS) {
+ upd_flags = MLX5_IB_UPD_XLT_ADDR;
+ if (flags & IB_MR_REREG_PD)
+ upd_flags |= MLX5_IB_UPD_XLT_PD;
+ if (flags & IB_MR_REREG_ACCESS)
+ upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
+ err = mlx5_ib_update_xlt(mr, 0, npages, page_shift,
+ upd_flags);
+ } else {
+ err = rereg_umr(pd, mr, access_flags, flags);
+ }
+
if (err) {
mlx5_ib_warn(dev, "Failed to rereg UMR\n");
+ ib_umem_release(mr->umem);
+ clean_mr(mr);
return err;
}
}
- if (flags & IB_MR_REREG_PD) {
- ib_mr->pd = pd;
- mr->mmkey.pd = to_mpd(pd)->pdn;
- }
-
- if (flags & IB_MR_REREG_ACCESS)
- mr->access_flags = access_flags;
+ set_mr_fileds(dev, mr, npages, len, access_flags);
- if (flags & IB_MR_REREG_TRANS) {
- atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
- set_mr_fileds(dev, mr, npages, len, access_flags);
- mr->mmkey.iova = addr;
- mr->mmkey.size = len;
- }
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
update_odp_mr(mr);
#endif
-
return 0;
}
@@ -1461,9 +1422,9 @@ mlx5_alloc_priv_descs(struct ib_device *device,
mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
- mr->desc_map = dma_map_single(device->dma_device, mr->descs,
+ mr->desc_map = dma_map_single(device->dev.parent, mr->descs,
size, DMA_TO_DEVICE);
- if (dma_mapping_error(device->dma_device, mr->desc_map)) {
+ if (dma_mapping_error(device->dev.parent, mr->desc_map)) {
ret = -ENOMEM;
goto err;
}
@@ -1482,7 +1443,7 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
struct ib_device *device = mr->ibmr.device;
int size = mr->max_descs * mr->desc_size;
- dma_unmap_single(device->dma_device, mr->desc_map,
+ dma_unmap_single(device->dev.parent, mr->desc_map,
size, DMA_TO_DEVICE);
kfree(mr->descs_alloc);
mr->descs = NULL;
@@ -1518,12 +1479,7 @@ static int clean_mr(struct mlx5_ib_mr *mr)
return err;
}
} else {
- err = unreg_umr(dev, mr);
- if (err) {
- mlx5_ib_warn(dev, "failed unregister\n");
- return err;
- }
- free_cached_mr(dev, mr);
+ mlx5_mr_cache_free(dev, mr);
}
if (!umred)
@@ -1546,8 +1502,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
/* Wait for all running page-fault handlers to finish. */
synchronize_srcu(&dev->mr_srcu);
/* Destroy all page mappings */
- mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
- ib_umem_end(umem));
+ if (umem->odp_data->page_list)
+ mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
+ ib_umem_end(umem));
+ else
+ mlx5_ib_free_implicit_mr(mr);
/*
* We kill the umem before the MR for ODP,
* so that there will not be any invalidations in
@@ -1603,11 +1562,11 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
err = mlx5_alloc_priv_descs(pd->device, mr,
- ndescs, sizeof(u64));
+ ndescs, sizeof(struct mlx5_mtt));
if (err)
goto err_free_in;
- mr->desc_size = sizeof(u64);
+ mr->desc_size = sizeof(struct mlx5_mtt);
mr->max_descs = ndescs;
} else if (mr_type == IB_MR_TYPE_SG_GAPS) {
mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
@@ -1656,6 +1615,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
if (err)
goto err_destroy_psv;
+ mr->mmkey.type = MLX5_MKEY_MR;
mr->ibmr.lkey = mr->mmkey.key;
mr->ibmr.rkey = mr->mmkey.key;
mr->umem = NULL;
@@ -1736,7 +1696,9 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
if (err)
goto free;
+ mw->mmkey.type = MLX5_MKEY_MW;
mw->ibmw.rkey = mw->mmkey.key;
+ mw->ndescs = ndescs;
resp.response_length = min(offsetof(typeof(resp), response_length) +
sizeof(resp.response_length), udata->outlen);
@@ -1827,12 +1789,12 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
mr->ndescs = sg_nents;
for_each_sg(sgl, sg, sg_nents, i) {
- if (unlikely(i > mr->max_descs))
+ if (unlikely(i >= mr->max_descs))
break;
klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
klms[i].key = cpu_to_be32(lkey);
- mr->ibmr.length += sg_dma_len(sg);
+ mr->ibmr.length += sg_dma_len(sg) - sg_offset;
sg_offset = 0;
}
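
The hunk above tightens mlx5_ib_sg_to_klms in two ways: the descriptor bound check becomes i >= mr->max_descs, and the reported length subtracts the offset into the first segment. A minimal userspace sketch of the corrected accounting, with the scatterlist replaced by a plain array (all names illustrative, none are driver API):

#include <stdio.h>
#include <stddef.h>

struct seg { unsigned long long dma_addr; unsigned int dma_len; };

/* Returns total bytes covered, honouring the offset into the first segment. */
static unsigned long long sg_total_len(const struct seg *sgl, int nents,
				       int max_descs, unsigned int sg_offset)
{
	unsigned long long len = 0;

	for (int i = 0; i < nents; i++) {
		if (i >= max_descs)   /* ">" would let one extra descriptor through */
			break;
		len += sgl[i].dma_len - sg_offset;
		sg_offset = 0;        /* only the first segment is offset */
	}
	return len;
}

int main(void)
{
	struct seg sgl[] = { { 0x1000, 4096 }, { 0x3000, 4096 } };

	printf("%llu bytes mapped\n", sg_total_len(sgl, 2, 2, 512));
	return 0;
}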
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index cacb631a7b0a..ae0746754008 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -34,6 +34,7 @@
#include <rdma/ib_umem_odp.h>
#include "mlx5_ib.h"
+#include "cmd.h"
#define MAX_PREFETCH_LEN (4*1024*1024U)
@@ -41,13 +42,139 @@
* a pagefault. */
#define MMU_NOTIFIER_TIMEOUT 1000
-struct workqueue_struct *mlx5_ib_page_fault_wq;
+#define MLX5_IMR_MTT_BITS (30 - PAGE_SHIFT)
+#define MLX5_IMR_MTT_SHIFT (MLX5_IMR_MTT_BITS + PAGE_SHIFT)
+#define MLX5_IMR_MTT_ENTRIES BIT_ULL(MLX5_IMR_MTT_BITS)
+#define MLX5_IMR_MTT_SIZE BIT_ULL(MLX5_IMR_MTT_SHIFT)
+#define MLX5_IMR_MTT_MASK (~(MLX5_IMR_MTT_SIZE - 1))
+
+#define MLX5_KSM_PAGE_SHIFT MLX5_IMR_MTT_SHIFT
+
+static u64 mlx5_imr_ksm_entries;
+
+static int check_parent(struct ib_umem_odp *odp,
+ struct mlx5_ib_mr *parent)
+{
+ struct mlx5_ib_mr *mr = odp->private;
+
+ return mr && mr->parent == parent && !odp->dying;
+}
+
+static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp)
+{
+ struct mlx5_ib_mr *mr = odp->private, *parent = mr->parent;
+ struct ib_ucontext *ctx = odp->umem->context;
+ struct rb_node *rb;
+
+ down_read(&ctx->umem_rwsem);
+ while (1) {
+ rb = rb_next(&odp->interval_tree.rb);
+ if (!rb)
+ goto not_found;
+ odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
+ if (check_parent(odp, parent))
+ goto end;
+ }
+not_found:
+ odp = NULL;
+end:
+ up_read(&ctx->umem_rwsem);
+ return odp;
+}
+
+static struct ib_umem_odp *odp_lookup(struct ib_ucontext *ctx,
+ u64 start, u64 length,
+ struct mlx5_ib_mr *parent)
+{
+ struct ib_umem_odp *odp;
+ struct rb_node *rb;
+
+ down_read(&ctx->umem_rwsem);
+ odp = rbt_ib_umem_lookup(&ctx->umem_tree, start, length);
+ if (!odp)
+ goto end;
+
+ while (1) {
+ if (check_parent(odp, parent))
+ goto end;
+ rb = rb_next(&odp->interval_tree.rb);
+ if (!rb)
+ goto not_found;
+ odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
+ if (ib_umem_start(odp->umem) > start + length)
+ goto not_found;
+ }
+not_found:
+ odp = NULL;
+end:
+ up_read(&ctx->umem_rwsem);
+ return odp;
+}
+
+void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
+ size_t nentries, struct mlx5_ib_mr *mr, int flags)
+{
+ struct ib_pd *pd = mr->ibmr.pd;
+ struct ib_ucontext *ctx = pd->uobject->context;
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct ib_umem_odp *odp;
+ unsigned long va;
+ int i;
+
+ if (flags & MLX5_IB_UPD_XLT_ZAP) {
+ for (i = 0; i < nentries; i++, pklm++) {
+ pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
+ pklm->key = cpu_to_be32(dev->null_mkey);
+ pklm->va = 0;
+ }
+ return;
+ }
+
+ odp = odp_lookup(ctx, offset * MLX5_IMR_MTT_SIZE,
+ nentries * MLX5_IMR_MTT_SIZE, mr);
+
+ for (i = 0; i < nentries; i++, pklm++) {
+ pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
+ va = (offset + i) * MLX5_IMR_MTT_SIZE;
+ if (odp && odp->umem->address == va) {
+ struct mlx5_ib_mr *mtt = odp->private;
+
+ pklm->key = cpu_to_be32(mtt->ibmr.lkey);
+ odp = odp_next(odp);
+ } else {
+ pklm->key = cpu_to_be32(dev->null_mkey);
+ }
+ mlx5_ib_dbg(dev, "[%d] va %lx key %x\n",
+ i, va, be32_to_cpu(pklm->key));
+ }
+}
+
+static void mr_leaf_free_action(struct work_struct *work)
+{
+ struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work);
+ int idx = ib_umem_start(odp->umem) >> MLX5_IMR_MTT_SHIFT;
+ struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent;
+
+ mr->parent = NULL;
+ synchronize_srcu(&mr->dev->mr_srcu);
+
+ ib_umem_release(odp->umem);
+ if (imr->live)
+ mlx5_ib_update_xlt(imr, idx, 1, 0,
+ MLX5_IB_UPD_XLT_INDIRECT |
+ MLX5_IB_UPD_XLT_ATOMIC);
+ mlx5_mr_cache_free(mr->dev, mr);
+
+ if (atomic_dec_and_test(&imr->num_leaf_free))
+ wake_up(&imr->q_leaf_free);
+}
void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
unsigned long end)
{
struct mlx5_ib_mr *mr;
- const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT / sizeof(u64)) - 1;
+ const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT /
+ sizeof(struct mlx5_mtt)) - 1;
u64 idx = 0, blk_start_idx = 0;
int in_block = 0;
u64 addr;
@@ -72,8 +199,8 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
* but they will write 0s as well, so no difference in the end result.
*/
- for (addr = start; addr < end; addr += (u64)umem->page_size) {
- idx = (addr - ib_umem_start(umem)) / PAGE_SIZE;
+ for (addr = start; addr < end; addr += BIT(umem->page_shift)) {
+ idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
/*
* Strive to write the MTTs in chunks, but avoid overwriting
* non-existing MTTs. The heuristic here can be improved to
@@ -90,16 +217,19 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
u64 umr_offset = idx & umr_block_mask;
if (in_block && umr_offset == 0) {
- mlx5_ib_update_mtt(mr, blk_start_idx,
- idx - blk_start_idx, 1);
+ mlx5_ib_update_xlt(mr, blk_start_idx,
+ idx - blk_start_idx, 0,
+ MLX5_IB_UPD_XLT_ZAP |
+ MLX5_IB_UPD_XLT_ATOMIC);
in_block = 0;
}
}
}
if (in_block)
- mlx5_ib_update_mtt(mr, blk_start_idx, idx - blk_start_idx + 1,
- 1);
-
+ mlx5_ib_update_xlt(mr, blk_start_idx,
+ idx - blk_start_idx + 1, 0,
+ MLX5_IB_UPD_XLT_ZAP |
+ MLX5_IB_UPD_XLT_ATOMIC);
/*
* We are now sure that the device will not access the
* memory. We can safely unmap it, and mark it as dirty if
@@ -107,6 +237,13 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
*/
ib_umem_odp_unmap_dma_pages(umem, start, end);
+
+ if (unlikely(!umem->npages && mr->parent &&
+ !umem->odp_data->dying)) {
+ WRITE_ONCE(umem->odp_data->dying, 1);
+ atomic_inc(&mr->parent->num_leaf_free);
+ schedule_work(&umem->odp_data->work);
+ }
}
void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
@@ -120,6 +257,11 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
caps->general_caps = IB_ODP_SUPPORT;
+ if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
+ dev->odp_max_size = U64_MAX;
+ else
+ dev->odp_max_size = BIT_ULL(MLX5_MAX_UMR_SHIFT + PAGE_SHIFT);
+
if (MLX5_CAP_ODP(dev->mdev, ud_odp_caps.send))
caps->per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SEND;
@@ -135,159 +277,484 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.read))
caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_READ;
- return;
-}
+ if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.atomic))
+ caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;
-static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev,
- u32 key)
-{
- u32 base_key = mlx5_base_mkey(key);
- struct mlx5_core_mkey *mmkey = __mlx5_mr_lookup(dev->mdev, base_key);
- struct mlx5_ib_mr *mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
+ if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) &&
+ MLX5_CAP_GEN(dev->mdev, null_mkey) &&
+ MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
+ caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT;
- if (!mmkey || mmkey->key != key || !mr->live)
- return NULL;
-
- return container_of(mmkey, struct mlx5_ib_mr, mmkey);
+ return;
}
-static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp,
- struct mlx5_ib_pfault *pfault,
+static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
+ struct mlx5_pagefault *pfault,
int error)
{
- struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
- u32 qpn = qp->trans_qp.base.mqp.qpn;
+ int wq_num = pfault->event_subtype == MLX5_PFAULT_SUBTYPE_WQE ?
+ pfault->wqe.wq_num : pfault->token;
int ret = mlx5_core_page_fault_resume(dev->mdev,
- qpn,
- pfault->mpfault.flags,
+ pfault->token,
+ wq_num,
+ pfault->type,
error);
if (ret)
- pr_err("Failed to resolve the page fault on QP 0x%x\n", qpn);
+ mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x\n",
+ wq_num);
}
-/*
- * Handle a single data segment in a page-fault WQE.
- *
- * Returns number of pages retrieved on success. The caller will continue to
- * the next data segment.
- * Can return the following error codes:
- * -EAGAIN to designate a temporary error. The caller will abort handling the
- * page fault and resolve it.
- * -EFAULT when there's an error mapping the requested pages. The caller will
- * abort the page fault handling and possibly move the QP to an error state.
- * On other errors the QP should also be closed with an error.
- */
-static int pagefault_single_data_segment(struct mlx5_ib_qp *qp,
- struct mlx5_ib_pfault *pfault,
- u32 key, u64 io_virt, size_t bcnt,
- u32 *bytes_mapped)
+static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
+ struct ib_umem *umem,
+ bool ksm, int access_flags)
{
- struct mlx5_ib_dev *mib_dev = to_mdev(qp->ibqp.pd->device);
- int srcu_key;
- unsigned int current_seq;
- u64 start_idx;
- int npages = 0, ret = 0;
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr;
- u64 access_mask = ODP_READ_ALLOWED_BIT;
+ int err;
+
+ mr = mlx5_mr_cache_alloc(dev, ksm ? MLX5_IMR_KSM_CACHE_ENTRY :
+ MLX5_IMR_MTT_CACHE_ENTRY);
+
+ if (IS_ERR(mr))
+ return mr;
+
+ mr->ibmr.pd = pd;
+
+ mr->dev = dev;
+ mr->access_flags = access_flags;
+ mr->mmkey.iova = 0;
+ mr->umem = umem;
+
+ if (ksm) {
+ err = mlx5_ib_update_xlt(mr, 0,
+ mlx5_imr_ksm_entries,
+ MLX5_KSM_PAGE_SHIFT,
+ MLX5_IB_UPD_XLT_INDIRECT |
+ MLX5_IB_UPD_XLT_ZAP |
+ MLX5_IB_UPD_XLT_ENABLE);
+
+ } else {
+ err = mlx5_ib_update_xlt(mr, 0,
+ MLX5_IMR_MTT_ENTRIES,
+ PAGE_SHIFT,
+ MLX5_IB_UPD_XLT_ZAP |
+ MLX5_IB_UPD_XLT_ENABLE |
+ MLX5_IB_UPD_XLT_ATOMIC);
+ }
- srcu_key = srcu_read_lock(&mib_dev->mr_srcu);
- mr = mlx5_ib_odp_find_mr_lkey(mib_dev, key);
- /*
- * If we didn't find the MR, it means the MR was closed while we were
- * handling the ODP event. In this case we return -EFAULT so that the
- * QP will be closed.
- */
- if (!mr || !mr->ibmr.pd) {
- pr_err("Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
- key);
- ret = -EFAULT;
- goto srcu_unlock;
+ if (err)
+ goto fail;
+
+ mr->ibmr.lkey = mr->mmkey.key;
+ mr->ibmr.rkey = mr->mmkey.key;
+
+ mr->live = 1;
+
+ mlx5_ib_dbg(dev, "key %x dev %p mr %p\n",
+ mr->mmkey.key, dev->mdev, mr);
+
+ return mr;
+
+fail:
+ mlx5_ib_err(dev, "Failed to register MKEY %d\n", err);
+ mlx5_mr_cache_free(dev, mr);
+
+ return ERR_PTR(err);
+}
+
+static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr,
+ u64 io_virt, size_t bcnt)
+{
+ struct ib_ucontext *ctx = mr->ibmr.pd->uobject->context;
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device);
+ struct ib_umem_odp *odp, *result = NULL;
+ u64 addr = io_virt & MLX5_IMR_MTT_MASK;
+ int nentries = 0, start_idx = 0, ret;
+ struct mlx5_ib_mr *mtt;
+ struct ib_umem *umem;
+
+ mutex_lock(&mr->umem->odp_data->umem_mutex);
+ odp = odp_lookup(ctx, addr, 1, mr);
+
+ mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n",
+ io_virt, bcnt, addr, odp);
+
+next_mr:
+ if (likely(odp)) {
+ if (nentries)
+ nentries++;
+ } else {
+ umem = ib_alloc_odp_umem(ctx, addr, MLX5_IMR_MTT_SIZE);
+ if (IS_ERR(umem)) {
+ mutex_unlock(&mr->umem->odp_data->umem_mutex);
+ return ERR_CAST(umem);
+ }
+
+ mtt = implicit_mr_alloc(mr->ibmr.pd, umem, 0, mr->access_flags);
+ if (IS_ERR(mtt)) {
+ mutex_unlock(&mr->umem->odp_data->umem_mutex);
+ ib_umem_release(umem);
+ return ERR_CAST(mtt);
+ }
+
+ odp = umem->odp_data;
+ odp->private = mtt;
+ mtt->umem = umem;
+ mtt->mmkey.iova = addr;
+ mtt->parent = mr;
+ INIT_WORK(&odp->work, mr_leaf_free_action);
+
+ if (!nentries)
+ start_idx = addr >> MLX5_IMR_MTT_SHIFT;
+ nentries++;
}
- if (!mr->umem->odp_data) {
- pr_debug("skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
- key);
- if (bytes_mapped)
- *bytes_mapped +=
- (bcnt - pfault->mpfault.bytes_committed);
- goto srcu_unlock;
+
+ /* Return first odp if region not covered by single one */
+ if (likely(!result))
+ result = odp;
+
+ addr += MLX5_IMR_MTT_SIZE;
+ if (unlikely(addr < io_virt + bcnt)) {
+ odp = odp_next(odp);
+ if (odp && odp->umem->address != addr)
+ odp = NULL;
+ goto next_mr;
}
- if (mr->ibmr.pd != qp->ibqp.pd) {
- pr_err("Page-fault with different PDs for QP and MR.\n");
- ret = -EFAULT;
- goto srcu_unlock;
+
+ if (unlikely(nentries)) {
+ ret = mlx5_ib_update_xlt(mr, start_idx, nentries, 0,
+ MLX5_IB_UPD_XLT_INDIRECT |
+ MLX5_IB_UPD_XLT_ATOMIC);
+ if (ret) {
+ mlx5_ib_err(dev, "Failed to update PAS\n");
+ result = ERR_PTR(ret);
+ }
+ }
+
+ mutex_unlock(&mr->umem->odp_data->umem_mutex);
+ return result;
+}
+
+struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
+ int access_flags)
+{
+ struct ib_ucontext *ctx = pd->ibpd.uobject->context;
+ struct mlx5_ib_mr *imr;
+ struct ib_umem *umem;
+
+ umem = ib_umem_get(ctx, 0, 0, IB_ACCESS_ON_DEMAND, 0);
+ if (IS_ERR(umem))
+ return ERR_CAST(umem);
+
+ imr = implicit_mr_alloc(&pd->ibpd, umem, 1, access_flags);
+ if (IS_ERR(imr)) {
+ ib_umem_release(umem);
+ return ERR_CAST(imr);
+ }
+
+ imr->umem = umem;
+ init_waitqueue_head(&imr->q_leaf_free);
+ atomic_set(&imr->num_leaf_free, 0);
+
+ return imr;
+}
+
+static int mr_leaf_free(struct ib_umem *umem, u64 start,
+ u64 end, void *cookie)
+{
+ struct mlx5_ib_mr *mr = umem->odp_data->private, *imr = cookie;
+
+ if (mr->parent != imr)
+ return 0;
+
+ ib_umem_odp_unmap_dma_pages(umem,
+ ib_umem_start(umem),
+ ib_umem_end(umem));
+
+ if (umem->odp_data->dying)
+ return 0;
+
+ WRITE_ONCE(umem->odp_data->dying, 1);
+ atomic_inc(&imr->num_leaf_free);
+ schedule_work(&umem->odp_data->work);
+
+ return 0;
+}
+
+void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
+{
+ struct ib_ucontext *ctx = imr->ibmr.pd->uobject->context;
+
+ down_read(&ctx->umem_rwsem);
+ rbt_ib_umem_for_each_in_range(&ctx->umem_tree, 0, ULLONG_MAX,
+ mr_leaf_free, imr);
+ up_read(&ctx->umem_rwsem);
+
+ wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
+}
+
+static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
+ u64 io_virt, size_t bcnt, u32 *bytes_mapped)
+{
+ u64 access_mask = ODP_READ_ALLOWED_BIT;
+ int npages = 0, page_shift, np;
+ u64 start_idx, page_mask;
+ struct ib_umem_odp *odp;
+ int current_seq;
+ size_t size;
+ int ret;
+
+ if (!mr->umem->odp_data->page_list) {
+ odp = implicit_mr_get_data(mr, io_virt, bcnt);
+
+ if (IS_ERR(odp))
+ return PTR_ERR(odp);
+ mr = odp->private;
+
+ } else {
+ odp = mr->umem->odp_data;
}
- current_seq = ACCESS_ONCE(mr->umem->odp_data->notifiers_seq);
+next_mr:
+ size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
+
+ page_shift = mr->umem->page_shift;
+ page_mask = ~(BIT(page_shift) - 1);
+ start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
+
+ if (mr->umem->writable)
+ access_mask |= ODP_WRITE_ALLOWED_BIT;
+
+ current_seq = READ_ONCE(odp->notifiers_seq);
/*
* Ensure the sequence number is valid for some time before we call
* gup.
*/
smp_rmb();
- /*
- * Avoid branches - this code will perform correctly
- * in all iterations (in iteration 2 and above,
- * bytes_committed == 0).
- */
- io_virt += pfault->mpfault.bytes_committed;
- bcnt -= pfault->mpfault.bytes_committed;
+ ret = ib_umem_odp_map_dma_pages(mr->umem, io_virt, size,
+ access_mask, current_seq);
- start_idx = (io_virt - (mr->mmkey.iova & PAGE_MASK)) >> PAGE_SHIFT;
+ if (ret < 0)
+ goto out;
- if (mr->umem->writable)
- access_mask |= ODP_WRITE_ALLOWED_BIT;
- npages = ib_umem_odp_map_dma_pages(mr->umem, io_virt, bcnt,
- access_mask, current_seq);
- if (npages < 0) {
- ret = npages;
- goto srcu_unlock;
+ np = ret;
+
+ mutex_lock(&odp->umem_mutex);
+ if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
+ /*
+ * No need to check whether the MTTs really belong to
+ * this MR, since ib_umem_odp_map_dma_pages already
+ * checks this.
+ */
+ ret = mlx5_ib_update_xlt(mr, start_idx, np,
+ page_shift, MLX5_IB_UPD_XLT_ATOMIC);
+ } else {
+ ret = -EAGAIN;
}
+ mutex_unlock(&odp->umem_mutex);
- if (npages > 0) {
- mutex_lock(&mr->umem->odp_data->umem_mutex);
- if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
- /*
- * No need to check whether the MTTs really belong to
- * this MR, since ib_umem_odp_map_dma_pages already
- * checks this.
- */
- ret = mlx5_ib_update_mtt(mr, start_idx, npages, 0);
- } else {
- ret = -EAGAIN;
- }
- mutex_unlock(&mr->umem->odp_data->umem_mutex);
- if (ret < 0) {
- if (ret != -EAGAIN)
- pr_err("Failed to update mkey page tables\n");
- goto srcu_unlock;
- }
+ if (ret < 0) {
+ if (ret != -EAGAIN)
+ mlx5_ib_err(dev, "Failed to update mkey page tables\n");
+ goto out;
+ }
+
+ if (bytes_mapped) {
+ u32 new_mappings = (np << page_shift) -
+ (io_virt - round_down(io_virt, 1 << page_shift));
+ *bytes_mapped += min_t(u32, new_mappings, size);
+ }
+
+ npages += np << (page_shift - PAGE_SHIFT);
+ bcnt -= size;
- if (bytes_mapped) {
- u32 new_mappings = npages * PAGE_SIZE -
- (io_virt - round_down(io_virt, PAGE_SIZE));
- *bytes_mapped += min_t(u32, new_mappings, bcnt);
+ if (unlikely(bcnt)) {
+ struct ib_umem_odp *next;
+
+ io_virt += size;
+ next = odp_next(odp);
+ if (unlikely(!next || next->umem->address != io_virt)) {
+ mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
+ io_virt, next);
+ return -EAGAIN;
}
+ odp = next;
+ mr = odp->private;
+ goto next_mr;
}
-srcu_unlock:
+ return npages;
+
+out:
if (ret == -EAGAIN) {
- if (!mr->umem->odp_data->dying) {
- struct ib_umem_odp *odp_data = mr->umem->odp_data;
+ if (mr->parent || !odp->dying) {
unsigned long timeout =
msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
if (!wait_for_completion_timeout(
- &odp_data->notifier_completion,
+ &odp->notifier_completion,
timeout)) {
- pr_warn("timeout waiting for mmu notifier completion\n");
+ mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d\n",
+ current_seq, odp->notifiers_seq);
}
} else {
/* The MR is being killed, kill the QP as well. */
ret = -EFAULT;
}
}
- srcu_read_unlock(&mib_dev->mr_srcu, srcu_key);
- pfault->mpfault.bytes_committed = 0;
+
+ return ret;
+}
+
+struct pf_frame {
+ struct pf_frame *next;
+ u32 key;
+ u64 io_virt;
+ size_t bcnt;
+ int depth;
+};
+
+/*
+ * Handle a single data segment in a page-fault WQE or RDMA region.
+ *
+ * Returns number of OS pages retrieved on success. The caller may continue to
+ * the next data segment.
+ * Can return the following error codes:
+ * -EAGAIN to designate a temporary error. The caller will abort handling the
+ * page fault and resolve it.
+ * -EFAULT when there's an error mapping the requested pages. The caller will
+ * abort the page fault handling.
+ */
+static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
+ u32 key, u64 io_virt, size_t bcnt,
+ u32 *bytes_committed,
+ u32 *bytes_mapped)
+{
+ int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0;
+ struct pf_frame *head = NULL, *frame;
+ struct mlx5_core_mkey *mmkey;
+ struct mlx5_ib_mw *mw;
+ struct mlx5_ib_mr *mr;
+ struct mlx5_klm *pklm;
+ u32 *out = NULL;
+ size_t offset;
+
+ srcu_key = srcu_read_lock(&dev->mr_srcu);
+
+ io_virt += *bytes_committed;
+ bcnt -= *bytes_committed;
+
+next_mr:
+ mmkey = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(key));
+ if (!mmkey || mmkey->key != key) {
+ mlx5_ib_dbg(dev, "failed to find mkey %x\n", key);
+ ret = -EFAULT;
+ goto srcu_unlock;
+ }
+
+ switch (mmkey->type) {
+ case MLX5_MKEY_MR:
+ mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
+ if (!mr->live || !mr->ibmr.pd) {
+ mlx5_ib_dbg(dev, "got dead MR\n");
+ ret = -EFAULT;
+ goto srcu_unlock;
+ }
+
+ ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped);
+ if (ret < 0)
+ goto srcu_unlock;
+
+ npages += ret;
+ ret = 0;
+ break;
+
+ case MLX5_MKEY_MW:
+ mw = container_of(mmkey, struct mlx5_ib_mw, mmkey);
+
+ if (depth >= MLX5_CAP_GEN(dev->mdev, max_indirection)) {
+ mlx5_ib_dbg(dev, "indirection level exceeded\n");
+ ret = -EFAULT;
+ goto srcu_unlock;
+ }
+
+ outlen = MLX5_ST_SZ_BYTES(query_mkey_out) +
+ sizeof(*pklm) * (mw->ndescs - 2);
+
+ if (outlen > cur_outlen) {
+ kfree(out);
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out) {
+ ret = -ENOMEM;
+ goto srcu_unlock;
+ }
+ cur_outlen = outlen;
+ }
+
+ pklm = (struct mlx5_klm *)MLX5_ADDR_OF(query_mkey_out, out,
+ bsf0_klm0_pas_mtt0_1);
+
+ ret = mlx5_core_query_mkey(dev->mdev, &mw->mmkey, out, outlen);
+ if (ret)
+ goto srcu_unlock;
+
+ offset = io_virt - MLX5_GET64(query_mkey_out, out,
+ memory_key_mkey_entry.start_addr);
+
+ for (i = 0; bcnt && i < mw->ndescs; i++, pklm++) {
+ if (offset >= be32_to_cpu(pklm->bcount)) {
+ offset -= be32_to_cpu(pklm->bcount);
+ continue;
+ }
+
+ frame = kzalloc(sizeof(*frame), GFP_KERNEL);
+ if (!frame) {
+ ret = -ENOMEM;
+ goto srcu_unlock;
+ }
+
+ frame->key = be32_to_cpu(pklm->key);
+ frame->io_virt = be64_to_cpu(pklm->va) + offset;
+ frame->bcnt = min_t(size_t, bcnt,
+ be32_to_cpu(pklm->bcount) - offset);
+ frame->depth = depth + 1;
+ frame->next = head;
+ head = frame;
+
+ bcnt -= frame->bcnt;
+ }
+ break;
+
+ default:
+ mlx5_ib_dbg(dev, "wrong mkey type %d\n", mmkey->type);
+ ret = -EFAULT;
+ goto srcu_unlock;
+ }
+
+ if (head) {
+ frame = head;
+ head = frame->next;
+
+ key = frame->key;
+ io_virt = frame->io_virt;
+ bcnt = frame->bcnt;
+ depth = frame->depth;
+ kfree(frame);
+
+ goto next_mr;
+ }
+
+srcu_unlock:
+ while (head) {
+ frame = head;
+ head = frame->next;
+ kfree(frame);
+ }
+ kfree(out);
+
+ srcu_read_unlock(&dev->mr_srcu, srcu_key);
+ *bytes_committed = 0;
return ret ? ret : npages;
}
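
pagefault_single_data_segment above walks indirect (MW/KLM) keys with an explicit pf_frame stack instead of recursion, bounded by the max_indirection capability. A stripped-down userspace model of that push/pop pattern (all names here are illustrative, none are driver API):

#include <stdlib.h>
#include <stdio.h>

struct frame {
	struct frame *next;
	unsigned int key;
	unsigned long long io_virt;
	size_t bcnt;
	int depth;
};

static int push_frame(struct frame **head, unsigned int key,
		      unsigned long long io_virt, size_t bcnt, int depth)
{
	struct frame *f = calloc(1, sizeof(*f));

	if (!f)
		return -1;
	f->key = key;
	f->io_virt = io_virt;
	f->bcnt = bcnt;
	f->depth = depth;
	f->next = *head;       /* LIFO: the newest child region is handled first */
	*head = f;
	return 0;
}

int main(void)
{
	struct frame *head = NULL, *f;

	/* two child regions discovered while parsing an indirect key */
	push_frame(&head, 0x100, 0x1000, 4096, 1);
	push_frame(&head, 0x200, 0x2000, 8192, 1);

	while ((f = head)) {   /* drain the stack, as the srcu_unlock path does */
		head = f->next;
		printf("fault key 0x%x va 0x%llx len %zu depth %d\n",
		       f->key, f->io_virt, f->bcnt, f->depth);
		free(f);
	}
	return 0;
}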
@@ -309,8 +776,9 @@ srcu_unlock:
* Returns the number of pages loaded if positive, zero for an empty WQE, or a
* negative error code.
*/
-static int pagefault_data_segments(struct mlx5_ib_qp *qp,
- struct mlx5_ib_pfault *pfault, void *wqe,
+static int pagefault_data_segments(struct mlx5_ib_dev *dev,
+ struct mlx5_pagefault *pfault,
+ struct mlx5_ib_qp *qp, void *wqe,
void *wqe_end, u32 *bytes_mapped,
u32 *total_wqe_bytes, int receive_queue)
{
@@ -354,22 +822,23 @@ static int pagefault_data_segments(struct mlx5_ib_qp *qp,
if (!inline_segment && total_wqe_bytes) {
*total_wqe_bytes += bcnt - min_t(size_t, bcnt,
- pfault->mpfault.bytes_committed);
+ pfault->bytes_committed);
}
/* A zero length data segment designates a length of 2GB. */
if (bcnt == 0)
bcnt = 1U << 31;
- if (inline_segment || bcnt <= pfault->mpfault.bytes_committed) {
- pfault->mpfault.bytes_committed -=
+ if (inline_segment || bcnt <= pfault->bytes_committed) {
+ pfault->bytes_committed -=
min_t(size_t, bcnt,
- pfault->mpfault.bytes_committed);
+ pfault->bytes_committed);
continue;
}
- ret = pagefault_single_data_segment(qp, pfault, key, io_virt,
- bcnt, bytes_mapped);
+ ret = pagefault_single_data_segment(dev, key, io_virt, bcnt,
+ &pfault->bytes_committed,
+ bytes_mapped);
if (ret < 0)
break;
npages += ret;
@@ -378,17 +847,29 @@ static int pagefault_data_segments(struct mlx5_ib_qp *qp,
return ret < 0 ? ret : npages;
}
+static const u32 mlx5_ib_odp_opcode_cap[] = {
+ [MLX5_OPCODE_SEND] = IB_ODP_SUPPORT_SEND,
+ [MLX5_OPCODE_SEND_IMM] = IB_ODP_SUPPORT_SEND,
+ [MLX5_OPCODE_SEND_INVAL] = IB_ODP_SUPPORT_SEND,
+ [MLX5_OPCODE_RDMA_WRITE] = IB_ODP_SUPPORT_WRITE,
+ [MLX5_OPCODE_RDMA_WRITE_IMM] = IB_ODP_SUPPORT_WRITE,
+ [MLX5_OPCODE_RDMA_READ] = IB_ODP_SUPPORT_READ,
+ [MLX5_OPCODE_ATOMIC_CS] = IB_ODP_SUPPORT_ATOMIC,
+ [MLX5_OPCODE_ATOMIC_FA] = IB_ODP_SUPPORT_ATOMIC,
+};
+
/*
* Parse initiator WQE. Advances the wqe pointer to point at the
* scatter-gather list, and set wqe_end to the end of the WQE.
*/
static int mlx5_ib_mr_initiator_pfault_handler(
- struct mlx5_ib_qp *qp, struct mlx5_ib_pfault *pfault,
- void **wqe, void **wqe_end, int wqe_length)
+ struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault,
+ struct mlx5_ib_qp *qp, void **wqe, void **wqe_end, int wqe_length)
{
- struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
struct mlx5_wqe_ctrl_seg *ctrl = *wqe;
- u16 wqe_index = pfault->mpfault.wqe.wqe_index;
+ u16 wqe_index = pfault->wqe.wqe_index;
+ u32 transport_caps;
+ struct mlx5_base_av *av;
unsigned ds, opcode;
#if defined(DEBUG)
u32 ctrl_wqe_index, ctrl_qpn;
@@ -434,53 +915,49 @@ static int mlx5_ib_mr_initiator_pfault_handler(
opcode = be32_to_cpu(ctrl->opmod_idx_opcode) &
MLX5_WQE_CTRL_OPCODE_MASK;
+
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
- switch (opcode) {
- case MLX5_OPCODE_SEND:
- case MLX5_OPCODE_SEND_IMM:
- case MLX5_OPCODE_SEND_INVAL:
- if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
- IB_ODP_SUPPORT_SEND))
- goto invalid_transport_or_opcode;
- break;
- case MLX5_OPCODE_RDMA_WRITE:
- case MLX5_OPCODE_RDMA_WRITE_IMM:
- if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
- IB_ODP_SUPPORT_WRITE))
- goto invalid_transport_or_opcode;
- *wqe += sizeof(struct mlx5_wqe_raddr_seg);
- break;
- case MLX5_OPCODE_RDMA_READ:
- if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
- IB_ODP_SUPPORT_READ))
- goto invalid_transport_or_opcode;
- *wqe += sizeof(struct mlx5_wqe_raddr_seg);
- break;
- default:
- goto invalid_transport_or_opcode;
- }
+ transport_caps = dev->odp_caps.per_transport_caps.rc_odp_caps;
break;
case IB_QPT_UD:
- switch (opcode) {
- case MLX5_OPCODE_SEND:
- case MLX5_OPCODE_SEND_IMM:
- if (!(dev->odp_caps.per_transport_caps.ud_odp_caps &
- IB_ODP_SUPPORT_SEND))
- goto invalid_transport_or_opcode;
- *wqe += sizeof(struct mlx5_wqe_datagram_seg);
- break;
- default:
- goto invalid_transport_or_opcode;
- }
+ transport_caps = dev->odp_caps.per_transport_caps.ud_odp_caps;
break;
default:
-invalid_transport_or_opcode:
- mlx5_ib_err(dev, "ODP fault on QP of an unsupported opcode or transport. transport: 0x%x opcode: 0x%x.\n",
- qp->ibqp.qp_type, opcode);
+ mlx5_ib_err(dev, "ODP fault on QP of an unsupported transport 0x%x\n",
+ qp->ibqp.qp_type);
+ return -EFAULT;
+ }
+
+ if (unlikely(opcode >= sizeof(mlx5_ib_odp_opcode_cap) /
+ sizeof(mlx5_ib_odp_opcode_cap[0]) ||
+ !(transport_caps & mlx5_ib_odp_opcode_cap[opcode]))) {
+ mlx5_ib_err(dev, "ODP fault on QP of an unsupported opcode 0x%x\n",
+ opcode);
return -EFAULT;
}
+ if (qp->ibqp.qp_type != IB_QPT_RC) {
+ av = *wqe;
+ if (av->dqp_dct & be32_to_cpu(MLX5_WQE_AV_EXT))
+ *wqe += sizeof(struct mlx5_av);
+ else
+ *wqe += sizeof(struct mlx5_base_av);
+ }
+
+ switch (opcode) {
+ case MLX5_OPCODE_RDMA_WRITE:
+ case MLX5_OPCODE_RDMA_WRITE_IMM:
+ case MLX5_OPCODE_RDMA_READ:
+ *wqe += sizeof(struct mlx5_wqe_raddr_seg);
+ break;
+ case MLX5_OPCODE_ATOMIC_CS:
+ case MLX5_OPCODE_ATOMIC_FA:
+ *wqe += sizeof(struct mlx5_wqe_raddr_seg);
+ *wqe += sizeof(struct mlx5_wqe_atomic_seg);
+ break;
+ }
+
return 0;
}
@@ -489,10 +966,9 @@ invalid_transport_or_opcode:
* scatter-gather list, and set wqe_end to the end of the WQE.
*/
static int mlx5_ib_mr_responder_pfault_handler(
- struct mlx5_ib_qp *qp, struct mlx5_ib_pfault *pfault,
- void **wqe, void **wqe_end, int wqe_length)
+ struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault,
+ struct mlx5_ib_qp *qp, void **wqe, void **wqe_end, int wqe_length)
{
- struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
struct mlx5_ib_wq *wq = &qp->rq;
int wqe_size = 1 << wq->wqe_shift;
@@ -529,70 +1005,80 @@ invalid_transport_or_opcode:
return 0;
}
-static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
- struct mlx5_ib_pfault *pfault)
+static struct mlx5_ib_qp *mlx5_ib_odp_find_qp(struct mlx5_ib_dev *dev,
+ u32 wq_num)
+{
+ struct mlx5_core_qp *mqp = __mlx5_qp_lookup(dev->mdev, wq_num);
+
+ if (!mqp) {
+ mlx5_ib_err(dev, "QPN 0x%6x not found\n", wq_num);
+ return NULL;
+ }
+
+ return to_mibqp(mqp);
+}
+
+static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
+ struct mlx5_pagefault *pfault)
{
- struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
int ret;
void *wqe, *wqe_end;
u32 bytes_mapped, total_wqe_bytes;
char *buffer = NULL;
- int resume_with_error = 0;
- u16 wqe_index = pfault->mpfault.wqe.wqe_index;
- int requestor = pfault->mpfault.flags & MLX5_PFAULT_REQUESTOR;
- u32 qpn = qp->trans_qp.base.mqp.qpn;
+ int resume_with_error = 1;
+ u16 wqe_index = pfault->wqe.wqe_index;
+ int requestor = pfault->type & MLX5_PFAULT_REQUESTOR;
+ struct mlx5_ib_qp *qp;
buffer = (char *)__get_free_page(GFP_KERNEL);
if (!buffer) {
mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n");
- resume_with_error = 1;
goto resolve_page_fault;
}
+ qp = mlx5_ib_odp_find_qp(dev, pfault->wqe.wq_num);
+ if (!qp)
+ goto resolve_page_fault;
+
ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
PAGE_SIZE, &qp->trans_qp.base);
if (ret < 0) {
- mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%x, wqe_index=%x, qpn=%x\n",
- -ret, wqe_index, qpn);
- resume_with_error = 1;
+ mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%d, wqe_index=%x, qpn=%x\n",
+ ret, wqe_index, pfault->token);
goto resolve_page_fault;
}
wqe = buffer;
if (requestor)
- ret = mlx5_ib_mr_initiator_pfault_handler(qp, pfault, &wqe,
+ ret = mlx5_ib_mr_initiator_pfault_handler(dev, pfault, qp, &wqe,
&wqe_end, ret);
else
- ret = mlx5_ib_mr_responder_pfault_handler(qp, pfault, &wqe,
+ ret = mlx5_ib_mr_responder_pfault_handler(dev, pfault, qp, &wqe,
&wqe_end, ret);
- if (ret < 0) {
- resume_with_error = 1;
+ if (ret < 0)
goto resolve_page_fault;
- }
if (wqe >= wqe_end) {
mlx5_ib_err(dev, "ODP fault on invalid WQE.\n");
- resume_with_error = 1;
goto resolve_page_fault;
}
- ret = pagefault_data_segments(qp, pfault, wqe, wqe_end, &bytes_mapped,
- &total_wqe_bytes, !requestor);
+ ret = pagefault_data_segments(dev, pfault, qp, wqe, wqe_end,
+ &bytes_mapped, &total_wqe_bytes,
+ !requestor);
if (ret == -EAGAIN) {
+ resume_with_error = 0;
goto resolve_page_fault;
} else if (ret < 0 || total_wqe_bytes > bytes_mapped) {
- mlx5_ib_err(dev, "Error getting user pages for page fault. Error: 0x%x\n",
- -ret);
- resume_with_error = 1;
goto resolve_page_fault;
}
+ resume_with_error = 0;
resolve_page_fault:
- mlx5_ib_page_fault_resume(qp, pfault, resume_with_error);
- mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, flags: 0x%x\n",
- qpn, resume_with_error,
- pfault->mpfault.flags);
-
+ mlx5_ib_page_fault_resume(dev, pfault, resume_with_error);
+ mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n",
+ pfault->wqe.wq_num, resume_with_error,
+ pfault->type);
free_page((unsigned long)buffer);
}
@@ -602,15 +1088,14 @@ static int pages_in_range(u64 address, u32 length)
(address & PAGE_MASK)) >> PAGE_SHIFT;
}
-static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_qp *qp,
- struct mlx5_ib_pfault *pfault)
+static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
+ struct mlx5_pagefault *pfault)
{
- struct mlx5_pagefault *mpfault = &pfault->mpfault;
u64 address;
u32 length;
- u32 prefetch_len = mpfault->bytes_committed;
+ u32 prefetch_len = pfault->bytes_committed;
int prefetch_activated = 0;
- u32 rkey = mpfault->rdma.r_key;
+ u32 rkey = pfault->rdma.r_key;
int ret;
/* The RDMA responder handler handles the page fault in two parts.
@@ -619,38 +1104,40 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_qp *qp,
* prefetches more pages. The second operation cannot use the pfault
* context and therefore uses the dummy_pfault context allocated on
* the stack */
- struct mlx5_ib_pfault dummy_pfault = {};
+ pfault->rdma.rdma_va += pfault->bytes_committed;
+ pfault->rdma.rdma_op_len -= min(pfault->bytes_committed,
+ pfault->rdma.rdma_op_len);
+ pfault->bytes_committed = 0;
- dummy_pfault.mpfault.bytes_committed = 0;
-
- mpfault->rdma.rdma_va += mpfault->bytes_committed;
- mpfault->rdma.rdma_op_len -= min(mpfault->bytes_committed,
- mpfault->rdma.rdma_op_len);
- mpfault->bytes_committed = 0;
-
- address = mpfault->rdma.rdma_va;
- length = mpfault->rdma.rdma_op_len;
+ address = pfault->rdma.rdma_va;
+ length = pfault->rdma.rdma_op_len;
/* For some operations, the hardware cannot tell the exact message
* length, and in those cases it reports zero. Use prefetch
* logic. */
if (length == 0) {
prefetch_activated = 1;
- length = mpfault->rdma.packet_size;
+ length = pfault->rdma.packet_size;
prefetch_len = min(MAX_PREFETCH_LEN, prefetch_len);
}
- ret = pagefault_single_data_segment(qp, pfault, rkey, address, length,
- NULL);
+ ret = pagefault_single_data_segment(dev, rkey, address, length,
+ &pfault->bytes_committed, NULL);
if (ret == -EAGAIN) {
/* We're racing with an invalidation, don't prefetch */
prefetch_activated = 0;
} else if (ret < 0 || pages_in_range(address, length) > ret) {
- mlx5_ib_page_fault_resume(qp, pfault, 1);
+ mlx5_ib_page_fault_resume(dev, pfault, 1);
+ if (ret != -ENOENT)
+ mlx5_ib_dbg(dev, "PAGE FAULT error %d. QP 0x%x, type: 0x%x\n",
+ ret, pfault->token, pfault->type);
return;
}
- mlx5_ib_page_fault_resume(qp, pfault, 0);
+ mlx5_ib_page_fault_resume(dev, pfault, 0);
+ mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x, type: 0x%x, prefetch_activated: %d\n",
+ pfault->token, pfault->type,
+ prefetch_activated);
/* At this point, there might be a new pagefault already arriving in
* the eq, switch to the dummy pagefault for the rest of the
@@ -658,139 +1145,93 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_qp *qp,
* work-queue is being fenced. */
if (prefetch_activated) {
- ret = pagefault_single_data_segment(qp, &dummy_pfault, rkey,
- address,
+ u32 bytes_committed = 0;
+
+ ret = pagefault_single_data_segment(dev, rkey, address,
prefetch_len,
- NULL);
- if (ret < 0) {
- pr_warn("Prefetch failed (ret = %d, prefetch_activated = %d) for QPN %d, address: 0x%.16llx, length = 0x%.16x\n",
- ret, prefetch_activated,
- qp->ibqp.qp_num, address, prefetch_len);
+ &bytes_committed, NULL);
+ if (ret < 0 && ret != -EAGAIN) {
+ mlx5_ib_dbg(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n",
+ ret, pfault->token, address, prefetch_len);
}
}
}
-void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
- struct mlx5_ib_pfault *pfault)
+void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
+ struct mlx5_pagefault *pfault)
{
- u8 event_subtype = pfault->mpfault.event_subtype;
+ struct mlx5_ib_dev *dev = context;
+ u8 event_subtype = pfault->event_subtype;
switch (event_subtype) {
case MLX5_PFAULT_SUBTYPE_WQE:
- mlx5_ib_mr_wqe_pfault_handler(qp, pfault);
+ mlx5_ib_mr_wqe_pfault_handler(dev, pfault);
break;
case MLX5_PFAULT_SUBTYPE_RDMA:
- mlx5_ib_mr_rdma_pfault_handler(qp, pfault);
+ mlx5_ib_mr_rdma_pfault_handler(dev, pfault);
break;
default:
- pr_warn("Invalid page fault event subtype: 0x%x\n",
- event_subtype);
- mlx5_ib_page_fault_resume(qp, pfault, 1);
- break;
+ mlx5_ib_err(dev, "Invalid page fault event subtype: 0x%x\n",
+ event_subtype);
+ mlx5_ib_page_fault_resume(dev, pfault, 1);
}
}
-static void mlx5_ib_qp_pfault_action(struct work_struct *work)
-{
- struct mlx5_ib_pfault *pfault = container_of(work,
- struct mlx5_ib_pfault,
- work);
- enum mlx5_ib_pagefault_context context =
- mlx5_ib_get_pagefault_context(&pfault->mpfault);
- struct mlx5_ib_qp *qp = container_of(pfault, struct mlx5_ib_qp,
- pagefaults[context]);
- mlx5_ib_mr_pfault_handler(qp, pfault);
-}
-
-void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&qp->disable_page_faults_lock, flags);
- qp->disable_page_faults = 1;
- spin_unlock_irqrestore(&qp->disable_page_faults_lock, flags);
-
- /*
- * Note that at this point, we are guarenteed that no more
- * work queue elements will be posted to the work queue with
- * the QP we are closing.
- */
- flush_workqueue(mlx5_ib_page_fault_wq);
-}
-
-void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&qp->disable_page_faults_lock, flags);
- qp->disable_page_faults = 0;
- spin_unlock_irqrestore(&qp->disable_page_faults_lock, flags);
-}
-
-static void mlx5_ib_pfault_handler(struct mlx5_core_qp *qp,
- struct mlx5_pagefault *pfault)
+void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
{
- /*
- * Note that we will only get one fault event per QP per context
- * (responder/initiator, read/write), until we resolve the page fault
- * with the mlx5_ib_page_fault_resume command. Since this function is
- * called from within the work element, there is no risk of missing
- * events.
- */
- struct mlx5_ib_qp *mibqp = to_mibqp(qp);
- enum mlx5_ib_pagefault_context context =
- mlx5_ib_get_pagefault_context(pfault);
- struct mlx5_ib_pfault *qp_pfault = &mibqp->pagefaults[context];
-
- qp_pfault->mpfault = *pfault;
-
- /* No need to stop interrupts here since we are in an interrupt */
- spin_lock(&mibqp->disable_page_faults_lock);
- if (!mibqp->disable_page_faults)
- queue_work(mlx5_ib_page_fault_wq, &qp_pfault->work);
- spin_unlock(&mibqp->disable_page_faults_lock);
-}
-
-void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp)
-{
- int i;
-
- qp->disable_page_faults = 1;
- spin_lock_init(&qp->disable_page_faults_lock);
+ if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+ return;
- qp->trans_qp.base.mqp.pfault_handler = mlx5_ib_pfault_handler;
+ switch (ent->order - 2) {
+ case MLX5_IMR_MTT_CACHE_ENTRY:
+ ent->page = PAGE_SHIFT;
+ ent->xlt = MLX5_IMR_MTT_ENTRIES *
+ sizeof(struct mlx5_mtt) /
+ MLX5_IB_UMR_OCTOWORD;
+ ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
+ ent->limit = 0;
+ break;
- for (i = 0; i < MLX5_IB_PAGEFAULT_CONTEXTS; ++i)
- INIT_WORK(&qp->pagefaults[i].work, mlx5_ib_qp_pfault_action);
+ case MLX5_IMR_KSM_CACHE_ENTRY:
+ ent->page = MLX5_KSM_PAGE_SHIFT;
+ ent->xlt = mlx5_imr_ksm_entries *
+ sizeof(struct mlx5_klm) /
+ MLX5_IB_UMR_OCTOWORD;
+ ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM;
+ ent->limit = 0;
+ break;
+ }
}
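
mlx5_odp_init_mr_cache_entry() sizes each cache entry's translation table in 16-byte octowords: entry count times descriptor size divided by the octoword size. A small sketch of that arithmetic; the descriptor sizes and entry counts below are assumptions for the demo, not the driver's constants.

/*
 * Sketch of the xlt sizing above: a table of N descriptors expressed in
 * 16-byte octowords.  All values below are assumed for illustration.
 */
#include <stdio.h>

#define DEMO_UMR_OCTOWORD	16	/* assumed MLX5_IB_UMR_OCTOWORD */
#define DEMO_MTT_SIZE		8	/* assumed sizeof(struct mlx5_mtt) */
#define DEMO_KLM_SIZE		16	/* assumed sizeof(struct mlx5_klm) */

int main(void)
{
	unsigned long imr_mtt_entries = 1UL << 18;	/* assumed MLX5_IMR_MTT_ENTRIES */
	unsigned long ksm_entries = 1UL << 17;		/* assumed mlx5_imr_ksm_entries */

	printf("MTT cache entry: xlt = %lu octowords\n",
	       imr_mtt_entries * DEMO_MTT_SIZE / DEMO_UMR_OCTOWORD);
	printf("KSM cache entry: xlt = %lu octowords\n",
	       ksm_entries * DEMO_KLM_SIZE / DEMO_UMR_OCTOWORD);
	return 0;
}
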
-int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev)
+int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
{
int ret;
- ret = init_srcu_struct(&ibdev->mr_srcu);
+ ret = init_srcu_struct(&dev->mr_srcu);
if (ret)
return ret;
+ if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) {
+ ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey);
+ if (ret) {
+ mlx5_ib_err(dev, "Error getting null_mkey %d\n", ret);
+ return ret;
+ }
+ }
+
return 0;
}
-void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev)
+void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *dev)
{
- cleanup_srcu_struct(&ibdev->mr_srcu);
+ cleanup_srcu_struct(&dev->mr_srcu);
}
-int __init mlx5_ib_odp_init(void)
+int mlx5_ib_odp_init(void)
{
- mlx5_ib_page_fault_wq = alloc_ordered_workqueue("mlx5_ib_page_faults",
- WQ_MEM_RECLAIM);
- if (!mlx5_ib_page_fault_wq)
- return -ENOMEM;
+ mlx5_imr_ksm_entries = BIT_ULL(get_order(TASK_SIZE) -
+ MLX5_IMR_MTT_BITS);
return 0;
}
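
mlx5_ib_odp_init() sizes the top-level KSM table so that a single implicit MR spans the whole task address space, one slot per child MR of 2^MLX5_IMR_MTT_BITS pages. A back-of-the-envelope check of that computation; every constant here is an assumption chosen for the demo.

/*
 * Sketch of the mlx5_imr_ksm_entries computation above: address space
 * divided by the coverage of one child MR.  Constants are assumed.
 */
#include <stdio.h>

int main(void)
{
	unsigned int page_shift = 12;			/* assumed PAGE_SHIFT */
	unsigned int imr_mtt_bits = 18;			/* assumed MLX5_IMR_MTT_BITS */
	unsigned long long task_size = 1ULL << 47;	/* assumed TASK_SIZE */
	unsigned long long child_bytes = 1ULL << (imr_mtt_bits + page_shift);

	printf("child MR covers %llu MiB, KSM table needs %llu entries\n",
	       child_bytes >> 20, task_size / child_bytes);
	return 0;
}
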
-void mlx5_ib_odp_cleanup(void)
-{
- destroy_workqueue(mlx5_ib_page_fault_wq);
-}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index a1b3125f0a6e..0889ff367c86 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -475,60 +475,53 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
return 1;
}
-static int first_med_uuar(void)
+static int first_med_bfreg(void)
{
return 1;
}
-static int next_uuar(int n)
-{
- n++;
-
- while (((n % 4) & 2))
- n++;
+enum {
+ /* this is the first blue flame register in the array of bfregs assigned
+ * to a process. Since we do not use it for blue flame but rather
+ * regular 64 bit doorbells, we do not need a lock for maintaining
+ * "odd/even" order
+ */
+ NUM_NON_BLUE_FLAME_BFREGS = 1,
+};
- return n;
+static int max_bfregs(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi)
+{
+ return get_num_uars(dev, bfregi) * MLX5_NON_FP_BFREGS_PER_UAR;
}
-static int num_med_uuar(struct mlx5_uuar_info *uuari)
+static int num_med_bfreg(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi)
{
int n;
- n = uuari->num_uars * MLX5_NON_FP_BF_REGS_PER_PAGE -
- uuari->num_low_latency_uuars - 1;
+ n = max_bfregs(dev, bfregi) - bfregi->num_low_latency_bfregs -
+ NUM_NON_BLUE_FLAME_BFREGS;
return n >= 0 ? n : 0;
}
-static int max_uuari(struct mlx5_uuar_info *uuari)
-{
- return uuari->num_uars * 4;
-}
-
-static int first_hi_uuar(struct mlx5_uuar_info *uuari)
+static int first_hi_bfreg(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi)
{
int med;
- int i;
- int t;
-
- med = num_med_uuar(uuari);
- for (t = 0, i = first_med_uuar();; i = next_uuar(i)) {
- t++;
- if (t == med)
- return next_uuar(i);
- }
- return 0;
+ med = num_med_bfreg(dev, bfregi);
+ return ++med;
}
-static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
+static int alloc_high_class_bfreg(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi)
{
int i;
- for (i = first_hi_uuar(uuari); i < max_uuari(uuari); i = next_uuar(i)) {
- if (!test_bit(i, uuari->bitmap)) {
- set_bit(i, uuari->bitmap);
- uuari->count[i]++;
+ for (i = first_hi_bfreg(dev, bfregi); i < max_bfregs(dev, bfregi); i++) {
+ if (!bfregi->count[i]) {
+ bfregi->count[i]++;
return i;
}
}
@@ -536,87 +529,61 @@ static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
return -ENOMEM;
}
-static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari)
+static int alloc_med_class_bfreg(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi)
{
- int minidx = first_med_uuar();
+ int minidx = first_med_bfreg();
int i;
- for (i = first_med_uuar(); i < first_hi_uuar(uuari); i = next_uuar(i)) {
- if (uuari->count[i] < uuari->count[minidx])
+ for (i = first_med_bfreg(); i < first_hi_bfreg(dev, bfregi); i++) {
+ if (bfregi->count[i] < bfregi->count[minidx])
minidx = i;
+ if (!bfregi->count[minidx])
+ break;
}
- uuari->count[minidx]++;
+ bfregi->count[minidx]++;
return minidx;
}
-static int alloc_uuar(struct mlx5_uuar_info *uuari,
- enum mlx5_ib_latency_class lat)
+static int alloc_bfreg(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi,
+ enum mlx5_ib_latency_class lat)
{
- int uuarn = -EINVAL;
+ int bfregn = -EINVAL;
- mutex_lock(&uuari->lock);
+ mutex_lock(&bfregi->lock);
switch (lat) {
case MLX5_IB_LATENCY_CLASS_LOW:
- uuarn = 0;
- uuari->count[uuarn]++;
+ BUILD_BUG_ON(NUM_NON_BLUE_FLAME_BFREGS != 1);
+ bfregn = 0;
+ bfregi->count[bfregn]++;
break;
case MLX5_IB_LATENCY_CLASS_MEDIUM:
- if (uuari->ver < 2)
- uuarn = -ENOMEM;
+ if (bfregi->ver < 2)
+ bfregn = -ENOMEM;
else
- uuarn = alloc_med_class_uuar(uuari);
+ bfregn = alloc_med_class_bfreg(dev, bfregi);
break;
case MLX5_IB_LATENCY_CLASS_HIGH:
- if (uuari->ver < 2)
- uuarn = -ENOMEM;
+ if (bfregi->ver < 2)
+ bfregn = -ENOMEM;
else
- uuarn = alloc_high_class_uuar(uuari);
- break;
-
- case MLX5_IB_LATENCY_CLASS_FAST_PATH:
- uuarn = 2;
+ bfregn = alloc_high_class_bfreg(dev, bfregi);
break;
}
- mutex_unlock(&uuari->lock);
-
- return uuarn;
-}
+ mutex_unlock(&bfregi->lock);
-static void free_med_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
-{
- clear_bit(uuarn, uuari->bitmap);
- --uuari->count[uuarn];
+ return bfregn;
}
-static void free_high_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
+static void free_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, int bfregn)
{
- clear_bit(uuarn, uuari->bitmap);
- --uuari->count[uuarn];
-}
-
-static void free_uuar(struct mlx5_uuar_info *uuari, int uuarn)
-{
- int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
- int high_uuar = nuuars - uuari->num_low_latency_uuars;
-
- mutex_lock(&uuari->lock);
- if (uuarn == 0) {
- --uuari->count[uuarn];
- goto out;
- }
-
- if (uuarn < high_uuar) {
- free_med_class_uuar(uuari, uuarn);
- goto out;
- }
-
- free_high_class_uuar(uuari, uuarn);
-
-out:
- mutex_unlock(&uuari->lock);
+ mutex_lock(&bfregi->lock);
+ bfregi->count[bfregn]--;
+ mutex_unlock(&bfregi->lock);
}
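
The bfreg allocator above reserves index 0 for regular 64-bit doorbells, shares the medium-latency range by picking the least-used register, and hands out high-latency registers only when completely unused. A tiny stand-alone model of that policy; the table size and the class boundary are assumptions for the demo.

/*
 * User-space model of the bfreg allocation policy: slot 0 is the shared
 * non-blue-flame register, "medium" slots are shared (least used wins),
 * "high" slots are exclusive (first unused wins).
 */
#include <stdio.h>

#define DEMO_NUM_BFREGS	8
#define DEMO_FIRST_MED	1
#define DEMO_FIRST_HI	4	/* assumed class boundary */

static int count[DEMO_NUM_BFREGS];

static int alloc_high(void)
{
	int i;

	for (i = DEMO_FIRST_HI; i < DEMO_NUM_BFREGS; i++) {
		if (!count[i]) {
			count[i]++;
			return i;
		}
	}
	return -1;			/* -ENOMEM in the driver */
}

static int alloc_medium(void)
{
	int minidx = DEMO_FIRST_MED;
	int i;

	for (i = DEMO_FIRST_MED; i < DEMO_FIRST_HI; i++) {
		if (count[i] < count[minidx])
			minidx = i;
		if (!count[minidx])
			break;
	}
	count[minidx]++;
	return minidx;
}

int main(void)
{
	int i;

	for (i = 0; i < 5; i++)
		printf("high alloc   -> %d\n", alloc_high());
	for (i = 0; i < 4; i++)
		printf("medium alloc -> %d\n", alloc_medium());
	return 0;
}
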
static enum mlx5_qp_state to_mlx5_state(enum ib_qp_state state)
@@ -657,9 +624,20 @@ static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq,
static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq,
struct mlx5_ib_cq *recv_cq);
-static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
+static int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi, int bfregn)
{
- return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
+ int bfregs_per_sys_page;
+ int index_of_sys_page;
+ int offset;
+
+ bfregs_per_sys_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k) *
+ MLX5_NON_FP_BFREGS_PER_UAR;
+ index_of_sys_page = bfregn / bfregs_per_sys_page;
+
+ offset = bfregn % bfregs_per_sys_page / MLX5_NON_FP_BFREGS_PER_UAR;
+
+ return bfregi->sys_pages[index_of_sys_page] + offset;
}
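
bfregn_to_uar_index() decomposes a bfreg number into a system page and an offset of whole UARs within that page. A sketch with an assumed geometry; the UARs-per-system-page and non-fast-path-bfregs-per-UAR values are not read from any device here.

/*
 * Sketch of the bfregn -> UAR index mapping above; geometry is assumed.
 */
#include <stdio.h>

#define DEMO_NON_FP_BFREGS_PER_UAR	2
#define DEMO_UARS_PER_SYS_PAGE		4

static int demo_bfregn_to_uar_index(const int *sys_pages, int bfregn)
{
	int bfregs_per_sys_page = DEMO_UARS_PER_SYS_PAGE *
				  DEMO_NON_FP_BFREGS_PER_UAR;
	int index_of_sys_page = bfregn / bfregs_per_sys_page;
	int offset = bfregn % bfregs_per_sys_page / DEMO_NON_FP_BFREGS_PER_UAR;

	return sys_pages[index_of_sys_page] + offset;
}

int main(void)
{
	int sys_pages[] = { 16, 40 };	/* pretend first UAR index of each page */
	int n;

	for (n = 0; n < 12; n++)
		printf("bfregn %2d -> uar_index %d\n", n,
		       demo_bfregn_to_uar_index(sys_pages, n));
	return 0;
}
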
static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev,
@@ -762,6 +740,13 @@ err_umem:
return err;
}
+static int adjust_bfregn(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi, int bfregn)
+{
+ return bfregn / MLX5_NON_FP_BFREGS_PER_UAR * MLX5_BFREGS_PER_UAR +
+ bfregn % MLX5_NON_FP_BFREGS_PER_UAR;
+}
+
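
adjust_bfregn() converts the driver-internal numbering, which skips the fast-path registers, into the index reported to user space, which counts every bfreg in a UAR. A sketch with assumed per-UAR register counts.

/*
 * Sketch of adjust_bfregn(); the per-UAR counts are assumptions.
 */
#include <stdio.h>

#define DEMO_BFREGS_PER_UAR		4
#define DEMO_NON_FP_BFREGS_PER_UAR	2

static int demo_adjust_bfregn(int bfregn)
{
	return bfregn / DEMO_NON_FP_BFREGS_PER_UAR * DEMO_BFREGS_PER_UAR +
	       bfregn % DEMO_NON_FP_BFREGS_PER_UAR;
}

int main(void)
{
	int n;

	for (n = 0; n < 6; n++)
		printf("driver bfregn %d -> user-visible index %d\n",
		       n, demo_adjust_bfregn(n));
	return 0;
}
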
static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_qp *qp, struct ib_udata *udata,
struct ib_qp_init_attr *attr,
@@ -776,7 +761,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
int uar_index;
int npages;
u32 offset = 0;
- int uuarn;
+ int bfregn;
int ncont = 0;
__be64 *pas;
void *qpc;
@@ -794,27 +779,27 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
*/
if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
/* In CROSS_CHANNEL CQ and QP must use the same UAR */
- uuarn = MLX5_CROSS_CHANNEL_UUAR;
+ bfregn = MLX5_CROSS_CHANNEL_BFREG;
else {
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
- if (uuarn < 0) {
- mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
+ bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_HIGH);
+ if (bfregn < 0) {
+ mlx5_ib_dbg(dev, "failed to allocate low latency BFREG\n");
mlx5_ib_dbg(dev, "reverting to medium latency\n");
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
- if (uuarn < 0) {
- mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
+ bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_MEDIUM);
+ if (bfregn < 0) {
+ mlx5_ib_dbg(dev, "failed to allocate medium latency BFREG\n");
mlx5_ib_dbg(dev, "reverting to high latency\n");
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
- if (uuarn < 0) {
- mlx5_ib_warn(dev, "uuar allocation failed\n");
- return uuarn;
+ bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_LOW);
+ if (bfregn < 0) {
+ mlx5_ib_warn(dev, "bfreg allocation failed\n");
+ return bfregn;
}
}
}
}
- uar_index = uuarn_to_uar_index(&context->uuari, uuarn);
- mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index);
+ uar_index = bfregn_to_uar_index(dev, &context->bfregi, bfregn);
+ mlx5_ib_dbg(dev, "bfregn 0x%x, uar_index 0x%x\n", bfregn, uar_index);
qp->rq.offset = 0;
qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
@@ -822,7 +807,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
err = set_user_buf_size(dev, qp, &ucmd, base, attr);
if (err)
- goto err_uuar;
+ goto err_bfreg;
if (ucmd.buf_addr && ubuffer->buf_size) {
ubuffer->buf_addr = ucmd.buf_addr;
@@ -831,14 +816,14 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
&ubuffer->umem, &npages, &page_shift,
&ncont, &offset);
if (err)
- goto err_uuar;
+ goto err_bfreg;
} else {
ubuffer->umem = NULL;
}
*inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * ncont;
- *in = mlx5_vzalloc(*inlen);
+ *in = kvzalloc(*inlen, GFP_KERNEL);
if (!*in) {
err = -ENOMEM;
goto err_umem;
@@ -854,8 +839,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
MLX5_SET(qpc, qpc, page_offset, offset);
MLX5_SET(qpc, qpc, uar_page, uar_index);
- resp->uuar_index = uuarn;
- qp->uuarn = uuarn;
+ resp->bfreg_index = adjust_bfregn(dev, &context->bfregi, bfregn);
+ qp->bfregn = bfregn;
err = mlx5_ib_db_map_user(context, ucmd.db_addr, &qp->db);
if (err) {
@@ -882,13 +867,13 @@ err_umem:
if (ubuffer->umem)
ib_umem_release(ubuffer->umem);
-err_uuar:
- free_uuar(&context->uuari, uuarn);
+err_bfreg:
+ free_bfreg(dev, &context->bfregi, bfregn);
return err;
}
-static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp,
- struct mlx5_ib_qp_base *base)
+static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_qp *qp, struct mlx5_ib_qp_base *base)
{
struct mlx5_ib_ucontext *context;
@@ -896,7 +881,7 @@ static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp,
mlx5_ib_db_unmap_user(context, &qp->db);
if (base->ubuffer.umem)
ib_umem_release(base->ubuffer.umem);
- free_uuar(&context->uuari, qp->uuarn);
+ free_bfreg(dev, &context->bfregi, qp->bfregn);
}
static int create_kernel_qp(struct mlx5_ib_dev *dev,
@@ -905,36 +890,32 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
u32 **in, int *inlen,
struct mlx5_ib_qp_base *base)
{
- enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
- struct mlx5_uuar_info *uuari;
int uar_index;
void *qpc;
- int uuarn;
int err;
- uuari = &dev->mdev->priv.uuari;
if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN |
IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
IB_QP_CREATE_IPOIB_UD_LSO |
+ IB_QP_CREATE_NETIF_QP |
mlx5_ib_create_qp_sqpn_qp1()))
return -EINVAL;
if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
- lc = MLX5_IB_LATENCY_CLASS_FAST_PATH;
-
- uuarn = alloc_uuar(uuari, lc);
- if (uuarn < 0) {
- mlx5_ib_dbg(dev, "\n");
- return -ENOMEM;
- }
+ qp->bf.bfreg = &dev->fp_bfreg;
+ else
+ qp->bf.bfreg = &dev->bfreg;
- qp->bf = &uuari->bfs[uuarn];
- uar_index = qp->bf->uar->index;
+ /* We need to divide by two since each register is comprised of
+ * two buffers of identical size, namely odd and even
+ */
+ qp->bf.buf_size = (1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size)) / 2;
+ uar_index = qp->bf.bfreg->index;
err = calc_sq_size(dev, init_attr, qp);
if (err < 0) {
mlx5_ib_dbg(dev, "err %d\n", err);
- goto err_uuar;
+ return err;
}
qp->rq.offset = 0;
@@ -944,13 +925,13 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, &qp->buf);
if (err) {
mlx5_ib_dbg(dev, "err %d\n", err);
- goto err_uuar;
+ return err;
}
qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
*inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages;
- *in = mlx5_vzalloc(*inlen);
+ *in = kvzalloc(*inlen, GFP_KERNEL);
if (!*in) {
err = -ENOMEM;
goto err_buf;
@@ -994,34 +975,30 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
return 0;
err_wrid:
- mlx5_db_free(dev->mdev, &qp->db);
kfree(qp->sq.wqe_head);
kfree(qp->sq.w_list);
kfree(qp->sq.wrid);
kfree(qp->sq.wr_data);
kfree(qp->rq.wrid);
+ mlx5_db_free(dev->mdev, &qp->db);
err_free:
kvfree(*in);
err_buf:
mlx5_buf_free(dev->mdev, &qp->buf);
-
-err_uuar:
- free_uuar(&dev->mdev->priv.uuari, uuarn);
return err;
}
static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
{
- mlx5_db_free(dev->mdev, &qp->db);
kfree(qp->sq.wqe_head);
kfree(qp->sq.w_list);
kfree(qp->sq.wrid);
kfree(qp->sq.wr_data);
kfree(qp->rq.wrid);
+ mlx5_db_free(dev->mdev, &qp->db);
mlx5_buf_free(dev->mdev, &qp->buf);
- free_uuar(&dev->mdev->priv.uuari, qp->bf->uuarn);
}
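
In create_kernel_qp() above, bf.buf_size is half of the hardware doorbell register because the register is split into two identical halves, and the post path later flips bf->offset between them. A small arithmetic sketch; the log_bf_reg_size value is assumed.

/*
 * Sketch of the odd/even doorbell-buffer toggling; register size assumed.
 */
#include <stdio.h>

int main(void)
{
	unsigned int log_bf_reg_size = 9;	/* assumed: 512-byte register */
	unsigned int buf_size = (1U << log_bf_reg_size) / 2;
	unsigned int offset = 0;
	int i;

	printf("buf_size = %u bytes\n", buf_size);
	for (i = 0; i < 4; i++) {
		printf("post %d rings the doorbell at offset %u\n", i, offset);
		offset ^= buf_size;		/* toggle between the halves */
	}
	return 0;
}
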
static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
@@ -1083,7 +1060,7 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
return err;
inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * ncont;
- in = mlx5_vzalloc(inlen);
+ in = kvzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
goto err_umem;
@@ -1163,12 +1140,13 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
u32 rq_pas_size = get_rq_pas_size(qpc);
inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size;
- in = mlx5_vzalloc(inlen);
+ in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
- MLX5_SET(rqc, rqc, vsd, 1);
+ if (!(rq->flags & MLX5_IB_RQ_CVLAN_STRIPPING))
+ MLX5_SET(rqc, rqc, vsd, 1);
MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
MLX5_SET(rqc, rqc, flush_in_error_en, 1);
@@ -1215,7 +1193,7 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
int err;
inlen = MLX5_ST_SZ_BYTES(create_tir_in);
- in = mlx5_vzalloc(inlen);
+ in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -1265,6 +1243,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (qp->rq.wqe_cnt) {
rq->base.container_mibqp = qp;
+ if (qp->flags & MLX5_IB_QP_CVLAN_STRIPPING)
+ rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING;
err = create_raw_packet_qp_rq(dev, rq, in);
if (err)
goto err_destroy_sq;
@@ -1353,7 +1333,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (init_attr->create_flags || init_attr->send_cq)
return -EINVAL;
- min_resp_len = offsetof(typeof(resp), uuar_index) + sizeof(resp.uuar_index);
+ min_resp_len = offsetof(typeof(resp), bfreg_index) + sizeof(resp.bfreg_index);
if (udata->outlen < min_resp_len)
return -EINVAL;
@@ -1392,7 +1372,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
}
inlen = MLX5_ST_SZ_BYTES(create_tir_in);
- in = mlx5_vzalloc(inlen);
+ in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -1526,9 +1506,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
&qp->raw_packet_qp.rq.base :
&qp->trans_qp.base;
- if (init_attr->qp_type != IB_QPT_RAW_PACKET)
- mlx5_ib_odp_create_qp(qp);
-
mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
@@ -1589,6 +1566,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
+ if (init_attr->create_flags & IB_QP_CREATE_CVLAN_STRIPPING) {
+ if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
+ MLX5_CAP_ETH(dev->mdev, vlan_cap)) ||
+ (init_attr->qp_type != IB_QPT_RAW_PACKET))
+ return -EOPNOTSUPP;
+ qp->flags |= MLX5_IB_QP_CVLAN_STRIPPING;
+ }
+
if (pd && pd->uobject) {
if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
mlx5_ib_dbg(dev, "copy failed\n");
@@ -1648,7 +1633,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
if (err)
return err;
} else {
- in = mlx5_vzalloc(inlen);
+ in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -1795,7 +1780,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
err_create:
if (qp->create_type == MLX5_QP_USER)
- destroy_qp_user(pd, qp, base);
+ destroy_qp_user(dev, pd, qp, base);
else if (qp->create_type == MLX5_QP_KERNEL)
destroy_qp_kernel(dev, qp);
@@ -1923,7 +1908,6 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
if (qp->state != IB_QPS_RESET) {
if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
- mlx5_ib_qp_disable_pagefaults(qp);
err = mlx5_core_qp_modify(dev->mdev,
MLX5_CMD_OP_2RST_QP, 0,
NULL, &base->mqp);
@@ -1974,7 +1958,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
if (qp->create_type == MLX5_QP_KERNEL)
destroy_qp_kernel(dev, qp);
else if (qp->create_type == MLX5_QP_USER)
- destroy_qp_user(&get_pd(qp)->ibpd, qp, base);
+ destroy_qp_user(dev, &get_pd(qp)->ibpd, qp, base);
}
static const char *ib_qp_type_str(enum ib_qp_type type)
@@ -2180,7 +2164,7 @@ static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
int err;
inlen = MLX5_ST_SZ_BYTES(modify_tis_in);
- in = mlx5_vzalloc(inlen);
+ in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -2205,7 +2189,7 @@ static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev,
int err;
inlen = MLX5_ST_SZ_BYTES(modify_tis_in);
- in = mlx5_vzalloc(inlen);
+ in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -2222,56 +2206,65 @@ static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev,
}
static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
- const struct ib_ah_attr *ah,
+ const struct rdma_ah_attr *ah,
struct mlx5_qp_path *path, u8 port, int attr_mask,
u32 path_flags, const struct ib_qp_attr *attr,
bool alt)
{
- enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port);
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah);
int err;
+ enum ib_gid_type gid_type;
+ u8 ah_flags = rdma_ah_get_ah_flags(ah);
+ u8 sl = rdma_ah_get_sl(ah);
if (attr_mask & IB_QP_PKEY_INDEX)
path->pkey_index = cpu_to_be16(alt ? attr->alt_pkey_index :
attr->pkey_index);
- if (ah->ah_flags & IB_AH_GRH) {
- if (ah->grh.sgid_index >=
+ if (ah_flags & IB_AH_GRH) {
+ if (grh->sgid_index >=
dev->mdev->port_caps[port - 1].gid_table_len) {
pr_err("sgid_index (%u) too large. max is %d\n",
- ah->grh.sgid_index,
+ grh->sgid_index,
dev->mdev->port_caps[port - 1].gid_table_len);
return -EINVAL;
}
}
- if (ll == IB_LINK_LAYER_ETHERNET) {
- if (!(ah->ah_flags & IB_AH_GRH))
+ if (ah->type == RDMA_AH_ATTR_TYPE_ROCE) {
+ if (!(ah_flags & IB_AH_GRH))
return -EINVAL;
- memcpy(path->rmac, ah->dmac, sizeof(ah->dmac));
+ err = mlx5_get_roce_gid_type(dev, port, grh->sgid_index,
+ &gid_type);
+ if (err)
+ return err;
+ memcpy(path->rmac, ah->roce.dmac, sizeof(ah->roce.dmac));
path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
- ah->grh.sgid_index);
- path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4;
+ grh->sgid_index);
+ path->dci_cfi_prio_sl = (sl & 0x7) << 4;
+ if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ path->ecn_dscp = (grh->traffic_class >> 2) & 0x3f;
} else {
path->fl_free_ar = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
path->fl_free_ar |=
(path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x40 : 0;
- path->rlid = cpu_to_be16(ah->dlid);
- path->grh_mlid = ah->src_path_bits & 0x7f;
- if (ah->ah_flags & IB_AH_GRH)
+ path->rlid = cpu_to_be16(rdma_ah_get_dlid(ah));
+ path->grh_mlid = rdma_ah_get_path_bits(ah) & 0x7f;
+ if (ah_flags & IB_AH_GRH)
path->grh_mlid |= 1 << 7;
- path->dci_cfi_prio_sl = ah->sl & 0xf;
+ path->dci_cfi_prio_sl = sl & 0xf;
}
- if (ah->ah_flags & IB_AH_GRH) {
- path->mgid_index = ah->grh.sgid_index;
- path->hop_limit = ah->grh.hop_limit;
+ if (ah_flags & IB_AH_GRH) {
+ path->mgid_index = grh->sgid_index;
+ path->hop_limit = grh->hop_limit;
path->tclass_flowlabel =
- cpu_to_be32((ah->grh.traffic_class << 20) |
- (ah->grh.flow_label));
- memcpy(path->rgid, ah->grh.dgid.raw, 16);
+ cpu_to_be32((grh->traffic_class << 20) |
+ (grh->flow_label));
+ memcpy(path->rgid, grh->dgid.raw, 16);
}
- err = ib_rate_to_mlx5(dev, ah->static_rate);
+ err = ib_rate_to_mlx5(dev, rdma_ah_get_static_rate(ah));
if (err < 0)
return err;
path->static_rate = err;
@@ -2283,7 +2276,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
return modify_raw_packet_eth_prio(dev->mdev,
&qp->raw_packet_qp.sq,
- ah->sl & 0xf);
+ sl & 0xf);
return 0;
}
@@ -2441,7 +2434,7 @@ static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
int err;
inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
- in = mlx5_vzalloc(inlen);
+ in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -2453,7 +2446,7 @@ static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
if (raw_qp_param->set_mask & MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID) {
if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
MLX5_SET64(modify_rq_in, in, modify_bitmask,
- MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID);
+ MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
MLX5_SET(rqc, rqc, counter_set_id, raw_qp_param->rq_q_ctr_id);
} else
pr_info_once("%s: RAW PACKET QP counters are not supported on current FW\n",
@@ -2486,7 +2479,7 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
int err;
inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
- in = mlx5_vzalloc(inlen);
+ in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -2808,7 +2801,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
qp->port) - 1;
mibport = &dev->port[port_num];
context->qp_counter_set_usr_page |=
- cpu_to_be32((u32)(mibport->q_cnt_id) << 24);
+ cpu_to_be32((u32)(mibport->cnts.set_id) << 24);
}
if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
@@ -2823,16 +2816,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (mlx5_st < 0)
goto out;
- /* If moving to a reset or error state, we must disable page faults on
- * this QP and flush all current page faults. Otherwise a stale page
- * fault may attempt to work on this QP after it is reset and moved
- * again to RTS, and may cause the driver and the device to get out of
- * sync. */
- if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
- (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR) &&
- (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
- mlx5_ib_qp_disable_pagefaults(qp);
-
if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
!optab[mlx5_cur][mlx5_new])
goto out;
@@ -2846,7 +2829,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
raw_qp_param.operation = op;
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
- raw_qp_param.rq_q_ctr_id = mibport->q_cnt_id;
+ raw_qp_param.rq_q_ctr_id = mibport->cnts.set_id;
raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID;
}
@@ -2864,10 +2847,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (err)
goto out;
- if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT &&
- (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
- mlx5_ib_qp_enable_pagefaults(qp);
-
qp->state = new_state;
if (attr_mask & IB_QP_ACCESS_FLAGS)
@@ -3029,20 +3008,20 @@ static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,
if (wr->opcode == IB_WR_LSO) {
struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr);
- int size_of_inl_hdr_start = sizeof(eseg->inline_hdr_start);
+ int size_of_inl_hdr_start = sizeof(eseg->inline_hdr.start);
u64 left, leftlen, copysz;
void *pdata = ud_wr->header;
left = ud_wr->hlen;
eseg->mss = cpu_to_be16(ud_wr->mss);
- eseg->inline_hdr_sz = cpu_to_be16(left);
+ eseg->inline_hdr.sz = cpu_to_be16(left);
/*
* check if there is space till the end of queue, if yes,
* copy all in one shot, otherwise copy till the end of queue,
* roll back and then copy the rest
*/
- leftlen = qend - (void *)eseg->inline_hdr_start;
+ leftlen = qend - (void *)eseg->inline_hdr.start;
copysz = min_t(u64, leftlen, left);
memcpy(seg - size_of_inl_hdr_start, pdata, copysz);
@@ -3080,9 +3059,10 @@ static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
dseg->addr = cpu_to_be64(sg->addr);
}
-static __be16 get_klm_octo(int npages)
+static u64 get_xlt_octo(u64 bytes)
{
- return cpu_to_be16(ALIGN(npages, 8) / 2);
+ return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) /
+ MLX5_IB_UMR_OCTOWORD;
}
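
get_xlt_octo() rounds a byte count up to the UMR alignment and expresses it in 16-byte octowords. A user-space sketch; the alignment and octoword constants are assumed values, not the driver's macros.

/*
 * Sketch of get_xlt_octo(); constants are assumed for the demo.
 */
#include <stdio.h>

#define DEMO_XLT_ALIGNMENT	64	/* assumed MLX5_IB_UMR_XLT_ALIGNMENT */
#define DEMO_UMR_OCTOWORD	16	/* assumed MLX5_IB_UMR_OCTOWORD */
#define DEMO_ALIGN(x, a)	(((x) + (a) - 1) & ~((unsigned long long)(a) - 1))

static unsigned long long demo_get_xlt_octo(unsigned long long bytes)
{
	return DEMO_ALIGN(bytes, DEMO_XLT_ALIGNMENT) / DEMO_UMR_OCTOWORD;
}

int main(void)
{
	/* e.g. 100 descriptors of 8 bytes each */
	printf("%llu bytes -> %llu octowords\n", 100ULL * 8,
	       demo_get_xlt_octo(100ULL * 8));
	return 0;
}
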
static __be64 frwr_mkey_mask(void)
@@ -3127,18 +3107,14 @@ static __be64 sig_mkey_mask(void)
}
static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
- struct mlx5_ib_mr *mr)
+ struct mlx5_ib_mr *mr)
{
- int ndescs = mr->ndescs;
+ int size = mr->ndescs * mr->desc_size;
memset(umr, 0, sizeof(*umr));
- if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
- /* KLMs take twice the size of MTTs */
- ndescs *= 2;
-
umr->flags = MLX5_UMR_CHECK_NOT_FREE;
- umr->klm_octowords = get_klm_octo(ndescs);
+ umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
umr->mkey_mask = frwr_mkey_mask();
}
@@ -3149,37 +3125,17 @@ static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
umr->flags = MLX5_UMR_INLINE;
}
-static __be64 get_umr_reg_mr_mask(int atomic)
+static __be64 get_umr_enable_mr_mask(void)
{
u64 result;
- result = MLX5_MKEY_MASK_LEN |
- MLX5_MKEY_MASK_PAGE_SIZE |
- MLX5_MKEY_MASK_START_ADDR |
- MLX5_MKEY_MASK_PD |
- MLX5_MKEY_MASK_LR |
- MLX5_MKEY_MASK_LW |
- MLX5_MKEY_MASK_KEY |
- MLX5_MKEY_MASK_RR |
- MLX5_MKEY_MASK_RW |
+ result = MLX5_MKEY_MASK_KEY |
MLX5_MKEY_MASK_FREE;
- if (atomic)
- result |= MLX5_MKEY_MASK_A;
-
return cpu_to_be64(result);
}
-static __be64 get_umr_unreg_mr_mask(void)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_FREE;
-
- return cpu_to_be64(result);
-}
-
-static __be64 get_umr_update_mtt_mask(void)
+static __be64 get_umr_disable_mr_mask(void)
{
u64 result;
@@ -3194,23 +3150,22 @@ static __be64 get_umr_update_translation_mask(void)
result = MLX5_MKEY_MASK_LEN |
MLX5_MKEY_MASK_PAGE_SIZE |
- MLX5_MKEY_MASK_START_ADDR |
- MLX5_MKEY_MASK_KEY |
- MLX5_MKEY_MASK_FREE;
+ MLX5_MKEY_MASK_START_ADDR;
return cpu_to_be64(result);
}
-static __be64 get_umr_update_access_mask(void)
+static __be64 get_umr_update_access_mask(int atomic)
{
u64 result;
- result = MLX5_MKEY_MASK_LW |
+ result = MLX5_MKEY_MASK_LR |
+ MLX5_MKEY_MASK_LW |
MLX5_MKEY_MASK_RR |
- MLX5_MKEY_MASK_RW |
- MLX5_MKEY_MASK_A |
- MLX5_MKEY_MASK_KEY |
- MLX5_MKEY_MASK_FREE;
+ MLX5_MKEY_MASK_RW;
+
+ if (atomic)
+ result |= MLX5_MKEY_MASK_A;
return cpu_to_be64(result);
}
@@ -3219,9 +3174,7 @@ static __be64 get_umr_update_pd_mask(void)
{
u64 result;
- result = MLX5_MKEY_MASK_PD |
- MLX5_MKEY_MASK_KEY |
- MLX5_MKEY_MASK_FREE;
+ result = MLX5_MKEY_MASK_PD;
return cpu_to_be64(result);
}
@@ -3238,24 +3191,24 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
else
umr->flags = MLX5_UMR_CHECK_NOT_FREE; /* fail if not free */
- if (!(wr->send_flags & MLX5_IB_SEND_UMR_UNREG)) {
- umr->klm_octowords = get_klm_octo(umrwr->npages);
- if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT) {
- umr->mkey_mask = get_umr_update_mtt_mask();
- umr->bsf_octowords = get_klm_octo(umrwr->target.offset);
- umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
- }
- if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
- umr->mkey_mask |= get_umr_update_translation_mask();
- if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_ACCESS)
- umr->mkey_mask |= get_umr_update_access_mask();
- if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD)
- umr->mkey_mask |= get_umr_update_pd_mask();
- if (!umr->mkey_mask)
- umr->mkey_mask = get_umr_reg_mr_mask(atomic);
- } else {
- umr->mkey_mask = get_umr_unreg_mr_mask();
+ umr->xlt_octowords = cpu_to_be16(get_xlt_octo(umrwr->xlt_size));
+ if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) {
+ u64 offset = get_xlt_octo(umrwr->offset);
+
+ umr->xlt_offset = cpu_to_be16(offset & 0xffff);
+ umr->xlt_offset_47_16 = cpu_to_be32(offset >> 16);
+ umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
+ }
+ if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
+ umr->mkey_mask |= get_umr_update_translation_mask();
+ if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) {
+ umr->mkey_mask |= get_umr_update_access_mask(atomic);
+ umr->mkey_mask |= get_umr_update_pd_mask();
}
+ if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR)
+ umr->mkey_mask |= get_umr_enable_mr_mask();
+ if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
+ umr->mkey_mask |= get_umr_disable_mr_mask();
if (!wr->num_sge)
umr->flags |= MLX5_UMR_INLINE;
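
set_reg_umr_segment() now builds mkey_mask by OR-ing one mask per requested update instead of using a few fixed masks. A minimal model of that composition; the bit values below are hypothetical and exist only to make the OR-ing visible.

/*
 * Hypothetical model of composing a UMR mkey mask from send flags.
 */
#include <stdio.h>
#include <stdint.h>

#define DEMO_UMR_UPDATE_TRANSLATION	(1u << 0)
#define DEMO_UMR_UPDATE_PD_ACCESS	(1u << 1)
#define DEMO_UMR_ENABLE_MR		(1u << 2)

static uint64_t demo_build_mkey_mask(unsigned int send_flags, int atomic)
{
	uint64_t mask = 0;

	if (send_flags & DEMO_UMR_UPDATE_TRANSLATION)
		mask |= 0x7;		/* hypothetical len/page-size/start-addr bits */
	if (send_flags & DEMO_UMR_UPDATE_PD_ACCESS) {
		mask |= 0x78;		/* hypothetical lr/lw/rr/rw bits */
		if (atomic)
			mask |= 0x80;	/* hypothetical atomic bit */
		mask |= 0x100;		/* hypothetical pd bit */
	}
	if (send_flags & DEMO_UMR_ENABLE_MR)
		mask |= 0x600;		/* hypothetical key + free bits */
	return mask;
}

int main(void)
{
	printf("mask = 0x%llx\n", (unsigned long long)demo_build_mkey_mask(
	       DEMO_UMR_UPDATE_TRANSLATION | DEMO_UMR_UPDATE_PD_ACCESS, 1));
	return 0;
}
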
@@ -3303,17 +3256,17 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w
struct mlx5_umr_wr *umrwr = umr_wr(wr);
memset(seg, 0, sizeof(*seg));
- if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) {
+ if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
seg->status = MLX5_MKEY_STATUS_FREE;
- return;
- }
seg->flags = convert_access(umrwr->access_flags);
- if (!(wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT)) {
- if (umrwr->pd)
- seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn);
- seg->start_addr = cpu_to_be64(umrwr->target.virt_addr);
- }
+ if (umrwr->pd)
+ seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn);
+ if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION &&
+ !umrwr->length)
+ seg->flags_pd |= cpu_to_be32(MLX5_MKEY_LEN64);
+
+ seg->start_addr = cpu_to_be64(umrwr->virt_addr);
seg->len = cpu_to_be64(umrwr->length);
seg->log2_page_size = umrwr->page_shift;
seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
@@ -3611,7 +3564,7 @@ static int set_sig_data_segment(struct ib_sig_handover_wr *wr,
}
static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
- struct ib_sig_handover_wr *wr, u32 nelements,
+ struct ib_sig_handover_wr *wr, u32 size,
u32 length, u32 pdn)
{
struct ib_mr *sig_mr = wr->sig_mr;
@@ -3626,17 +3579,17 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
MLX5_MKEY_BSF_EN | pdn);
seg->len = cpu_to_be64(length);
- seg->xlt_oct_size = cpu_to_be32(be16_to_cpu(get_klm_octo(nelements)));
+ seg->xlt_oct_size = cpu_to_be32(get_xlt_octo(size));
seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
}
static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
- u32 nelements)
+ u32 size)
{
memset(umr, 0, sizeof(*umr));
umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE;
- umr->klm_octowords = get_klm_octo(nelements);
+ umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE);
umr->mkey_mask = sig_mkey_mask();
}
@@ -3648,7 +3601,7 @@ static int set_sig_umr_wr(struct ib_send_wr *send_wr, struct mlx5_ib_qp *qp,
struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr);
struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr);
u32 pdn = get_pd(qp)->pdn;
- u32 klm_oct_size;
+ u32 xlt_size;
int region_len, ret;
if (unlikely(wr->wr.num_sge != 1) ||
@@ -3670,15 +3623,15 @@ static int set_sig_umr_wr(struct ib_send_wr *send_wr, struct mlx5_ib_qp *qp,
* then we use strided block format (3 octowords),
* else we use single KLM (1 octoword)
**/
- klm_oct_size = wr->prot ? 3 : 1;
+ xlt_size = wr->prot ? 0x30 : sizeof(struct mlx5_klm);
- set_sig_umr_segment(*seg, klm_oct_size);
+ set_sig_umr_segment(*seg, xlt_size);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
if (unlikely((*seg == qp->sq.qend)))
*seg = mlx5_get_send_wqe(qp, 0);
- set_sig_mkey_segment(*seg, wr, klm_oct_size, region_len, pdn);
+ set_sig_mkey_segment(*seg, wr, xlt_size, region_len, pdn);
*seg += sizeof(struct mlx5_mkey_seg);
*size += sizeof(struct mlx5_mkey_seg) / 16;
if (unlikely((*seg == qp->sq.qend)))
@@ -3708,8 +3661,9 @@ static int set_psv_wr(struct ib_sig_domain *domain,
psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
break;
default:
- pr_err("Bad signature type given.\n");
- return 1;
+ pr_err("Bad signature type (%d) is given.\n",
+ domain->sig_type);
+ return -EINVAL;
}
*seg += sizeof(*psv_seg);
@@ -3784,42 +3738,6 @@ static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
}
}
-static void mlx5_bf_copy(u64 __iomem *dst, u64 *src,
- unsigned bytecnt, struct mlx5_ib_qp *qp)
-{
- while (bytecnt > 0) {
- __iowrite64_copy(dst++, src++, 8);
- __iowrite64_copy(dst++, src++, 8);
- __iowrite64_copy(dst++, src++, 8);
- __iowrite64_copy(dst++, src++, 8);
- __iowrite64_copy(dst++, src++, 8);
- __iowrite64_copy(dst++, src++, 8);
- __iowrite64_copy(dst++, src++, 8);
- __iowrite64_copy(dst++, src++, 8);
- bytecnt -= 64;
- if (unlikely(src == qp->sq.qend))
- src = mlx5_get_send_wqe(qp, 0);
- }
-}
-
-static u8 get_fence(u8 fence, struct ib_send_wr *wr)
-{
- if (unlikely(wr->opcode == IB_WR_LOCAL_INV &&
- wr->send_flags & IB_SEND_FENCE))
- return MLX5_FENCE_MODE_STRONG_ORDERING;
-
- if (unlikely(fence)) {
- if (wr->send_flags & IB_SEND_FENCE)
- return MLX5_FENCE_MODE_SMALL_AND_FENCE;
- else
- return fence;
- } else if (unlikely(wr->send_flags & IB_SEND_FENCE)) {
- return MLX5_FENCE_MODE_FENCE;
- }
-
- return 0;
-}
-
static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
struct mlx5_wqe_ctrl_seg **ctrl,
struct ib_send_wr *wr, unsigned *idx,
@@ -3848,8 +3766,7 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
static void finish_wqe(struct mlx5_ib_qp *qp,
struct mlx5_wqe_ctrl_seg *ctrl,
u8 size, unsigned idx, u64 wr_id,
- int nreq, u8 fence, u8 next_fence,
- u32 mlx5_opcode)
+ int nreq, u8 fence, u32 mlx5_opcode)
{
u8 opmod = 0;
@@ -3857,7 +3774,6 @@ static void finish_wqe(struct mlx5_ib_qp *qp,
mlx5_opcode | ((u32)opmod << 24));
ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8));
ctrl->fm_ce_se |= fence;
- qp->fm_cache = next_fence;
if (unlikely(qp->wq_sig))
ctrl->signature = wq_sig(ctrl);
@@ -3897,7 +3813,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
qp = to_mqp(ibqp);
- bf = qp->bf;
+ bf = &qp->bf;
qend = qp->sq.qend;
spin_lock_irqsave(&qp->sq.lock, flags);
@@ -3917,7 +3833,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
goto out;
}
- fence = qp->fm_cache;
num_sge = wr->num_sge;
if (unlikely(num_sge > qp->sq.max_gs)) {
mlx5_ib_warn(dev, "\n");
@@ -3934,6 +3849,19 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
goto out;
}
+ if (wr->opcode == IB_WR_LOCAL_INV ||
+ wr->opcode == IB_WR_REG_MR) {
+ fence = dev->umr_fence;
+ next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
+ } else if (wr->send_flags & IB_SEND_FENCE) {
+ if (qp->next_fence)
+ fence = MLX5_FENCE_MODE_SMALL_AND_FENCE;
+ else
+ fence = MLX5_FENCE_MODE_FENCE;
+ } else {
+ fence = qp->next_fence;
+ }
+
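
The block just added picks the fence for each WQE from the opcode, the IB_SEND_FENCE flag, and the fence left behind by the previous WQE. A stand-alone model of that decision; the enum values and the strong umr_fence choice are placeholders for the demo.

/*
 * Simplified model of the per-WQE fence selection above.
 */
#include <stdio.h>

enum demo_fence {
	DEMO_FENCE_NONE,
	DEMO_FENCE_INITIATOR_SMALL,
	DEMO_FENCE_SMALL_AND_FENCE,
	DEMO_FENCE_FENCE,
	DEMO_FENCE_STRONG_ORDERING,
};

static enum demo_fence demo_pick_fence(int is_mr_wr, int send_fence,
				       enum demo_fence umr_fence,
				       enum demo_fence *next_fence)
{
	if (is_mr_wr) {			/* IB_WR_LOCAL_INV / IB_WR_REG_MR */
		*next_fence = DEMO_FENCE_INITIATOR_SMALL;
		return umr_fence;
	}
	if (send_fence)			/* IB_SEND_FENCE requested */
		return *next_fence ? DEMO_FENCE_SMALL_AND_FENCE :
				     DEMO_FENCE_FENCE;
	return *next_fence;		/* inherit the pending fence */
}

int main(void)
{
	enum demo_fence next = DEMO_FENCE_NONE;

	printf("REG_MR      -> fence %d\n",
	       demo_pick_fence(1, 0, DEMO_FENCE_STRONG_ORDERING, &next));
	printf("fenced SEND -> fence %d\n",
	       demo_pick_fence(0, 1, DEMO_FENCE_STRONG_ORDERING, &next));
	printf("plain SEND  -> fence %d\n",
	       demo_pick_fence(0, 0, DEMO_FENCE_STRONG_ORDERING, &next));
	return 0;
}
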
switch (ibqp->qp_type) {
case IB_QPT_XRC_INI:
xrc = seg;
@@ -3960,7 +3888,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
goto out;
case IB_WR_LOCAL_INV:
- next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
set_linv_wr(qp, &seg, &size);
@@ -3968,7 +3895,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case IB_WR_REG_MR:
- next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
qp->sq.wr_data[idx] = IB_WR_REG_MR;
ctrl->imm = cpu_to_be32(reg_wr(wr)->key);
err = set_reg_wr(qp, reg_wr(wr), &seg, &size);
@@ -3991,9 +3917,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
goto out;
}
- finish_wqe(qp, ctrl, size, idx, wr->wr_id,
- nreq, get_fence(fence, wr),
- next_fence, MLX5_OPCODE_UMR);
+ finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
+ fence, MLX5_OPCODE_UMR);
/*
* SET_PSV WQEs are not signaled and solicited
* on error
@@ -4018,9 +3943,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
goto out;
}
- finish_wqe(qp, ctrl, size, idx, wr->wr_id,
- nreq, get_fence(fence, wr),
- next_fence, MLX5_OPCODE_SET_PSV);
+ finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
+ fence, MLX5_OPCODE_SET_PSV);
err = begin_wqe(qp, &seg, &ctrl, wr,
&idx, &size, nreq);
if (err) {
@@ -4030,7 +3954,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
goto out;
}
- next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire,
mr->sig->psv_wire.psv_idx, &seg,
&size);
@@ -4040,9 +3963,9 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
goto out;
}
- finish_wqe(qp, ctrl, size, idx, wr->wr_id,
- nreq, get_fence(fence, wr),
- next_fence, MLX5_OPCODE_SET_PSV);
+ finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
+ fence, MLX5_OPCODE_SET_PSV);
+ qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
num_sge = 0;
goto skip_psv;
@@ -4067,6 +3990,12 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case IB_QPT_SMI:
+ if (unlikely(!mdev->port_caps[qp->port - 1].has_smi)) {
+ mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n");
+ err = -EPERM;
+ *bad_wr = wr;
+ goto out;
+ }
case MLX5_IB_QPT_HW_GSI:
set_datagram_seg(seg, wr);
seg += sizeof(struct mlx5_wqe_datagram_seg);
@@ -4147,8 +4076,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
}
- finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
- get_fence(fence, wr), next_fence,
+ qp->next_fence = next_fence;
+ finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, fence,
mlx5_ib_opcode[wr->opcode]);
skip_psv:
if (0)
@@ -4170,28 +4099,13 @@ out:
* we hit doorbell */
wmb();
- if (bf->need_lock)
- spin_lock(&bf->lock);
- else
- __acquire(&bf->lock);
-
- /* TBD enable WC */
- if (0 && nreq == 1 && bf->uuarn && inl && size > 1 && size <= bf->buf_size / 16) {
- mlx5_bf_copy(bf->reg + bf->offset, (u64 *)ctrl, ALIGN(size * 16, 64), qp);
- /* wc_wmb(); */
- } else {
- mlx5_write64((__be32 *)ctrl, bf->regreg + bf->offset,
- MLX5_GET_DOORBELL_LOCK(&bf->lock32));
- /* Make sure doorbells don't leak out of SQ spinlock
- * and reach the HCA out of order.
- */
- mmiowb();
- }
+ /* currently we support only regular doorbells */
+ mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset, NULL);
+ /* Make sure doorbells don't leak out of SQ spinlock
+ * and reach the HCA out of order.
+ */
+ mmiowb();
bf->offset ^= bf->buf_size;
- if (bf->need_lock)
- spin_unlock(&bf->lock);
- else
- __release(&bf->lock);
}
spin_unlock_irqrestore(&qp->sq.lock, flags);
@@ -4324,33 +4238,36 @@ static int to_ib_qp_access_flags(int mlx5_flags)
return ib_flags;
}
-static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr,
- struct mlx5_qp_path *path)
+static void to_rdma_ah_attr(struct mlx5_ib_dev *ibdev,
+ struct rdma_ah_attr *ah_attr,
+ struct mlx5_qp_path *path)
{
struct mlx5_core_dev *dev = ibdev->mdev;
- memset(ib_ah_attr, 0, sizeof(*ib_ah_attr));
- ib_ah_attr->port_num = path->port;
+ memset(ah_attr, 0, sizeof(*ah_attr));
- if (ib_ah_attr->port_num == 0 ||
- ib_ah_attr->port_num > MLX5_CAP_GEN(dev, num_ports))
+ ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, path->port);
+ rdma_ah_set_port_num(ah_attr, path->port);
+ if (rdma_ah_get_port_num(ah_attr) == 0 ||
+ rdma_ah_get_port_num(ah_attr) > MLX5_CAP_GEN(dev, num_ports))
return;
- ib_ah_attr->sl = path->dci_cfi_prio_sl & 0xf;
+ rdma_ah_set_port_num(ah_attr, path->port);
+ rdma_ah_set_sl(ah_attr, path->dci_cfi_prio_sl & 0xf);
- ib_ah_attr->dlid = be16_to_cpu(path->rlid);
- ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f;
- ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0;
- ib_ah_attr->ah_flags = (path->grh_mlid & (1 << 7)) ? IB_AH_GRH : 0;
- if (ib_ah_attr->ah_flags) {
- ib_ah_attr->grh.sgid_index = path->mgid_index;
- ib_ah_attr->grh.hop_limit = path->hop_limit;
- ib_ah_attr->grh.traffic_class =
- (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
- ib_ah_attr->grh.flow_label =
- be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
- memcpy(ib_ah_attr->grh.dgid.raw,
- path->rgid, sizeof(ib_ah_attr->grh.dgid.raw));
+ rdma_ah_set_dlid(ah_attr, be16_to_cpu(path->rlid));
+ rdma_ah_set_path_bits(ah_attr, path->grh_mlid & 0x7f);
+ rdma_ah_set_static_rate(ah_attr,
+ path->static_rate ? path->static_rate - 5 : 0);
+ if (path->grh_mlid & (1 << 7)) {
+ u32 tc_fl = be32_to_cpu(path->tclass_flowlabel);
+
+ rdma_ah_set_grh(ah_attr, NULL,
+ tc_fl & 0xfffff,
+ path->mgid_index,
+ path->hop_limit,
+ (tc_fl >> 20) & 0xff);
+ rdma_ah_set_dgid_raw(ah_attr, path->rgid);
}
}
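
to_rdma_ah_attr() unpacks the tclass_flowlabel word after byte-swapping: the traffic class sits in bits 27:20 and the flow label in bits 19:0. A one-value sketch of that unpack; the sample word is arbitrary and already in host order.

/*
 * Sketch of the traffic class / flow label unpack used above.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t tc_fl = (0x2au << 20) | 0x12345;	/* arbitrary sample */

	printf("traffic_class = 0x%x, flow_label = 0x%x\n",
	       (tc_fl >> 20) & 0xff, tc_fl & 0xfffff);
	return 0;
}
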
@@ -4364,7 +4281,7 @@ static int query_raw_packet_qp_sq_state(struct mlx5_ib_dev *dev,
int err;
inlen = MLX5_ST_SZ_BYTES(query_sq_out);
- out = mlx5_vzalloc(inlen);
+ out = kvzalloc(inlen, GFP_KERNEL);
if (!out)
return -ENOMEM;
@@ -4391,7 +4308,7 @@ static int query_raw_packet_qp_rq_state(struct mlx5_ib_dev *dev,
int err;
inlen = MLX5_ST_SZ_BYTES(query_rq_out);
- out = mlx5_vzalloc(inlen);
+ out = kvzalloc(inlen, GFP_KERNEL);
if (!out)
return -ENOMEM;
@@ -4515,11 +4432,12 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
to_ib_qp_access_flags(be32_to_cpu(context->params2));
if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
- to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
- to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
+ to_rdma_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
+ to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
qp_attr->alt_pkey_index =
be16_to_cpu(context->alt_path.pkey_index);
- qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
+ qp_attr->alt_port_num =
+ rdma_ah_get_port_num(&qp_attr->alt_ah_attr);
}
qp_attr->pkey_index = be16_to_cpu(context->pri_path.pkey_index);
@@ -4559,14 +4477,6 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask,
qp_init_attr);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- /*
- * Wait for any outstanding page faults, in case the user frees memory
- * based upon this query's result.
- */
- flush_workqueue(mlx5_ib_page_fault_wq);
-#endif
-
mutex_lock(&qp->mutex);
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
@@ -4691,6 +4601,7 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
struct ib_wq_init_attr *init_attr)
{
struct mlx5_ib_dev *dev;
+ int has_net_offloads;
__be64 *rq_pas0;
void *in;
void *rqc;
@@ -4701,7 +4612,7 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
dev = to_mdev(pd->device);
inlen = MLX5_ST_SZ_BYTES(create_rq_in) + sizeof(u64) * rwq->rq_num_pas;
- in = mlx5_vzalloc(inlen);
+ in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -4722,9 +4633,28 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
MLX5_SET(wq, wq, log_wq_pg_sz, rwq->log_page_size);
MLX5_SET(wq, wq, wq_signature, rwq->wq_sig);
MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma);
+ has_net_offloads = MLX5_CAP_GEN(dev->mdev, eth_net_offloads);
+ if (init_attr->create_flags & IB_WQ_FLAGS_CVLAN_STRIPPING) {
+ if (!(has_net_offloads && MLX5_CAP_ETH(dev->mdev, vlan_cap))) {
+ mlx5_ib_dbg(dev, "VLAN offloads are not supported\n");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ } else {
+ MLX5_SET(rqc, rqc, vsd, 1);
+ }
+ if (init_attr->create_flags & IB_WQ_FLAGS_SCATTER_FCS) {
+ if (!(has_net_offloads && MLX5_CAP_ETH(dev->mdev, scatter_fcs))) {
+ mlx5_ib_dbg(dev, "Scatter FCS is not supported\n");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ MLX5_SET(rqc, rqc, scatter_fcs, 1);
+ }
rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0);
err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp);
+out:
kvfree(in);
return err;
}
@@ -4912,7 +4842,7 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
return ERR_PTR(-ENOMEM);
inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
- in = mlx5_vzalloc(inlen);
+ in = kvzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
goto err;
@@ -4991,7 +4921,7 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
return -EOPNOTSUPP;
inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
- in = mlx5_vzalloc(inlen);
+ in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -5008,10 +4938,38 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state);
MLX5_SET(rqc, rqc, state, wq_state);
+ if (wq_attr_mask & IB_WQ_FLAGS) {
+ if (wq_attr->flags_mask & IB_WQ_FLAGS_CVLAN_STRIPPING) {
+ if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
+ MLX5_CAP_ETH(dev->mdev, vlan_cap))) {
+ mlx5_ib_dbg(dev, "VLAN offloads are not "
+ "supported\n");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ MLX5_SET64(modify_rq_in, in, modify_bitmask,
+ MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD);
+ MLX5_SET(rqc, rqc, vsd,
+ (wq_attr->flags & IB_WQ_FLAGS_CVLAN_STRIPPING) ? 0 : 1);
+ }
+ }
+
+ if (curr_wq_state == IB_WQS_RESET && wq_state == IB_WQS_RDY) {
+ if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
+ MLX5_SET64(modify_rq_in, in, modify_bitmask,
+ MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
+ MLX5_SET(rqc, rqc, counter_set_id,
+ dev->port->cnts.set_id);
+ } else
+ pr_info_once("%s: Receive WQ counters are not supported on current FW\n",
+ dev->ib_dev.name);
+ }
+
err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in, inlen);
- kvfree(in);
if (!err)
rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state;
+out:
+ kvfree(in);
return err;
}
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 6f4397ee1ed6..43707b101f47 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -127,7 +127,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
goto err_umem;
}
- in->pas = mlx5_vzalloc(sizeof(*in->pas) * ncont);
+ in->pas = kvzalloc(sizeof(*in->pas) * ncont, GFP_KERNEL);
if (!in->pas) {
err = -ENOMEM;
goto err_umem;
@@ -165,8 +165,6 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
int err;
int i;
struct mlx5_wqe_srq_next_seg *next;
- int page_shift;
- int npages;
err = mlx5_db_alloc(dev->mdev, &srq->db);
if (err) {
@@ -179,7 +177,6 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
err = -ENOMEM;
goto err_db;
}
- page_shift = srq->buf.page_shift;
srq->head = 0;
srq->tail = srq->msrq.max - 1;
@@ -191,10 +188,8 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
cpu_to_be16((i + 1) & (srq->msrq.max - 1));
}
- npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT));
- mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n",
- buf_size, page_shift, srq->buf.npages, npages);
- in->pas = mlx5_vzalloc(sizeof(*in->pas) * npages);
+ mlx5_ib_dbg(dev, "srq->buf.page_shift = %d\n", srq->buf.page_shift);
+ in->pas = kvzalloc(sizeof(*in->pas) * srq->buf.npages, GFP_KERNEL);
if (!in->pas) {
err = -ENOMEM;
goto err_buf;
@@ -208,7 +203,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
}
srq->wq_sig = !!srq_signature;
- in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
in->type == IB_SRQT_XRC)
in->user_index = MLX5_IB_DEFAULT_UIDX;
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index c9f0f364f484..2aec9908c40a 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -152,7 +152,7 @@ u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port)
int mthca_create_ah(struct mthca_dev *dev,
struct mthca_pd *pd,
- struct ib_ah_attr *ah_attr,
+ struct rdma_ah_attr *ah_attr,
struct mthca_ah *ah)
{
u32 index = -1;
@@ -196,21 +196,26 @@ on_hca_fail:
ah->key = pd->ntmr.ibmr.lkey;
- av->port_pd = cpu_to_be32(pd->pd_num | (ah_attr->port_num << 24));
- av->g_slid = ah_attr->src_path_bits;
- av->dlid = cpu_to_be16(ah_attr->dlid);
+ av->port_pd = cpu_to_be32(pd->pd_num |
+ (rdma_ah_get_port_num(ah_attr) << 24));
+ av->g_slid = rdma_ah_get_path_bits(ah_attr);
+ av->dlid = cpu_to_be16(rdma_ah_get_dlid(ah_attr));
av->msg_sr = (3 << 4) | /* 2K message */
- mthca_get_rate(dev, ah_attr->static_rate, ah_attr->port_num);
- av->sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
- if (ah_attr->ah_flags & IB_AH_GRH) {
+ mthca_get_rate(dev, rdma_ah_get_static_rate(ah_attr),
+ rdma_ah_get_port_num(ah_attr));
+ av->sl_tclass_flowlabel = cpu_to_be32(rdma_ah_get_sl(ah_attr) << 28);
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
+
av->g_slid |= 0x80;
- av->gid_index = (ah_attr->port_num - 1) * dev->limits.gid_table_len +
- ah_attr->grh.sgid_index;
- av->hop_limit = ah_attr->grh.hop_limit;
+ av->gid_index = (rdma_ah_get_port_num(ah_attr) - 1) *
+ dev->limits.gid_table_len +
+ grh->sgid_index;
+ av->hop_limit = grh->hop_limit;
av->sl_tclass_flowlabel |=
- cpu_to_be32((ah_attr->grh.traffic_class << 20) |
- ah_attr->grh.flow_label);
- memcpy(av->dgid, ah_attr->grh.dgid.raw, 16);
+ cpu_to_be32((grh->traffic_class << 20) |
+ grh->flow_label);
+ memcpy(av->dgid, grh->dgid.raw, 16);
} else {
/* Arbel workaround -- low byte of GID must be 2 */
av->dgid[3] = cpu_to_be32(2);
@@ -287,33 +292,35 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
return 0;
}
-int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr)
+int mthca_ah_query(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
struct mthca_ah *ah = to_mah(ibah);
struct mthca_dev *dev = to_mdev(ibah->device);
+ u8 port_num = be32_to_cpu(ah->av->port_pd) >> 24;
/* Only implement for MAD and memfree ah for now. */
if (ah->type == MTHCA_AH_ON_HCA)
return -ENOSYS;
memset(attr, 0, sizeof *attr);
- attr->dlid = be16_to_cpu(ah->av->dlid);
- attr->sl = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28;
- attr->port_num = be32_to_cpu(ah->av->port_pd) >> 24;
- attr->static_rate = mthca_rate_to_ib(dev, ah->av->msg_sr & 0x7,
- attr->port_num);
- attr->src_path_bits = ah->av->g_slid & 0x7F;
- attr->ah_flags = mthca_ah_grh_present(ah) ? IB_AH_GRH : 0;
-
- if (attr->ah_flags) {
- attr->grh.traffic_class =
- be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20;
- attr->grh.flow_label =
- be32_to_cpu(ah->av->sl_tclass_flowlabel) & 0xfffff;
- attr->grh.hop_limit = ah->av->hop_limit;
- attr->grh.sgid_index = ah->av->gid_index &
- (dev->limits.gid_table_len - 1);
- memcpy(attr->grh.dgid.raw, ah->av->dgid, 16);
+ attr->type = ibah->type;
+ rdma_ah_set_dlid(attr, be16_to_cpu(ah->av->dlid));
+ rdma_ah_set_sl(attr, be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28);
+ rdma_ah_set_port_num(attr, port_num);
+ rdma_ah_set_static_rate(attr,
+ mthca_rate_to_ib(dev, ah->av->msg_sr & 0x7,
+ port_num));
+ rdma_ah_set_path_bits(attr, ah->av->g_slid & 0x7F);
+ if (mthca_ah_grh_present(ah)) {
+ u32 tc_fl = be32_to_cpu(ah->av->sl_tclass_flowlabel);
+
+ rdma_ah_set_grh(attr, NULL,
+ tc_fl & 0xfffff,
+ ah->av->gid_index &
+ (dev->limits.gid_table_len - 1),
+ ah->av->hop_limit,
+ (tc_fl >> 20) & 0xff);
+ rdma_ah_set_dgid_raw(attr, ah->av->dgid);
}
return 0;
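
mthca keeps the SL, traffic class and flow label packed in the single big-endian sl_tclass_flowlabel word: SL in bits 31:28, traffic class in 27:20, flow label in 19:0. A round-trip sketch in host byte order; the driver additionally byte-swaps the stored word.

/*
 * Pack/unpack sketch for mthca's sl_tclass_flowlabel layout.
 */
#include <stdio.h>
#include <stdint.h>

static uint32_t pack_sl_tc_fl(unsigned int sl, unsigned int tc, unsigned int fl)
{
	return (sl << 28) | (tc << 20) | (fl & 0xfffff);
}

int main(void)
{
	uint32_t word = pack_sl_tc_fl(3, 0x2a, 0x12345);

	printf("sl=%u tclass=0x%x flow_label=0x%x\n",
	       word >> 28, (word >> 20) & 0xff, word & 0xfffff);
	return 0;
}
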
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index c7f49bbb0c72..9d83a53c0c67 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -367,12 +367,16 @@ static int mthca_cmd_poll(struct mthca_dev *dev,
goto out;
}
- if (out_is_imm)
+ if (out_is_imm && out_param) {
*out_param =
(u64) be32_to_cpu((__force __be32)
__raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET)) << 32 |
(u64) be32_to_cpu((__force __be32)
__raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET + 4));
+ } else if (out_is_imm) {
+ err = -EINVAL;
+ goto out;
+ }
status = be32_to_cpu((__force __be32) __raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24;
if (status) {
@@ -450,8 +454,12 @@ static int mthca_cmd_wait(struct mthca_dev *dev,
err = mthca_status_to_errno(context->status);
}
- if (out_is_imm)
+ if (out_is_imm && out_param) {
*out_param = context->out_param;
+ } else if (out_is_imm) {
+ err = -EINVAL;
+ goto out;
+ }
out:
spin_lock(&dev->cmd.context_lock);
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 4393a022867b..ec7da9a474cd 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -560,12 +560,12 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp);
int mthca_create_ah(struct mthca_dev *dev,
struct mthca_pd *pd,
- struct ib_ah_attr *ah_attr,
+ struct rdma_ah_attr *ah_attr,
struct mthca_ah *ah);
int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah);
int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
struct ib_ud_header *header);
-int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr);
+int mthca_ah_query(struct ib_ah *ibah, struct rdma_ah_attr *attr);
int mthca_ah_grh_present(struct mthca_ah *ah);
u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port);
enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port);
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 9139405c4810..7df3db71777a 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -75,25 +75,26 @@ static void update_sm_ah(struct mthca_dev *dev,
u8 port_num, u16 lid, u8 sl)
{
struct ib_ah *new_ah;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
unsigned long flags;
if (!dev->send_agent[port_num - 1][0])
return;
memset(&ah_attr, 0, sizeof ah_attr);
- ah_attr.dlid = lid;
- ah_attr.sl = sl;
- ah_attr.port_num = port_num;
+ ah_attr.type = rdma_ah_find_type(&dev->ib_dev, port_num);
+ rdma_ah_set_dlid(&ah_attr, lid);
+ rdma_ah_set_sl(&ah_attr, sl);
+ rdma_ah_set_port_num(&ah_attr, port_num);
- new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
- &ah_attr);
+ new_ah = rdma_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
+ &ah_attr);
if (IS_ERR(new_ah))
return;
spin_lock_irqsave(&dev->sm_lock, flags);
if (dev->sm_ah[port_num - 1])
- ib_destroy_ah(dev->sm_ah[port_num - 1]);
+ rdma_destroy_ah(dev->sm_ah[port_num - 1]);
dev->sm_ah[port_num - 1] = new_ah;
spin_unlock_irqrestore(&dev->sm_lock, flags);
}
@@ -345,6 +346,6 @@ void mthca_free_agents(struct mthca_dev *dev)
}
if (dev->sm_ah[p])
- ib_destroy_ah(dev->sm_ah[p]);
+ rdma_destroy_ah(dev->sm_ah[p]);
}
}
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index ded76c101dde..c309e5c96383 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -851,20 +851,18 @@ err_uar_table_free:
static int mthca_enable_msi_x(struct mthca_dev *mdev)
{
- struct msix_entry entries[3];
int err;
- entries[0].entry = 0;
- entries[1].entry = 1;
- entries[2].entry = 2;
-
- err = pci_enable_msix_exact(mdev->pdev, entries, ARRAY_SIZE(entries));
- if (err)
+ err = pci_alloc_irq_vectors(mdev->pdev, 3, 3, PCI_IRQ_MSIX);
+ if (err < 0)
return err;
- mdev->eq_table.eq[MTHCA_EQ_COMP ].msi_x_vector = entries[0].vector;
- mdev->eq_table.eq[MTHCA_EQ_ASYNC].msi_x_vector = entries[1].vector;
- mdev->eq_table.eq[MTHCA_EQ_CMD ].msi_x_vector = entries[2].vector;
+ mdev->eq_table.eq[MTHCA_EQ_COMP ].msi_x_vector =
+ pci_irq_vector(mdev->pdev, 0);
+ mdev->eq_table.eq[MTHCA_EQ_ASYNC].msi_x_vector =
+ pci_irq_vector(mdev->pdev, 1);
+ mdev->eq_table.eq[MTHCA_EQ_CMD ].msi_x_vector =
+ pci_irq_vector(mdev->pdev, 2);
return 0;
}
@@ -1018,7 +1016,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
err = mthca_setup_hca(mdev);
if (err == -EBUSY && (mdev->mthca_flags & MTHCA_FLAG_MSI_X)) {
if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
- pci_disable_msix(pdev);
+ pci_free_irq_vectors(pdev);
mdev->mthca_flags &= ~MTHCA_FLAG_MSI_X;
err = mthca_setup_hca(mdev);
@@ -1062,7 +1060,7 @@ err_cleanup:
err_close:
if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
- pci_disable_msix(pdev);
+ pci_free_irq_vectors(pdev);
mthca_close_hca(mdev);
@@ -1113,7 +1111,7 @@ static void __mthca_remove_one(struct pci_dev *pdev)
mthca_cmd_cleanup(mdev);
if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
- pci_disable_msix(pdev);
+ pci_free_irq_vectors(pdev);
ib_dealloc_device(&mdev->ib_dev);
pci_release_regions(pdev);
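
The MSI-X hunks above drop the per-driver struct msix_entry bookkeeping in favour of the generic IRQ-vector API. A minimal sketch of that pattern, with the vector count of three taken from the hunk and the function name and logging purely illustrative:

#include <linux/pci.h>
#include <linux/printk.h>

/* Request exactly three MSI-X vectors and look up the Linux IRQ behind each. */
static int example_enable_msix(struct pci_dev *pdev)
{
        int nvec, i;

        nvec = pci_alloc_irq_vectors(pdev, 3, 3, PCI_IRQ_MSIX);
        if (nvec < 0)
                return nvec;            /* nothing was allocated */

        for (i = 0; i < nvec; i++)
                pr_info("MSI-X vector %d -> irq %d\n", i,
                        pci_irq_vector(pdev, i));

        return 0;
}

/* Teardown is the single call used in the hunks above: pci_free_irq_vectors(pdev). */
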
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index d31708742ba5..c197cd9b193f 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -146,7 +146,7 @@ static int mthca_query_port(struct ib_device *ibdev,
if (!in_mad || !out_mad)
goto out;
- memset(props, 0, sizeof *props);
+ /* props being zeroed by the caller, avoid zeroing it here */
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
@@ -212,7 +212,7 @@ static int mthca_modify_port(struct ib_device *ibdev,
if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
return -ERESTARTSYS;
- err = mthca_query_port(ibdev, port, &attr);
+ err = ib_query_port(ibdev, port, &attr);
if (err)
goto out;
@@ -410,7 +410,7 @@ static int mthca_dealloc_pd(struct ib_pd *pd)
}
static struct ib_ah *mthca_ah_create(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr,
+ struct rdma_ah_attr *ah_attr,
struct ib_udata *udata)
{
@@ -937,7 +937,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto err;
}
- shift = ffs(mr->umem->page_size) - 1;
+ shift = mr->umem->page_shift;
n = mr->umem->nmap;
mr->mtt = mthca_alloc_mtt(dev, n);
@@ -959,8 +959,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) {
len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
- pages[i++] = sg_dma_address(sg) +
- mr->umem->page_size * k;
+ pages[i++] = sg_dma_address(sg) + (k << shift);
/*
* Be friendly to write_mtt and pass it chunks
* of appropriate size.
@@ -1166,13 +1165,14 @@ static int mthca_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
- err = mthca_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
@@ -1223,7 +1223,7 @@ int mthca_register_device(struct mthca_dev *dev)
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
dev->ib_dev.num_comp_vectors = 1;
- dev->ib_dev.dma_device = &dev->pdev->dev;
+ dev->ib_dev.dev.parent = &dev->pdev->dev;
dev->ib_dev.query_device = mthca_query_device;
dev->ib_dev.query_port = mthca_query_port;
dev->ib_dev.modify_device = mthca_modify_device;
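
The memory-registration hunks above replace umem->page_size arithmetic with umem->page_shift. A minimal sketch of the resulting scatterlist walk, assuming an already pinned and DMA-mapped struct ib_umem; the function name and the pr_debug() are illustrative only:

#include <linux/printk.h>
#include <linux/scatterlist.h>
#include <rdma/ib_umem.h>

/* Derive per-page DMA addresses by shifting rather than multiplying. */
static void example_walk_umem(struct ib_umem *umem)
{
        struct scatterlist *sg;
        int entry;
        int shift = umem->page_shift;
        u64 len, k;

        for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
                len = sg_dma_len(sg) >> shift;
                for (k = 0; k < len; ++k)
                        pr_debug("page dma addr: 0x%llx\n",
                                 (unsigned long long)(sg_dma_address(sg) +
                                                      (k << shift)));
        }
}
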
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 96e5fb91fb48..d21960cd9a49 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -393,31 +393,36 @@ static int to_ib_qp_access_flags(int mthca_flags)
return ib_flags;
}
-static void to_ib_ah_attr(struct mthca_dev *dev, struct ib_ah_attr *ib_ah_attr,
- struct mthca_qp_path *path)
+static void to_rdma_ah_attr(struct mthca_dev *dev,
+ struct rdma_ah_attr *ah_attr,
+ struct mthca_qp_path *path)
{
- memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
- ib_ah_attr->port_num = (be32_to_cpu(path->port_pkey) >> 24) & 0x3;
+ u8 port_num = (be32_to_cpu(path->port_pkey) >> 24) & 0x3;
- if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->limits.num_ports)
- return;
+ memset(ah_attr, 0, sizeof(*ah_attr));
- ib_ah_attr->dlid = be16_to_cpu(path->rlid);
- ib_ah_attr->sl = be32_to_cpu(path->sl_tclass_flowlabel) >> 28;
- ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f;
- ib_ah_attr->static_rate = mthca_rate_to_ib(dev,
- path->static_rate & 0xf,
- ib_ah_attr->port_num);
- ib_ah_attr->ah_flags = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
- if (ib_ah_attr->ah_flags) {
- ib_ah_attr->grh.sgid_index = path->mgid_index & (dev->limits.gid_table_len - 1);
- ib_ah_attr->grh.hop_limit = path->hop_limit;
- ib_ah_attr->grh.traffic_class =
- (be32_to_cpu(path->sl_tclass_flowlabel) >> 20) & 0xff;
- ib_ah_attr->grh.flow_label =
- be32_to_cpu(path->sl_tclass_flowlabel) & 0xfffff;
- memcpy(ib_ah_attr->grh.dgid.raw,
- path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
+ if (port_num == 0 || port_num > dev->limits.num_ports)
+ return;
+ ah_attr->type = rdma_ah_find_type(&dev->ib_dev, port_num);
+ rdma_ah_set_port_num(ah_attr, port_num);
+
+ rdma_ah_set_dlid(ah_attr, be16_to_cpu(path->rlid));
+ rdma_ah_set_sl(ah_attr, be32_to_cpu(path->sl_tclass_flowlabel) >> 28);
+ rdma_ah_set_path_bits(ah_attr, path->g_mylmc & 0x7f);
+ rdma_ah_set_static_rate(ah_attr,
+ mthca_rate_to_ib(dev,
+ path->static_rate & 0xf,
+ port_num));
+ if (path->g_mylmc & (1 << 7)) {
+ u32 tc_fl = be32_to_cpu(path->sl_tclass_flowlabel);
+
+ rdma_ah_set_grh(ah_attr, NULL,
+ tc_fl & 0xfffff,
+ path->mgid_index &
+ (dev->limits.gid_table_len - 1),
+ path->hop_limit,
+ (tc_fl >> 20) & 0xff);
+ rdma_ah_set_dgid_raw(ah_attr, path->rgid);
}
}
@@ -468,11 +473,12 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m
to_ib_qp_access_flags(be32_to_cpu(context->params2));
if (qp->transport == RC || qp->transport == UC) {
- to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
- to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
+ to_rdma_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
+ to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
qp_attr->alt_pkey_index =
be32_to_cpu(context->alt_path.port_pkey) & 0x7f;
- qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
+ qp_attr->alt_port_num =
+ rdma_ah_get_port_num(&qp_attr->alt_ah_attr);
}
qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f;
@@ -512,30 +518,36 @@ out:
return err;
}
-static int mthca_path_set(struct mthca_dev *dev, const struct ib_ah_attr *ah,
+static int mthca_path_set(struct mthca_dev *dev, const struct rdma_ah_attr *ah,
struct mthca_qp_path *path, u8 port)
{
- path->g_mylmc = ah->src_path_bits & 0x7f;
- path->rlid = cpu_to_be16(ah->dlid);
- path->static_rate = mthca_get_rate(dev, ah->static_rate, port);
+ path->g_mylmc = rdma_ah_get_path_bits(ah) & 0x7f;
+ path->rlid = cpu_to_be16(rdma_ah_get_dlid(ah));
+ path->static_rate = mthca_get_rate(dev, rdma_ah_get_static_rate(ah),
+ port);
+
+ if (rdma_ah_get_ah_flags(ah) & IB_AH_GRH) {
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah);
- if (ah->ah_flags & IB_AH_GRH) {
- if (ah->grh.sgid_index >= dev->limits.gid_table_len) {
+ if (grh->sgid_index >= dev->limits.gid_table_len) {
mthca_dbg(dev, "sgid_index (%u) too large. max is %d\n",
- ah->grh.sgid_index, dev->limits.gid_table_len-1);
+ grh->sgid_index,
+ dev->limits.gid_table_len - 1);
return -1;
}
path->g_mylmc |= 1 << 7;
- path->mgid_index = ah->grh.sgid_index;
- path->hop_limit = ah->grh.hop_limit;
+ path->mgid_index = grh->sgid_index;
+ path->hop_limit = grh->hop_limit;
path->sl_tclass_flowlabel =
- cpu_to_be32((ah->sl << 28) |
- (ah->grh.traffic_class << 20) |
- (ah->grh.flow_label));
- memcpy(path->rgid, ah->grh.dgid.raw, 16);
- } else
- path->sl_tclass_flowlabel = cpu_to_be32(ah->sl << 28);
+ cpu_to_be32((rdma_ah_get_sl(ah) << 28) |
+ (grh->traffic_class << 20) |
+ (grh->flow_label));
+ memcpy(path->rgid, grh->dgid.raw, 16);
+ } else {
+ path->sl_tclass_flowlabel = cpu_to_be32(rdma_ah_get_sl(ah) <<
+ 28);
+ }
return 0;
}
@@ -680,7 +692,7 @@ static int __mthca_modify_qp(struct ib_qp *ibqp,
}
if (mthca_path_set(dev, &attr->alt_ah_attr, &qp_context->alt_path,
- attr->alt_ah_attr.port_num))
+ rdma_ah_get_port_num(&attr->alt_ah_attr)))
goto out_mailbox;
qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index |
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index 5b9601014f0c..a30aa6527f7e 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -815,7 +815,7 @@ static struct pci_driver nes_pci_driver = {
.remove = nes_remove,
};
-static ssize_t nes_show_adapter(struct device_driver *ddp, char *buf)
+static ssize_t adapter_show(struct device_driver *ddp, char *buf)
{
unsigned int devfn = 0xffffffff;
unsigned char bus_number = 0xff;
@@ -834,7 +834,7 @@ static ssize_t nes_show_adapter(struct device_driver *ddp, char *buf)
return snprintf(buf, PAGE_SIZE, "%x:%x\n", bus_number, devfn);
}
-static ssize_t nes_store_adapter(struct device_driver *ddp,
+static ssize_t adapter_store(struct device_driver *ddp,
const char *buf, size_t count)
{
char *p = (char *)buf;
@@ -843,7 +843,7 @@ static ssize_t nes_store_adapter(struct device_driver *ddp,
return strnlen(buf, count);
}
-static ssize_t nes_show_ee_cmd(struct device_driver *ddp, char *buf)
+static ssize_t eeprom_cmd_show(struct device_driver *ddp, char *buf)
{
u32 eeprom_cmd = 0xdead;
u32 i = 0;
@@ -859,7 +859,7 @@ static ssize_t nes_show_ee_cmd(struct device_driver *ddp, char *buf)
return snprintf(buf, PAGE_SIZE, "0x%x\n", eeprom_cmd);
}
-static ssize_t nes_store_ee_cmd(struct device_driver *ddp,
+static ssize_t eeprom_cmd_store(struct device_driver *ddp,
const char *buf, size_t count)
{
char *p = (char *)buf;
@@ -880,7 +880,7 @@ static ssize_t nes_store_ee_cmd(struct device_driver *ddp,
return strnlen(buf, count);
}
-static ssize_t nes_show_ee_data(struct device_driver *ddp, char *buf)
+static ssize_t eeprom_data_show(struct device_driver *ddp, char *buf)
{
u32 eeprom_data = 0xdead;
u32 i = 0;
@@ -897,7 +897,7 @@ static ssize_t nes_show_ee_data(struct device_driver *ddp, char *buf)
return snprintf(buf, PAGE_SIZE, "0x%x\n", eeprom_data);
}
-static ssize_t nes_store_ee_data(struct device_driver *ddp,
+static ssize_t eeprom_data_store(struct device_driver *ddp,
const char *buf, size_t count)
{
char *p = (char *)buf;
@@ -918,7 +918,7 @@ static ssize_t nes_store_ee_data(struct device_driver *ddp,
return strnlen(buf, count);
}
-static ssize_t nes_show_flash_cmd(struct device_driver *ddp, char *buf)
+static ssize_t flash_cmd_show(struct device_driver *ddp, char *buf)
{
u32 flash_cmd = 0xdead;
u32 i = 0;
@@ -935,7 +935,7 @@ static ssize_t nes_show_flash_cmd(struct device_driver *ddp, char *buf)
return snprintf(buf, PAGE_SIZE, "0x%x\n", flash_cmd);
}
-static ssize_t nes_store_flash_cmd(struct device_driver *ddp,
+static ssize_t flash_cmd_store(struct device_driver *ddp,
const char *buf, size_t count)
{
char *p = (char *)buf;
@@ -956,7 +956,7 @@ static ssize_t nes_store_flash_cmd(struct device_driver *ddp,
return strnlen(buf, count);
}
-static ssize_t nes_show_flash_data(struct device_driver *ddp, char *buf)
+static ssize_t flash_data_show(struct device_driver *ddp, char *buf)
{
u32 flash_data = 0xdead;
u32 i = 0;
@@ -973,7 +973,7 @@ static ssize_t nes_show_flash_data(struct device_driver *ddp, char *buf)
return snprintf(buf, PAGE_SIZE, "0x%x\n", flash_data);
}
-static ssize_t nes_store_flash_data(struct device_driver *ddp,
+static ssize_t flash_data_store(struct device_driver *ddp,
const char *buf, size_t count)
{
char *p = (char *)buf;
@@ -994,12 +994,12 @@ static ssize_t nes_store_flash_data(struct device_driver *ddp,
return strnlen(buf, count);
}
-static ssize_t nes_show_nonidx_addr(struct device_driver *ddp, char *buf)
+static ssize_t nonidx_addr_show(struct device_driver *ddp, char *buf)
{
return snprintf(buf, PAGE_SIZE, "0x%x\n", sysfs_nonidx_addr);
}
-static ssize_t nes_store_nonidx_addr(struct device_driver *ddp,
+static ssize_t nonidx_addr_store(struct device_driver *ddp,
const char *buf, size_t count)
{
char *p = (char *)buf;
@@ -1010,7 +1010,7 @@ static ssize_t nes_store_nonidx_addr(struct device_driver *ddp,
return strnlen(buf, count);
}
-static ssize_t nes_show_nonidx_data(struct device_driver *ddp, char *buf)
+static ssize_t nonidx_data_show(struct device_driver *ddp, char *buf)
{
u32 nonidx_data = 0xdead;
u32 i = 0;
@@ -1027,7 +1027,7 @@ static ssize_t nes_show_nonidx_data(struct device_driver *ddp, char *buf)
return snprintf(buf, PAGE_SIZE, "0x%x\n", nonidx_data);
}
-static ssize_t nes_store_nonidx_data(struct device_driver *ddp,
+static ssize_t nonidx_data_store(struct device_driver *ddp,
const char *buf, size_t count)
{
char *p = (char *)buf;
@@ -1048,12 +1048,12 @@ static ssize_t nes_store_nonidx_data(struct device_driver *ddp,
return strnlen(buf, count);
}
-static ssize_t nes_show_idx_addr(struct device_driver *ddp, char *buf)
+static ssize_t idx_addr_show(struct device_driver *ddp, char *buf)
{
return snprintf(buf, PAGE_SIZE, "0x%x\n", sysfs_idx_addr);
}
-static ssize_t nes_store_idx_addr(struct device_driver *ddp,
+static ssize_t idx_addr_store(struct device_driver *ddp,
const char *buf, size_t count)
{
char *p = (char *)buf;
@@ -1064,7 +1064,7 @@ static ssize_t nes_store_idx_addr(struct device_driver *ddp,
return strnlen(buf, count);
}
-static ssize_t nes_show_idx_data(struct device_driver *ddp, char *buf)
+static ssize_t idx_data_show(struct device_driver *ddp, char *buf)
{
u32 idx_data = 0xdead;
u32 i = 0;
@@ -1081,7 +1081,7 @@ static ssize_t nes_show_idx_data(struct device_driver *ddp, char *buf)
return snprintf(buf, PAGE_SIZE, "0x%x\n", idx_data);
}
-static ssize_t nes_store_idx_data(struct device_driver *ddp,
+static ssize_t idx_data_store(struct device_driver *ddp,
const char *buf, size_t count)
{
char *p = (char *)buf;
@@ -1102,11 +1102,7 @@ static ssize_t nes_store_idx_data(struct device_driver *ddp,
return strnlen(buf, count);
}
-
-/**
- * nes_show_wqm_quanta
- */
-static ssize_t nes_show_wqm_quanta(struct device_driver *ddp, char *buf)
+static ssize_t wqm_quanta_show(struct device_driver *ddp, char *buf)
{
u32 wqm_quanta_value = 0xdead;
u32 i = 0;
@@ -1123,12 +1119,8 @@ static ssize_t nes_show_wqm_quanta(struct device_driver *ddp, char *buf)
return snprintf(buf, PAGE_SIZE, "0x%X\n", wqm_quanta_value);
}
-
-/**
- * nes_store_wqm_quanta
- */
-static ssize_t nes_store_wqm_quanta(struct device_driver *ddp,
- const char *buf, size_t count)
+static ssize_t wqm_quanta_store(struct device_driver *ddp, const char *buf,
+ size_t count)
{
unsigned long wqm_quanta_value;
u32 wqm_config1;
@@ -1153,26 +1145,16 @@ static ssize_t nes_store_wqm_quanta(struct device_driver *ddp,
return strnlen(buf, count);
}
-static DRIVER_ATTR(adapter, S_IRUSR | S_IWUSR,
- nes_show_adapter, nes_store_adapter);
-static DRIVER_ATTR(eeprom_cmd, S_IRUSR | S_IWUSR,
- nes_show_ee_cmd, nes_store_ee_cmd);
-static DRIVER_ATTR(eeprom_data, S_IRUSR | S_IWUSR,
- nes_show_ee_data, nes_store_ee_data);
-static DRIVER_ATTR(flash_cmd, S_IRUSR | S_IWUSR,
- nes_show_flash_cmd, nes_store_flash_cmd);
-static DRIVER_ATTR(flash_data, S_IRUSR | S_IWUSR,
- nes_show_flash_data, nes_store_flash_data);
-static DRIVER_ATTR(nonidx_addr, S_IRUSR | S_IWUSR,
- nes_show_nonidx_addr, nes_store_nonidx_addr);
-static DRIVER_ATTR(nonidx_data, S_IRUSR | S_IWUSR,
- nes_show_nonidx_data, nes_store_nonidx_data);
-static DRIVER_ATTR(idx_addr, S_IRUSR | S_IWUSR,
- nes_show_idx_addr, nes_store_idx_addr);
-static DRIVER_ATTR(idx_data, S_IRUSR | S_IWUSR,
- nes_show_idx_data, nes_store_idx_data);
-static DRIVER_ATTR(wqm_quanta, S_IRUSR | S_IWUSR,
- nes_show_wqm_quanta, nes_store_wqm_quanta);
+static DRIVER_ATTR_RW(adapter);
+static DRIVER_ATTR_RW(eeprom_cmd);
+static DRIVER_ATTR_RW(eeprom_data);
+static DRIVER_ATTR_RW(flash_cmd);
+static DRIVER_ATTR_RW(flash_data);
+static DRIVER_ATTR_RW(nonidx_addr);
+static DRIVER_ATTR_RW(nonidx_data);
+static DRIVER_ATTR_RW(idx_addr);
+static DRIVER_ATTR_RW(idx_data);
+static DRIVER_ATTR_RW(wqm_quanta);
static int nes_create_driver_sysfs(struct pci_driver *drv)
{
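
DRIVER_ATTR_RW(name) only compiles if the callbacks are named name_show and name_store, which is why every nes accessor above is renamed before the attribute definitions are collapsed. A minimal sketch with a hypothetical driver attribute called foo, not one of the nes attributes:

#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/sysfs.h>

static unsigned int foo_value;

/* DRIVER_ATTR_RW(foo) expects exactly these two callback names. */
static ssize_t foo_show(struct device_driver *drv, char *buf)
{
        return snprintf(buf, PAGE_SIZE, "%u\n", foo_value);
}

static ssize_t foo_store(struct device_driver *drv, const char *buf,
                         size_t count)
{
        if (kstrtouint(buf, 0, &foo_value))
                return -EINVAL;
        return count;
}

/* Expands to a read/write struct driver_attribute named driver_attr_foo,
 * which the driver then registers with driver_create_file().
 */
static DRIVER_ATTR_RW(foo);
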
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index 85acd0843b50..3f9e56e8b379 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -36,6 +36,7 @@
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
+#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/kernel.h>
#include <linux/delay.h>
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 8e703479e7ce..de4025deaa4a 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -135,17 +135,17 @@ static void record_ird_ord(struct nes_cm_node *, u16, u16);
/* instance of function pointers for client API */
/* set address of this instance to cm_core->cm_ops at cm_core alloc */
static const struct nes_cm_ops nes_cm_api = {
- mini_cm_accelerated,
- mini_cm_listen,
- mini_cm_del_listen,
- mini_cm_connect,
- mini_cm_close,
- mini_cm_accept,
- mini_cm_reject,
- mini_cm_recv_pkt,
- mini_cm_dealloc_core,
- mini_cm_get,
- mini_cm_set
+ .accelerated = mini_cm_accelerated,
+ .listen = mini_cm_listen,
+ .stop_listener = mini_cm_del_listen,
+ .connect = mini_cm_connect,
+ .close = mini_cm_close,
+ .accept = mini_cm_accept,
+ .reject = mini_cm_reject,
+ .recv_pkt = mini_cm_recv_pkt,
+ .destroy_cm_core = mini_cm_dealloc_core,
+ .get = mini_cm_get,
+ .set = mini_cm_set
};
static struct nes_cm_core *g_cm_core;
@@ -610,7 +610,6 @@ static void build_mpa_v2(struct nes_cm_node *cm_node,
ctrl_ord = cm_node->ord_size & IETF_NO_IRD_ORD;
}
ctrl_ird |= IETF_PEER_TO_PEER;
- ctrl_ird |= IETF_FLPDU_ZERO_LEN;
switch (mpa_key) {
case MPA_KEY_REQUEST:
@@ -743,7 +742,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
if (type == NES_TIMER_TYPE_SEND) {
new_send->seq_num = ntohl(tcp_hdr(skb)->seq);
- atomic_inc(&new_send->skb->users);
+ refcount_inc(&new_send->skb->users);
spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
cm_node->send_entry = new_send;
add_ref_cm_node(cm_node);
@@ -925,7 +924,7 @@ static void nes_cm_timer_tick(unsigned long pass)
flags);
break;
}
- atomic_inc(&send_entry->skb->users);
+ refcount_inc(&send_entry->skb->users);
cm_packets_retrans++;
nes_debug(NES_DBG_CM, "Retransmitting send_entry %p "
"for node %p, jiffies = %lu, time to send = "
@@ -1826,7 +1825,7 @@ static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb)
type = NES_CM_EVENT_CONNECTED;
cm_node->state = NES_CM_STATE_TSA;
}
-
+ send_ack(cm_node, NULL);
break;
default:
WARN_ON(1);
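
The skb->users changes above are part of the tree-wide atomic_t to refcount_t conversion. A minimal sketch of that reference-counting pattern on a hypothetical object, not a structure from this driver:

#include <linux/refcount.h>
#include <linux/slab.h>

struct example_obj {
        refcount_t users;
        /* payload ... */
};

/* Take a reference; refcount_t saturates rather than wrapping on overflow. */
static struct example_obj *example_get(struct example_obj *obj)
{
        refcount_inc(&obj->users);
        return obj;
}

/* Drop a reference and free the object once the last user is gone. */
static void example_put(struct example_obj *obj)
{
        if (refcount_dec_and_test(&obj->users))
                kfree(obj);
}
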
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 19acd13c6cb1..b0adf65e4bdb 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -551,7 +551,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
if ((0x0F000100 == (pcs_control_status0 & 0x0F000100))
|| (0x0F000100 == (pcs_control_status1 & 0x0F000100)))
int_cnt++;
- msleep(1);
+ usleep_range(1000, 2000);
}
if (int_cnt > 1) {
spin_lock_irqsave(&nesadapter->phy_lock, flags);
@@ -592,7 +592,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
break;
}
}
- msleep(1);
+ usleep_range(1000, 2000);
}
}
}
@@ -1849,9 +1849,8 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
wqe_count -= counter;
nes_write32(nesdev->regs+NES_WQE_ALLOC, (counter << 24) | nesvnic->nic.qp_id);
} while (wqe_count);
- init_timer(&nesvnic->rq_wqes_timer);
- nesvnic->rq_wqes_timer.function = nes_rq_wqes_timeout;
- nesvnic->rq_wqes_timer.data = (unsigned long)nesvnic;
+ setup_timer(&nesvnic->rq_wqes_timer, nes_rq_wqes_timeout,
+ (unsigned long)nesvnic);
nes_debug(NES_DBG_INIT, "NAPI support Enabled\n");
if (nesdev->nesadapter->et_use_adaptive_rx_coalesce)
{
@@ -3055,7 +3054,7 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe));
barrier();
- opcode = cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX];
+ opcode = le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX]);
if ((opcode & NES_CQP_OPCODE_MASK) == NES_CQP_DOWNLOAD_SEGMENT)
ctx_index = NES_CQP_WQE_DL_COMP_CTX_LOW_IDX;
else
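
setup_timer() bundles the init_timer()/.function/.data sequence removed above into one call. A minimal sketch under that same pre-timer_setup() API; the callback, context argument and one-second period are illustrative:

#include <linux/jiffies.h>
#include <linux/printk.h>
#include <linux/timer.h>

static struct timer_list example_timer;

/* Callbacks of this API receive the 'data' value handed to setup_timer(). */
static void example_timeout(unsigned long data)
{
        pr_info("timer fired, data=0x%lx\n", data);
}

static void example_arm_timer(void *ctx)
{
        setup_timer(&example_timer, example_timeout, (unsigned long)ctx);
        mod_timer(&example_timer, jiffies + HZ);        /* fire in one second */
}
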
diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c
index 33624f17c347..77226cf4ea02 100644
--- a/drivers/infiniband/hw/nes/nes_mgt.c
+++ b/drivers/infiniband/hw/nes/nes_mgt.c
@@ -1040,9 +1040,8 @@ int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct
mgtvnic->mgt.rx_skb[counter] = skb;
}
- init_timer(&mgtvnic->rq_wqes_timer);
- mgtvnic->rq_wqes_timer.function = nes_mgt_rq_wqes_timeout;
- mgtvnic->rq_wqes_timer.data = (unsigned long)mgtvnic;
+ setup_timer(&mgtvnic->rq_wqes_timer, nes_mgt_rq_wqes_timeout,
+ (unsigned long)mgtvnic);
wqe_count = NES_MGT_WQ_COUNT - 1;
mgtvnic->mgt.rq_head = wqe_count;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 5a31f3c6a421..25dcd7573df9 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -475,7 +475,7 @@ static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr
struct nes_vnic *nesvnic = to_nesvnic(ibdev);
struct net_device *netdev = nesvnic->netdev;
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->max_mtu = IB_MTU_4096;
props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
@@ -761,7 +761,8 @@ static int nes_dealloc_pd(struct ib_pd *ibpd)
/**
* nes_create_ah
*/
-static struct ib_ah *nes_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+static struct ib_ah *nes_create_ah(struct ib_pd *pd,
+ struct rdma_ah_attr *ah_attr,
struct ib_udata *udata)
{
return ERR_PTR(-ENOSYS);
@@ -1308,9 +1309,8 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
init_completion(&nesqp->rq_drained);
nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR);
- init_timer(&nesqp->terminate_timer);
- nesqp->terminate_timer.function = nes_terminate_timeout;
- nesqp->terminate_timer.data = (unsigned long)nesqp;
+ setup_timer(&nesqp->terminate_timer, nes_terminate_timeout,
+ (unsigned long)nesqp);
/* update the QP table */
nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp;
@@ -2165,9 +2165,9 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
nes_debug(NES_DBG_MR, "User base = 0x%lX, Virt base = 0x%lX, length = %u,"
- " offset = %u, page size = %u.\n",
+ " offset = %u, page size = %lu.\n",
(unsigned long int)start, (unsigned long int)virt, (u32)length,
- ib_umem_offset(region), region->page_size);
+ ib_umem_offset(region), BIT(region->page_shift));
skip_pages = ((u32)ib_umem_offset(region)) >> 12;
@@ -3660,13 +3660,14 @@ static int nes_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
+ immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
err = nes_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
return 0;
}
@@ -3730,7 +3731,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
nesibdev->ibdev.phys_port_cnt = 1;
nesibdev->ibdev.num_comp_vectors = 1;
- nesibdev->ibdev.dma_device = &nesdev->pcidev->dev;
nesibdev->ibdev.dev.parent = &nesdev->pcidev->dev;
nesibdev->ibdev.query_device = nes_query_device;
nesibdev->ibdev.query_port = nes_query_port;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index 45bdfa0e3b2b..7baedc74e39d 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -527,17 +527,17 @@ static inline int is_cqe_wr_imm(struct ocrdma_cqe *cqe)
}
static inline int ocrdma_resolve_dmac(struct ocrdma_dev *dev,
- struct ib_ah_attr *ah_attr, u8 *mac_addr)
+ struct rdma_ah_attr *ah_attr, u8 *mac_addr)
{
struct in6_addr in6;
- memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
+ memcpy(&in6, rdma_ah_read_grh(ah_attr)->dgid.raw, sizeof(in6));
if (rdma_is_multicast_addr(&in6))
rdma_get_mcast_mac(&in6, mac_addr);
else if (rdma_link_local_addr(&in6))
rdma_get_ll_mac(&in6, mac_addr);
else
- memcpy(mac_addr, ah_attr->dmac, ETH_ALEN);
+ memcpy(mac_addr, ah_attr->roce.dmac, ETH_ALEN);
return 0;
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index 14d33b0f3950..d0249e463338 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -59,7 +59,7 @@ static u16 ocrdma_hdr_type_to_proto_num(int devid, u8 hdr_type)
{
switch (hdr_type) {
case OCRDMA_L3_TYPE_IB_GRH:
- return (u16)0x8915;
+ return (u16)ETH_P_IBOE;
case OCRDMA_L3_TYPE_IPV4:
return (u16)0x0800;
case OCRDMA_L3_TYPE_IPV6:
@@ -71,7 +71,7 @@ static u16 ocrdma_hdr_type_to_proto_num(int devid, u8 hdr_type)
}
static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
- struct ib_ah_attr *attr, union ib_gid *sgid,
+ struct rdma_ah_attr *attr, union ib_gid *sgid,
int pdid, bool *isvlan, u16 vlan_tag)
{
int status;
@@ -81,6 +81,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
u16 proto_num = 0;
u8 nxthdr = 0x11;
struct iphdr ipv4;
+ const struct ib_global_route *ib_grh;
union {
struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
@@ -94,7 +95,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
proto_num = ocrdma_hdr_type_to_proto_num(dev->id, ah->hdr_type);
if (!proto_num)
return -EINVAL;
- nxthdr = (proto_num == 0x8915) ? 0x1b : 0x11;
+ nxthdr = (proto_num == ETH_P_IBOE) ? 0x1b : 0x11;
/* VLAN */
if (!vlan_tag || (vlan_tag > 0xFFF))
vlan_tag = dev->pvid;
@@ -120,32 +121,33 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
status = ocrdma_resolve_dmac(dev, attr, &eth.dmac[0]);
if (status)
return status;
- ah->sgid_index = attr->grh.sgid_index;
+ ib_grh = rdma_ah_read_grh(attr);
+ ah->sgid_index = ib_grh->sgid_index;
/* Eth HDR */
memcpy(&ah->av->eth_hdr, &eth, eth_sz);
if (ah->hdr_type == RDMA_NETWORK_IPV4) {
*((__be16 *)&ipv4) = htons((4 << 12) | (5 << 8) |
- attr->grh.traffic_class);
+ ib_grh->traffic_class);
ipv4.id = cpu_to_be16(pdid);
ipv4.frag_off = htons(IP_DF);
ipv4.tot_len = htons(0);
- ipv4.ttl = attr->grh.hop_limit;
+ ipv4.ttl = ib_grh->hop_limit;
ipv4.protocol = nxthdr;
rdma_gid2ip(&sgid_addr._sockaddr, sgid);
ipv4.saddr = sgid_addr._sockaddr_in.sin_addr.s_addr;
- rdma_gid2ip(&dgid_addr._sockaddr, &attr->grh.dgid);
+ rdma_gid2ip(&dgid_addr._sockaddr, &ib_grh->dgid);
ipv4.daddr = dgid_addr._sockaddr_in.sin_addr.s_addr;
memcpy((u8 *)ah->av + eth_sz, &ipv4, sizeof(struct iphdr));
} else {
memcpy(&grh.sgid[0], sgid->raw, sizeof(union ib_gid));
grh.tclass_flow = cpu_to_be32((6 << 28) |
- (attr->grh.traffic_class << 24) |
- attr->grh.flow_label);
- memcpy(&grh.dgid[0], attr->grh.dgid.raw,
- sizeof(attr->grh.dgid.raw));
+ (ib_grh->traffic_class << 24) |
+ ib_grh->flow_label);
+ memcpy(&grh.dgid[0], ib_grh->dgid.raw,
+ sizeof(ib_grh->dgid.raw));
grh.pdid_hoplimit = cpu_to_be32((pdid << 16) |
(nxthdr << 8) |
- attr->grh.hop_limit);
+ ib_grh->hop_limit);
memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh));
}
if (*isvlan)
@@ -154,7 +156,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
return status;
}
-struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
+struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
struct ib_udata *udata)
{
u32 *ahid_addr;
@@ -165,11 +167,14 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
struct ib_gid_attr sgid_attr;
struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
+ const struct ib_global_route *grh;
union ib_gid sgid;
- if (!(attr->ah_flags & IB_AH_GRH))
+ if ((attr->type != RDMA_AH_ATTR_TYPE_ROCE) ||
+ !(rdma_ah_get_ah_flags(attr) & IB_AH_GRH))
return ERR_PTR(-EINVAL);
+ grh = rdma_ah_read_grh(attr);
if (atomic_cmpxchg(&dev->update_sl, 1, 0))
ocrdma_init_service_level(dev);
@@ -181,7 +186,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
if (status)
goto av_err;
- status = ib_get_cached_gid(&dev->ibdev, 1, attr->grh.sgid_index, &sgid,
+ status = ib_get_cached_gid(&dev->ibdev, 1, grh->sgid_index, &sgid,
&sgid_attr);
if (status) {
pr_err("%s(): Failed to query sgid, status = %d\n",
@@ -197,10 +202,11 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
ah->hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
if ((pd->uctx) &&
- (!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) &&
- (!rdma_link_local_addr((struct in6_addr *)attr->grh.dgid.raw))) {
- status = rdma_addr_find_l2_eth_by_grh(&sgid, &attr->grh.dgid,
- attr->dmac, &vlan_tag,
+ (!rdma_is_multicast_addr((struct in6_addr *)grh->dgid.raw)) &&
+ (!rdma_link_local_addr((struct in6_addr *)grh->dgid.raw))) {
+ status = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid,
+ attr->roce.dmac,
+ &vlan_tag,
&sgid_attr.ndev->ifindex,
NULL);
if (status) {
@@ -216,7 +222,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
/* if pd is for the user process, pass the ah_id to user space */
if ((pd->uctx) && (pd->uctx->ah_tbl.va)) {
- ahid_addr = pd->uctx->ah_tbl.va + attr->dlid;
+ ahid_addr = pd->uctx->ah_tbl.va + rdma_ah_get_dlid(attr);
*ahid_addr = 0;
*ahid_addr |= ah->id & OCRDMA_AH_ID_MASK;
if (ocrdma_is_udp_encap_supported(dev)) {
@@ -248,30 +254,32 @@ int ocrdma_destroy_ah(struct ib_ah *ibah)
return 0;
}
-int ocrdma_query_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
+int ocrdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
struct ocrdma_ah *ah = get_ocrdma_ah(ibah);
struct ocrdma_av *av = ah->av;
struct ocrdma_grh *grh;
- attr->ah_flags |= IB_AH_GRH;
+
+ attr->type = ibah->type;
if (ah->av->valid & OCRDMA_AV_VALID) {
grh = (struct ocrdma_grh *)((u8 *)ah->av +
sizeof(struct ocrdma_eth_vlan));
- attr->sl = be16_to_cpu(av->eth_hdr.vlan_tag) >> 13;
+ rdma_ah_set_sl(attr, be16_to_cpu(av->eth_hdr.vlan_tag) >> 13);
} else {
grh = (struct ocrdma_grh *)((u8 *)ah->av +
sizeof(struct ocrdma_eth_basic));
- attr->sl = 0;
+ rdma_ah_set_sl(attr, 0);
}
- memcpy(&attr->grh.dgid.raw[0], &grh->dgid[0], sizeof(grh->dgid));
- attr->grh.sgid_index = ah->sgid_index;
- attr->grh.hop_limit = be32_to_cpu(grh->pdid_hoplimit) & 0xff;
- attr->grh.traffic_class = be32_to_cpu(grh->tclass_flow) >> 24;
- attr->grh.flow_label = be32_to_cpu(grh->tclass_flow) & 0x00ffffffff;
+ rdma_ah_set_grh(attr, NULL,
+ be32_to_cpu(grh->tclass_flow) & 0xffffffff,
+ ah->sgid_index,
+ be32_to_cpu(grh->pdid_hoplimit) & 0xff,
+ be32_to_cpu(grh->tclass_flow) >> 24);
+ rdma_ah_set_dgid_raw(attr, &grh->dgid[0]);
return 0;
}
-int ocrdma_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
+int ocrdma_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
/* modify_ah is unsupported */
return -ENOSYS;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
index 0704a24b17c8..1a65c47945aa 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
@@ -51,11 +51,11 @@ enum {
OCRDMA_AH_L3_TYPE_SHIFT = 0x1D /* 29 bits */
};
-struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *,
- struct ib_udata *);
-int ocrdma_destroy_ah(struct ib_ah *);
-int ocrdma_query_ah(struct ib_ah *, struct ib_ah_attr *);
-int ocrdma_modify_ah(struct ib_ah *, struct ib_ah_attr *);
+struct ib_ah *ocrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
+ struct ib_udata *udata);
+int ocrdma_destroy_ah(struct ib_ah *ah);
+int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
+int ocrdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int ocrdma_process_mad(struct ib_device *,
int process_mad_flags,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 9a305201545e..dcb5942f9fb5 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -44,6 +44,7 @@
#include <linux/interrupt.h>
#include <linux/log2.h>
#include <linux/dma-mapping.h>
+#include <linux/if_ether.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
@@ -2498,7 +2499,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
int attr_mask)
{
int status;
- struct ib_ah_attr *ah_attr = &attrs->ah_attr;
+ struct rdma_ah_attr *ah_attr = &attrs->ah_attr;
union ib_gid sgid, zgid;
struct ib_gid_attr sgid_attr;
u32 vlan_id = 0xFFFF;
@@ -2509,25 +2510,28 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
struct sockaddr_in6 _sockaddr_in6;
} sgid_addr, dgid_addr;
struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
+ const struct ib_global_route *grh;
- if ((ah_attr->ah_flags & IB_AH_GRH) == 0)
+ if ((rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) == 0)
return -EINVAL;
+ grh = rdma_ah_read_grh(ah_attr);
if (atomic_cmpxchg(&dev->update_sl, 1, 0))
ocrdma_init_service_level(dev);
cmd->params.tclass_sq_psn |=
- (ah_attr->grh.traffic_class << OCRDMA_QP_PARAMS_TCLASS_SHIFT);
+ (grh->traffic_class << OCRDMA_QP_PARAMS_TCLASS_SHIFT);
cmd->params.rnt_rc_sl_fl |=
- (ah_attr->grh.flow_label & OCRDMA_QP_PARAMS_FLOW_LABEL_MASK);
- cmd->params.rnt_rc_sl_fl |= (ah_attr->sl << OCRDMA_QP_PARAMS_SL_SHIFT);
+ (grh->flow_label & OCRDMA_QP_PARAMS_FLOW_LABEL_MASK);
+ cmd->params.rnt_rc_sl_fl |= (rdma_ah_get_sl(ah_attr) <<
+ OCRDMA_QP_PARAMS_SL_SHIFT);
cmd->params.hop_lmt_rq_psn |=
- (ah_attr->grh.hop_limit << OCRDMA_QP_PARAMS_HOP_LMT_SHIFT);
+ (grh->hop_limit << OCRDMA_QP_PARAMS_HOP_LMT_SHIFT);
cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID;
/* GIDs */
- memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0],
+ memcpy(&cmd->params.dgid[0], &grh->dgid.raw[0],
sizeof(cmd->params.dgid));
- status = ib_get_cached_gid(&dev->ibdev, 1, ah_attr->grh.sgid_index,
+ status = ib_get_cached_gid(&dev->ibdev, 1, grh->sgid_index,
&sgid, &sgid_attr);
if (!status && sgid_attr.ndev) {
vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
@@ -2539,7 +2543,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
if (!memcmp(&sgid, &zgid, sizeof(zgid)))
return -EINVAL;
- qp->sgid_idx = ah_attr->grh.sgid_index;
+ qp->sgid_idx = grh->sgid_index;
memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid));
status = ocrdma_resolve_dmac(dev, ah_attr, &mac_addr[0]);
if (status)
@@ -2550,7 +2554,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
if (hdr_type == RDMA_NETWORK_IPV4) {
rdma_gid2ip(&sgid_addr._sockaddr, &sgid);
- rdma_gid2ip(&dgid_addr._sockaddr, &ah_attr->grh.dgid);
+ rdma_gid2ip(&dgid_addr._sockaddr, &grh->dgid);
memcpy(&cmd->params.dgid[0],
&dgid_addr._sockaddr_in.sin_addr.s_addr, 4);
memcpy(&cmd->params.sgid[0],
@@ -2984,7 +2988,7 @@ static int ocrdma_parse_dcbxcfg_rsp(struct ocrdma_dev *dev, int ptype,
OCRDMA_APP_PARAM_APP_PROTO_MASK;
if (
- valid && proto == OCRDMA_APP_PROTO_ROCE &&
+ valid && proto == ETH_P_IBOE &&
proto_sel == OCRDMA_PROTO_SELECT_L2) {
for (slindx = 0; slindx <
OCRDMA_MAX_SERVICE_LEVEL_INDEX; slindx++) {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 896071502739..57c9a2ad0260 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -93,15 +93,16 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
int err;
dev = get_ocrdma_dev(ibdev);
- err = ocrdma_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+ if (ocrdma_is_udp_encap_supported(dev))
+ immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
- if (ocrdma_is_udp_encap_supported(dev))
- immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
@@ -198,7 +199,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext;
dev->ibdev.dealloc_ucontext = ocrdma_dealloc_ucontext;
dev->ibdev.mmap = ocrdma_mmap;
- dev->ibdev.dma_device = &dev->nic_info.pdev->dev;
+ dev->ibdev.dev.parent = &dev->nic_info.pdev->dev;
dev->ibdev.process_mad = ocrdma_process_mad;
dev->ibdev.get_port_immutable = ocrdma_port_immutable;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
index 37df4481bb8f..6ef89c226ad8 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -1901,7 +1901,6 @@ struct ocrdma_eth_vlan {
u8 smac[6];
__be16 eth_type;
__be16 vlan_tag;
-#define OCRDMA_ROCE_ETH_TYPE 0x8915
__be16 roce_eth_type;
} __packed;
@@ -2179,10 +2178,6 @@ enum OCRDMA_DCBX_PARAM_TYPE {
OCRDMA_PARAMETER_TYPE_PEER = 0x02
};
-enum OCRDMA_DCBX_APP_PROTO {
- OCRDMA_APP_PROTO_ROCE = 0x8915
-};
-
enum OCRDMA_DCBX_PROTO {
OCRDMA_PROTO_SELECT_L2 = 0x00,
OCRDMA_PROTO_SELECT_L4 = 0x01
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index f8e4b0a6486f..66056f9a9700 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -643,7 +643,7 @@ static ssize_t ocrdma_dbgfs_ops_write(struct file *filp,
struct ocrdma_stats *pstats = filp->private_data;
struct ocrdma_dev *dev = pstats->dev;
- if (count > 32)
+ if (*ppos != 0 || count == 0 || count > sizeof(tmp_str))
goto err;
if (copy_from_user(tmp_str, buffer, count))
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 6af44f8db3d5..27d5e8d9f08d 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -210,6 +210,7 @@ int ocrdma_query_port(struct ib_device *ibdev,
struct ocrdma_dev *dev;
struct net_device *netdev;
+ /* props being zeroed by the caller, avoid zeroing it here */
dev = get_ocrdma_dev(ibdev);
if (port > 1) {
pr_err("%s(%d) invalid_port=0x%x\n", __func__,
@@ -371,7 +372,7 @@ static int _ocrdma_pd_mgr_put_bitmap(struct ocrdma_dev *dev, u16 pd_id,
return 0;
}
-static u8 ocrdma_put_pd_num(struct ocrdma_dev *dev, u16 pd_id,
+static int ocrdma_put_pd_num(struct ocrdma_dev *dev, u16 pd_id,
bool dpp_pool)
{
int status;
@@ -743,7 +744,8 @@ err:
if (is_uctx_pd) {
ocrdma_release_ucontext_pd(uctx);
} else {
- status = _ocrdma_dealloc_pd(dev, pd);
+ if (_ocrdma_dealloc_pd(dev, pd))
+ pr_err("%s: _ocrdma_dealloc_pd() failed\n", __func__);
}
exit:
return ERR_PTR(status);
@@ -913,21 +915,18 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
pbe = (struct ocrdma_pbe *)pbl_tbl->va;
pbe_cnt = 0;
- shift = ilog2(umem->page_size);
+ shift = umem->page_shift;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
pages = sg_dma_len(sg) >> shift;
for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
/* store the page address in pbe */
pbe->pa_lo =
- cpu_to_le32(sg_dma_address
- (sg) +
- (umem->page_size * pg_cnt));
+ cpu_to_le32(sg_dma_address(sg) +
+ (pg_cnt << shift));
pbe->pa_hi =
- cpu_to_le32(upper_32_bits
- ((sg_dma_address
- (sg) +
- umem->page_size * pg_cnt)));
+ cpu_to_le32(upper_32_bits(sg_dma_address(sg) +
+ (pg_cnt << shift)));
pbe_cnt += 1;
total_num_pbes += 1;
pbe++;
@@ -977,7 +976,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
if (status)
goto umem_err;
- mr->hwmr.pbe_size = mr->umem->page_size;
+ mr->hwmr.pbe_size = BIT(mr->umem->page_shift);
mr->hwmr.fbo = ib_umem_offset(mr->umem);
mr->hwmr.va = usr_addr;
mr->hwmr.len = len;
@@ -1170,8 +1169,7 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq)
dev->cq_tbl[cq->id] = NULL;
indx = ocrdma_get_eq_table_index(dev, cq->eqn);
- if (indx == -EINVAL)
- BUG();
+ BUG_ON(indx == -EINVAL);
eq = &dev->eq_tbl[indx];
irq = ocrdma_get_irq(dev, eq);
@@ -1601,23 +1599,24 @@ int ocrdma_query_qp(struct ib_qp *ibqp,
qp_attr->cap.max_recv_sge = qp->rq.max_sges;
qp_attr->cap.max_inline_data = qp->max_inline_data;
qp_init_attr->cap = qp_attr->cap;
- memcpy(&qp_attr->ah_attr.grh.dgid, &params.dgid[0],
- sizeof(params.dgid));
- qp_attr->ah_attr.grh.flow_label = params.rnt_rc_sl_fl &
- OCRDMA_QP_PARAMS_FLOW_LABEL_MASK;
- qp_attr->ah_attr.grh.sgid_index = qp->sgid_idx;
- qp_attr->ah_attr.grh.hop_limit = (params.hop_lmt_rq_psn &
- OCRDMA_QP_PARAMS_HOP_LMT_MASK) >>
- OCRDMA_QP_PARAMS_HOP_LMT_SHIFT;
- qp_attr->ah_attr.grh.traffic_class = (params.tclass_sq_psn &
- OCRDMA_QP_PARAMS_TCLASS_MASK) >>
- OCRDMA_QP_PARAMS_TCLASS_SHIFT;
-
- qp_attr->ah_attr.ah_flags = IB_AH_GRH;
- qp_attr->ah_attr.port_num = 1;
- qp_attr->ah_attr.sl = (params.rnt_rc_sl_fl &
- OCRDMA_QP_PARAMS_SL_MASK) >>
- OCRDMA_QP_PARAMS_SL_SHIFT;
+ qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
+
+ rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
+ params.rnt_rc_sl_fl &
+ OCRDMA_QP_PARAMS_FLOW_LABEL_MASK,
+ qp->sgid_idx,
+ (params.hop_lmt_rq_psn &
+ OCRDMA_QP_PARAMS_HOP_LMT_MASK) >>
+ OCRDMA_QP_PARAMS_HOP_LMT_SHIFT,
+ (params.tclass_sq_psn &
+ OCRDMA_QP_PARAMS_TCLASS_MASK) >>
+ OCRDMA_QP_PARAMS_TCLASS_SHIFT);
+ rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid[0]);
+
+ rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
+ rdma_ah_set_sl(&qp_attr->ah_attr, (params.rnt_rc_sl_fl &
+ OCRDMA_QP_PARAMS_SL_MASK) >>
+ OCRDMA_QP_PARAMS_SL_SHIFT);
qp_attr->timeout = (params.ack_to_rnr_rtc_dest_qpn &
OCRDMA_QP_PARAMS_ACK_TIMEOUT_MASK) >>
OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT;
@@ -1630,8 +1629,8 @@ int ocrdma_query_qp(struct ib_qp *ibqp,
qp_attr->min_rnr_timer = 0;
qp_attr->pkey_index = 0;
qp_attr->port_num = 1;
- qp_attr->ah_attr.src_path_bits = 0;
- qp_attr->ah_attr.static_rate = 0;
+ rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
+ rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
qp_attr->alt_pkey_index = 0;
qp_attr->alt_port_num = 0;
qp_attr->alt_timeout = 0;
@@ -1741,8 +1740,7 @@ static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq)
wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
OCRDMA_CQE_BUFTAG_SHIFT) &
qp->srq->rq.max_wqe_idx;
- if (wqe_idx < 1)
- BUG();
+ BUG_ON(wqe_idx < 1);
spin_lock_irqsave(&qp->srq->q_lock, flags);
ocrdma_hwq_inc_tail(&qp->srq->rq);
ocrdma_srq_toggle_bit(qp->srq, wqe_idx - 1);
@@ -1904,6 +1902,7 @@ struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd,
goto err;
if (udata == NULL) {
+ status = -ENOMEM;
srq->rqe_wr_id_tbl = kzalloc(sizeof(u64) * srq->rq.max_cnt,
GFP_KERNEL);
if (srq->rqe_wr_id_tbl == NULL)
@@ -2388,15 +2387,13 @@ static int ocrdma_srq_get_idx(struct ocrdma_srq *srq)
if (srq->idx_bit_fields[row]) {
indx = ffs(srq->idx_bit_fields[row]);
indx = (row * 32) + (indx - 1);
- if (indx >= srq->rq.max_cnt)
- BUG();
+ BUG_ON(indx >= srq->rq.max_cnt);
ocrdma_srq_toggle_bit(srq, indx);
break;
}
}
- if (row == srq->bit_fields_len)
- BUG();
+ BUG_ON(row == srq->bit_fields_len);
return indx + 1; /* Use from index 1 */
}
@@ -2754,8 +2751,7 @@ static void ocrdma_update_free_srq_cqe(struct ib_wc *ibwc,
srq = get_ocrdma_srq(qp->ibqp.srq);
wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx;
- if (wqe_idx < 1)
- BUG();
+ BUG_ON(wqe_idx < 1);
ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx];
spin_lock_irqsave(&srq->q_lock, flags);
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 3ac8aa5ef37d..b5851fd67d4f 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -35,8 +35,9 @@
#include <rdma/ib_user_verbs.h>
#include <linux/netdevice.h>
#include <linux/iommu.h>
+#include <linux/pci.h>
#include <net/addrconf.h>
-#include <linux/qed/qede_roce.h>
+
#include <linux/qed/qed_chain.h>
#include <linux/qed/qed_if.h>
#include "qedr.h"
@@ -170,7 +171,7 @@ static int qedr_register_device(struct qedr_dev *dev)
dev->ibdev.get_port_immutable = qedr_port_immutable;
dev->ibdev.get_netdev = qedr_get_netdev;
- dev->ibdev.dma_device = &dev->pdev->dev;
+ dev->ibdev.dev.parent = &dev->pdev->dev;
dev->ibdev.get_link_layer = qedr_link_layer;
dev->ibdev.get_dev_fw_str = qedr_get_dev_fw_str;
@@ -275,7 +276,7 @@ static int qedr_alloc_resources(struct qedr_dev *dev)
QED_CHAIN_CNT_TYPE_U16,
n_entries,
sizeof(struct regpair *),
- &cnq->pbl);
+ &cnq->pbl, NULL);
if (rc)
goto err4;
@@ -340,43 +341,58 @@ static void qedr_remove_sysfiles(struct qedr_dev *dev)
static void qedr_pci_set_atomic(struct qedr_dev *dev, struct pci_dev *pdev)
{
struct pci_dev *bridge;
- u32 val;
-
- dev->atomic_cap = IB_ATOMIC_NONE;
+ u32 ctl2, cap2;
+ u16 flags;
+ int rc;
bridge = pdev->bus->self;
if (!bridge)
- return;
-
- /* Check whether we are connected directly or via a switch */
- while (bridge && bridge->bus->parent) {
- DP_DEBUG(dev, QEDR_MSG_INIT,
- "Device is not connected directly to root. bridge->bus->number=%d primary=%d\n",
- bridge->bus->number, bridge->bus->primary);
- /* Need to check Atomic Op Routing Supported all the way to
- * root complex.
- */
- pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &val);
- if (!(val & PCI_EXP_DEVCAP2_ATOMIC_ROUTE)) {
- pcie_capability_clear_word(pdev,
- PCI_EXP_DEVCTL2,
- PCI_EXP_DEVCTL2_ATOMIC_REQ);
- return;
- }
+ goto disable;
+
+ /* Check atomic routing support all the way to root complex */
+ while (bridge->bus->parent) {
+ rc = pcie_capability_read_word(bridge, PCI_EXP_FLAGS, &flags);
+ if (rc || ((flags & PCI_EXP_FLAGS_VERS) < 2))
+ goto disable;
+
+ rc = pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &cap2);
+ if (rc)
+ goto disable;
+
+ rc = pcie_capability_read_dword(bridge, PCI_EXP_DEVCTL2, &ctl2);
+ if (rc)
+ goto disable;
+
+ if (!(cap2 & PCI_EXP_DEVCAP2_ATOMIC_ROUTE) ||
+ (ctl2 & PCI_EXP_DEVCTL2_ATOMIC_EGRESS_BLOCK))
+ goto disable;
bridge = bridge->bus->parent->self;
}
- bridge = pdev->bus->self;
- /* according to bridge capability */
- pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &val);
- if (val & PCI_EXP_DEVCAP2_ATOMIC_COMP64) {
- pcie_capability_set_word(pdev, PCI_EXP_DEVCTL2,
- PCI_EXP_DEVCTL2_ATOMIC_REQ);
- dev->atomic_cap = IB_ATOMIC_GLOB;
- } else {
- pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL2,
- PCI_EXP_DEVCTL2_ATOMIC_REQ);
- }
+ rc = pcie_capability_read_word(bridge, PCI_EXP_FLAGS, &flags);
+ if (rc || ((flags & PCI_EXP_FLAGS_VERS) < 2))
+ goto disable;
+
+ rc = pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &cap2);
+ if (rc || !(cap2 & PCI_EXP_DEVCAP2_ATOMIC_COMP64))
+ goto disable;
+
+ /* Set atomic operations */
+ pcie_capability_set_word(pdev, PCI_EXP_DEVCTL2,
+ PCI_EXP_DEVCTL2_ATOMIC_REQ);
+ dev->atomic_cap = IB_ATOMIC_GLOB;
+
+ DP_DEBUG(dev, QEDR_MSG_INIT, "Atomic capability enabled\n");
+
+ return;
+
+disable:
+ pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL2,
+ PCI_EXP_DEVCTL2_ATOMIC_REQ);
+ dev->atomic_cap = IB_ATOMIC_NONE;
+
+ DP_DEBUG(dev, QEDR_MSG_INIT, "Atomic capability disabled\n");
+
}
static const struct qed_rdma_ops *qed_ops;
@@ -423,14 +439,21 @@ static irqreturn_t qedr_irq_handler(int irq, void *handle)
cq->arm_flags = 0;
- if (cq->ibcq.comp_handler)
+ if (!cq->destroyed && cq->ibcq.comp_handler)
(*cq->ibcq.comp_handler)
(&cq->ibcq, cq->ibcq.cq_context);
+ /* The CQ's CNQ notification counter is checked before
+ * destroying the CQ in a busy-wait loop that waits for all of
+ * the CQ's CNQ interrupts to be processed. It is increased
+ * here, only after the completion handler, to ensure that the
+ * handler is not running when the CQ is destroyed.
+ */
+ cq->cnq_notif++;
+
sw_comp_cons = qed_chain_get_cons_idx(&cnq->pbl);
cnq->n_comp++;
-
}
qed_ops->rdma_cnq_prod_update(cnq->dev->rdma_ctx, cnq->index,
@@ -587,9 +610,8 @@ void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle)
#define EVENT_TYPE_CQ 1
#define EVENT_TYPE_QP 2
struct qedr_dev *dev = (struct qedr_dev *)context;
- union event_ring_data *data = fw_handle;
- u64 roce_handle64 = ((u64)data->roce_handle.hi << 32) +
- data->roce_handle.lo;
+ struct regpair *async_handle = (struct regpair *)fw_handle;
+ u64 roce_handle64 = ((u64) async_handle->hi << 32) + async_handle->lo;
u8 event_type = EVENT_TYPE_NOT_DEFINED;
struct ib_event event;
struct ib_cq *ibcq;
@@ -864,9 +886,9 @@ static void qedr_mac_address_change(struct qedr_dev *dev)
memcpy(&sgid->raw[8], guid, sizeof(guid));
/* Update LL2 */
- rc = dev->ops->roce_ll2_set_mac_filter(dev->cdev,
- dev->gsi_ll2_mac_address,
- dev->ndev->dev_addr);
+ rc = dev->ops->ll2_set_mac_filter(dev->cdev,
+ dev->gsi_ll2_mac_address,
+ dev->ndev->dev_addr);
ether_addr_copy(dev->gsi_ll2_mac_address, dev->ndev->dev_addr);
@@ -880,7 +902,7 @@ static void qedr_mac_address_change(struct qedr_dev *dev)
* initialization done before RoCE driver notifies
* event to stack.
*/
-static void qedr_notify(struct qedr_dev *dev, enum qede_roce_event event)
+static void qedr_notify(struct qedr_dev *dev, enum qede_rdma_event event)
{
switch (event) {
case QEDE_UP:
@@ -909,12 +931,12 @@ static struct qedr_driver qedr_drv = {
static int __init qedr_init_module(void)
{
- return qede_roce_register_driver(&qedr_drv);
+ return qede_rdma_register_driver(&qedr_drv);
}
static void __exit qedr_exit_module(void)
{
- qede_roce_unregister_driver(&qedr_drv);
+ qede_rdma_unregister_driver(&qedr_drv);
}
module_init(qedr_init_module);
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index bb32e4792ec9..ab7784bfdac6 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -36,9 +36,10 @@
#include <rdma/ib_addr.h>
#include <linux/qed/qed_if.h>
#include <linux/qed/qed_chain.h>
-#include <linux/qed/qed_roce_if.h>
-#include <linux/qed/qede_roce.h>
-#include "qedr_hsi.h"
+#include <linux/qed/qed_rdma_if.h>
+#include <linux/qed/qede_rdma.h>
+#include <linux/qed/roce_common.h>
+#include "qedr_hsi_rdma.h"
#define QEDR_MODULE_VERSION "8.10.10.0"
#define QEDR_NODE_DESC "QLogic 579xx RoCE HCA"
@@ -57,7 +58,10 @@
#define QEDR_MSG_QP " QP"
#define QEDR_MSG_GSI " GSI"
-#define QEDR_CQ_MAGIC_NUMBER (0x11223344)
+#define QEDR_CQ_MAGIC_NUMBER (0x11223344)
+
+#define FW_PAGE_SIZE (RDMA_RING_PAGE_SIZE)
+#define FW_PAGE_SHIFT (12)
struct qedr_dev;
@@ -149,6 +153,8 @@ struct qedr_dev {
u32 dp_module;
u8 dp_level;
u8 num_hwfns;
+ u8 gsi_ll2_handle;
+
uint wq_multiplier;
u8 gsi_ll2_mac_address[ETH_ALEN];
int gsi_qp_created;
@@ -271,6 +277,8 @@ struct qedr_cq {
u32 cq_cons;
struct qedr_userq q;
+ u8 destroyed;
+ u16 cnq_notif;
};
struct qedr_pd {
@@ -389,7 +397,7 @@ struct qedr_qp {
struct qedr_ah {
struct ib_ah ibah;
- struct ib_ah_attr attr;
+ struct rdma_ah_attr attr;
};
enum qedr_mr_type {
@@ -428,7 +436,8 @@ struct qedr_mr {
RDMA_CQE_RESPONDER_IMM_FLG_SHIFT)
#define QEDR_RESP_RDMA (RDMA_CQE_RESPONDER_RDMA_FLG_MASK << \
RDMA_CQE_RESPONDER_RDMA_FLG_SHIFT)
-#define QEDR_RESP_RDMA_IMM (QEDR_RESP_IMM | QEDR_RESP_RDMA)
+#define QEDR_RESP_INV (RDMA_CQE_RESPONDER_INV_FLG_MASK << \
+ RDMA_CQE_RESPONDER_INV_FLG_SHIFT)
static inline void qedr_inc_sw_cons(struct qedr_qp_hwq_info *info)
{
@@ -442,19 +451,24 @@ static inline void qedr_inc_sw_prod(struct qedr_qp_hwq_info *info)
}
static inline int qedr_get_dmac(struct qedr_dev *dev,
- struct ib_ah_attr *ah_attr, u8 *mac_addr)
+ struct rdma_ah_attr *ah_attr, u8 *mac_addr)
{
union ib_gid zero_sgid = { { 0 } };
struct in6_addr in6;
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
+ u8 *dmac;
- if (!memcmp(&ah_attr->grh.dgid, &zero_sgid, sizeof(union ib_gid))) {
+ if (!memcmp(&grh->dgid, &zero_sgid, sizeof(union ib_gid))) {
DP_ERR(dev, "Local port GID not supported\n");
eth_zero_addr(mac_addr);
return -EINVAL;
}
- memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
- ether_addr_copy(mac_addr, ah_attr->dmac);
+ memcpy(&in6, grh->dgid.raw, sizeof(in6));
+ dmac = rdma_ah_retrieve_dmac(ah_attr);
+ if (!dmac)
+ return -EINVAL;
+ ether_addr_copy(mac_addr, dmac);
return 0;
}
diff --git a/drivers/infiniband/hw/qedr/qedr_cm.c b/drivers/infiniband/hw/qedr/qedr_cm.c
index a9a8d8745d2e..4689e802b332 100644
--- a/drivers/infiniband/hw/qedr/qedr_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_cm.c
@@ -43,14 +43,11 @@
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
-#include "qedr_hsi.h"
#include <linux/qed/qed_if.h>
-#include <linux/qed/qed_roce_if.h>
+#include <linux/qed/qed_rdma_if.h>
#include "qedr.h"
-#include "qedr_hsi.h"
#include "verbs.h"
#include <rdma/qedr-abi.h>
-#include "qedr_hsi.h"
#include "qedr_cm.h"
void qedr_inc_sw_gsi_cons(struct qedr_qp_hwq_info *info)
@@ -67,9 +64,14 @@ void qedr_store_gsi_qp_cq(struct qedr_dev *dev, struct qedr_qp *qp,
dev->gsi_qp = qp;
}
-void qedr_ll2_tx_cb(void *_qdev, struct qed_roce_ll2_packet *pkt)
+void qedr_ll2_complete_tx_packet(void *cxt,
+ u8 connection_handle,
+ void *cookie,
+ dma_addr_t first_frag_addr,
+ bool b_last_fragment, bool b_last_packet)
{
- struct qedr_dev *dev = (struct qedr_dev *)_qdev;
+ struct qedr_dev *dev = (struct qedr_dev *)cxt;
+ struct qed_roce_ll2_packet *pkt = cookie;
struct qedr_cq *cq = dev->gsi_sqcq;
struct qedr_qp *qp = dev->gsi_qp;
unsigned long flags;
@@ -91,20 +93,26 @@ void qedr_ll2_tx_cb(void *_qdev, struct qed_roce_ll2_packet *pkt)
(*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);
}
-void qedr_ll2_rx_cb(void *_dev, struct qed_roce_ll2_packet *pkt,
- struct qed_roce_ll2_rx_params *params)
+void qedr_ll2_complete_rx_packet(void *cxt,
+ struct qed_ll2_comp_rx_data *data)
{
- struct qedr_dev *dev = (struct qedr_dev *)_dev;
+ struct qedr_dev *dev = (struct qedr_dev *)cxt;
struct qedr_cq *cq = dev->gsi_rqcq;
struct qedr_qp *qp = dev->gsi_qp;
unsigned long flags;
spin_lock_irqsave(&qp->q_lock, flags);
- qp->rqe_wr_id[qp->rq.gsi_cons].rc = params->rc;
- qp->rqe_wr_id[qp->rq.gsi_cons].vlan_id = params->vlan_id;
- qp->rqe_wr_id[qp->rq.gsi_cons].sg_list[0].length = pkt->payload[0].len;
- ether_addr_copy(qp->rqe_wr_id[qp->rq.gsi_cons].smac, params->smac);
+ qp->rqe_wr_id[qp->rq.gsi_cons].rc = data->u.data_length_error ?
+ -EINVAL : 0;
+ qp->rqe_wr_id[qp->rq.gsi_cons].vlan_id = data->vlan;
+ /* note: length stands for data length i.e. GRH is excluded */
+ qp->rqe_wr_id[qp->rq.gsi_cons].sg_list[0].length =
+ data->length.data_length;
+ *((u32 *)&qp->rqe_wr_id[qp->rq.gsi_cons].smac[0]) =
+ ntohl(data->opaque_data_0);
+ *((u16 *)&qp->rqe_wr_id[qp->rq.gsi_cons].smac[4]) =
+ ntohs((u16)data->opaque_data_1);
qedr_inc_sw_gsi_cons(&qp->rq);
@@ -114,6 +122,14 @@ void qedr_ll2_rx_cb(void *_dev, struct qed_roce_ll2_packet *pkt,
(*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);
}
+void qedr_ll2_release_rx_packet(void *cxt,
+ u8 connection_handle,
+ void *cookie,
+ dma_addr_t rx_buf_addr, bool b_last_packet)
+{
+ /* Do nothing... */
+}
+
static void qedr_destroy_gsi_cq(struct qedr_dev *dev,
struct ib_qp_init_attr *attrs)
{
@@ -162,27 +178,159 @@ static inline int qedr_check_gsi_qp_attrs(struct qedr_dev *dev,
return 0;
}
+static int qedr_ll2_post_tx(struct qedr_dev *dev,
+ struct qed_roce_ll2_packet *pkt)
+{
+ enum qed_ll2_roce_flavor_type roce_flavor;
+ struct qed_ll2_tx_pkt_info ll2_tx_pkt;
+ int rc;
+ int i;
+
+ memset(&ll2_tx_pkt, 0, sizeof(ll2_tx_pkt));
+
+ roce_flavor = (pkt->roce_mode == ROCE_V1) ?
+ QED_LL2_ROCE : QED_LL2_RROCE;
+
+ if (pkt->roce_mode == ROCE_V2_IPV4)
+ ll2_tx_pkt.enable_ip_cksum = 1;
+
+ ll2_tx_pkt.num_of_bds = 1 /* hdr */ + pkt->n_seg;
+ ll2_tx_pkt.vlan = 0;
+ ll2_tx_pkt.tx_dest = pkt->tx_dest;
+ ll2_tx_pkt.qed_roce_flavor = roce_flavor;
+ ll2_tx_pkt.first_frag = pkt->header.baddr;
+ ll2_tx_pkt.first_frag_len = pkt->header.len;
+ ll2_tx_pkt.cookie = pkt;
+
+ /* tx header */
+ rc = dev->ops->ll2_prepare_tx_packet(dev->rdma_ctx,
+ dev->gsi_ll2_handle,
+ &ll2_tx_pkt, 1);
+ if (rc) {
+ /* TX failed while posting header - release resources */
+ dma_free_coherent(&dev->pdev->dev, pkt->header.len,
+ pkt->header.vaddr, pkt->header.baddr);
+ kfree(pkt);
+
+ DP_ERR(dev, "roce ll2 tx: header failed (rc=%d)\n", rc);
+ return rc;
+ }
+
+ /* tx payload */
+ for (i = 0; i < pkt->n_seg; i++) {
+ rc = dev->ops->ll2_set_fragment_of_tx_packet(
+ dev->rdma_ctx,
+ dev->gsi_ll2_handle,
+ pkt->payload[i].baddr,
+ pkt->payload[i].len);
+
+ if (rc) {
+ /* If this fails there is little to do here: part of the packet
+ * has already been posted, so we cannot free the memory and must
+ * wait for the completion.
+ */
+ DP_ERR(dev, "ll2 tx: payload failed (rc=%d)\n", rc);
+ return rc;
+ }
+ }
+
+ return 0;
+}
+
+int qedr_ll2_stop(struct qedr_dev *dev)
+{
+ int rc;
+
+ if (dev->gsi_ll2_handle == QED_LL2_UNUSED_HANDLE)
+ return 0;
+
+ /* remove LL2 MAC address filter */
+ rc = dev->ops->ll2_set_mac_filter(dev->cdev,
+ dev->gsi_ll2_mac_address, NULL);
+
+ rc = dev->ops->ll2_terminate_connection(dev->rdma_ctx,
+ dev->gsi_ll2_handle);
+ if (rc)
+ DP_ERR(dev, "Failed to terminate LL2 connection (rc=%d)\n", rc);
+
+ dev->ops->ll2_release_connection(dev->rdma_ctx, dev->gsi_ll2_handle);
+
+ dev->gsi_ll2_handle = QED_LL2_UNUSED_HANDLE;
+
+ return rc;
+}
+
+int qedr_ll2_start(struct qedr_dev *dev,
+ struct ib_qp_init_attr *attrs, struct qedr_qp *qp)
+{
+ struct qed_ll2_acquire_data data;
+ struct qed_ll2_cbs cbs;
+ int rc;
+
+ /* configure and start LL2 */
+ cbs.rx_comp_cb = qedr_ll2_complete_rx_packet;
+ cbs.tx_comp_cb = qedr_ll2_complete_tx_packet;
+ cbs.rx_release_cb = qedr_ll2_release_rx_packet;
+ cbs.tx_release_cb = qedr_ll2_complete_tx_packet;
+ cbs.cookie = dev;
+
+ memset(&data, 0, sizeof(data));
+ data.input.conn_type = QED_LL2_TYPE_ROCE;
+ data.input.mtu = dev->ndev->mtu;
+ data.input.rx_num_desc = attrs->cap.max_recv_wr;
+ data.input.rx_drop_ttl0_flg = true;
+ data.input.rx_vlan_removal_en = false;
+ data.input.tx_num_desc = attrs->cap.max_send_wr;
+ data.input.tx_tc = 0;
+ data.input.tx_dest = QED_LL2_TX_DEST_NW;
+ data.input.ai_err_packet_too_big = QED_LL2_DROP_PACKET;
+ data.input.ai_err_no_buf = QED_LL2_DROP_PACKET;
+ data.input.gsi_enable = 1;
+ data.p_connection_handle = &dev->gsi_ll2_handle;
+ data.cbs = &cbs;
+
+ rc = dev->ops->ll2_acquire_connection(dev->rdma_ctx, &data);
+ if (rc) {
+ DP_ERR(dev,
+ "ll2 start: failed to acquire LL2 connection (rc=%d)\n",
+ rc);
+ return rc;
+ }
+
+ rc = dev->ops->ll2_establish_connection(dev->rdma_ctx,
+ dev->gsi_ll2_handle);
+ if (rc) {
+ DP_ERR(dev,
+ "ll2 start: failed to establish LL2 connection (rc=%d)\n",
+ rc);
+ goto err1;
+ }
+
+ rc = dev->ops->ll2_set_mac_filter(dev->cdev, NULL, dev->ndev->dev_addr);
+ if (rc)
+ goto err2;
+
+ return 0;
+
+err2:
+ dev->ops->ll2_terminate_connection(dev->rdma_ctx, dev->gsi_ll2_handle);
+err1:
+ dev->ops->ll2_release_connection(dev->rdma_ctx, dev->gsi_ll2_handle);
+
+ return rc;
+}
+
struct ib_qp *qedr_create_gsi_qp(struct qedr_dev *dev,
struct ib_qp_init_attr *attrs,
struct qedr_qp *qp)
{
- struct qed_roce_ll2_params ll2_params;
int rc;
rc = qedr_check_gsi_qp_attrs(dev, attrs);
if (rc)
return ERR_PTR(rc);
- /* configure and start LL2 */
- memset(&ll2_params, 0, sizeof(ll2_params));
- ll2_params.max_tx_buffers = attrs->cap.max_send_wr;
- ll2_params.max_rx_buffers = attrs->cap.max_recv_wr;
- ll2_params.cbs.tx_cb = qedr_ll2_tx_cb;
- ll2_params.cbs.rx_cb = qedr_ll2_rx_cb;
- ll2_params.cb_cookie = (void *)dev;
- ll2_params.mtu = dev->ndev->mtu;
- ether_addr_copy(ll2_params.mac_address, dev->ndev->dev_addr);
- rc = dev->ops->roce_ll2_start(dev->cdev, &ll2_params);
+ rc = qedr_ll2_start(dev, attrs, qp);
if (rc) {
DP_ERR(dev, "create gsi qp: failed on ll2 start. rc=%d\n", rc);
return ERR_PTR(rc);
@@ -217,7 +365,7 @@ struct ib_qp *qedr_create_gsi_qp(struct qedr_dev *dev,
err:
kfree(qp->rqe_wr_id);
- rc = dev->ops->roce_ll2_stop(dev->cdev);
+ rc = qedr_ll2_stop(dev);
if (rc)
DP_ERR(dev, "create gsi qp: failed destroy on create\n");
@@ -226,15 +374,7 @@ err:
int qedr_destroy_gsi_qp(struct qedr_dev *dev)
{
- int rc;
-
- rc = dev->ops->roce_ll2_stop(dev->cdev);
- if (rc)
- DP_ERR(dev, "destroy gsi qp: failed (rc=%d)\n", rc);
- else
- DP_DEBUG(dev, QEDR_MSG_GSI, "destroy gsi qp: success\n");
-
- return rc;
+ return qedr_ll2_stop(dev);
}
#define QEDR_MAX_UD_HEADER_SIZE (100)
@@ -246,8 +386,8 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
int *roce_mode)
{
bool has_vlan = false, has_grh_ipv6 = true;
- struct ib_ah_attr *ah_attr = &get_qedr_ah(ud_wr(swr)->ah)->attr;
- struct ib_global_route *grh = &ah_attr->grh;
+ struct rdma_ah_attr *ah_attr = &get_qedr_ah(ud_wr(swr)->ah)->attr;
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
union ib_gid sgid;
int send_size = 0;
u16 vlan_id = 0;
@@ -263,31 +403,34 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
for (i = 0; i < swr->num_sge; ++i)
send_size += swr->sg_list[i].length;
- rc = ib_get_cached_gid(qp->ibqp.device, ah_attr->port_num,
+ rc = ib_get_cached_gid(qp->ibqp.device, rdma_ah_get_port_num(ah_attr),
grh->sgid_index, &sgid, &sgid_attr);
if (rc) {
DP_ERR(dev,
"gsi post send: failed to get cached GID (port=%d, ix=%d)\n",
- ah_attr->port_num, grh->sgid_index);
+ rdma_ah_get_port_num(ah_attr),
+ grh->sgid_index);
return rc;
}
- vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
- if (vlan_id < VLAN_CFI_MASK)
- has_vlan = true;
- if (sgid_attr.ndev)
+ if (sgid_attr.ndev) {
+ vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
+ if (vlan_id < VLAN_CFI_MASK)
+ has_vlan = true;
+
dev_put(sgid_attr.ndev);
+ }
if (!memcmp(&sgid, &zgid, sizeof(sgid))) {
DP_ERR(dev, "gsi post send: GID not found GID index %d\n",
- ah_attr->grh.sgid_index);
+ grh->sgid_index);
return -ENOENT;
}
has_udp = (sgid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP);
if (!has_udp) {
/* RoCE v1 */
- ether_type = ETH_P_ROCE;
+ ether_type = ETH_P_IBOE;
*roce_mode = ROCE_V1;
} else if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) {
/* RoCE v2 IPv4 */
@@ -310,7 +453,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
}
/* ENET + VLAN headers */
- ether_addr_copy(udh->eth.dmac_h, ah_attr->dmac);
+ ether_addr_copy(udh->eth.dmac_h, ah_attr->roce.dmac);
ether_addr_copy(udh->eth.smac_h, dev->ndev->dev_addr);
if (has_vlan) {
udh->eth.type = htons(ETH_P_8021Q);
@@ -344,13 +487,13 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
u32 ipv4_addr;
udh->ip4.protocol = IPPROTO_UDP;
- udh->ip4.tos = htonl(ah_attr->grh.flow_label);
+ udh->ip4.tos = htonl(grh->flow_label);
udh->ip4.frag_off = htons(IP_DF);
- udh->ip4.ttl = ah_attr->grh.hop_limit;
+ udh->ip4.ttl = grh->hop_limit;
ipv4_addr = qedr_get_ipv4_from_gid(sgid.raw);
udh->ip4.saddr = ipv4_addr;
- ipv4_addr = qedr_get_ipv4_from_gid(ah_attr->grh.dgid.raw);
+ ipv4_addr = qedr_get_ipv4_from_gid(grh->dgid.raw);
udh->ip4.daddr = ipv4_addr;
/* note: checksum is calculated by the device */
}
@@ -421,7 +564,6 @@ int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
{
struct qed_roce_ll2_packet *pkt = NULL;
struct qedr_qp *qp = get_qedr_qp(ibqp);
- struct qed_roce_ll2_tx_params params;
struct qedr_dev *dev = qp->dev;
unsigned long flags;
int rc;
@@ -449,8 +591,6 @@ int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
goto err;
}
- memset(&params, 0, sizeof(params));
-
spin_lock_irqsave(&qp->q_lock, flags);
rc = qedr_gsi_build_packet(dev, qp, wr, &pkt);
@@ -459,7 +599,8 @@ int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
goto err;
}
- rc = dev->ops->roce_ll2_tx(dev->cdev, pkt, &params);
+ rc = qedr_ll2_post_tx(dev, pkt);
+
if (!rc) {
qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
qedr_inc_sw_prod(&qp->sq);
@@ -467,17 +608,6 @@ int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
"gsi post send: opcode=%d, in_irq=%ld, irqs_disabled=%d, wr_id=%llx\n",
wr->opcode, in_irq(), irqs_disabled(), wr->wr_id);
} else {
- if (rc == QED_ROCE_TX_HEAD_FAILURE) {
- /* TX failed while posting header - release resources */
- dma_free_coherent(&dev->pdev->dev, pkt->header.len,
- pkt->header.vaddr, pkt->header.baddr);
- kfree(pkt);
- } else if (rc == QED_ROCE_TX_FRAG_FAILURE) {
- /* NTD since TX failed while posting a fragment. We will
- * release the resources on TX callback
- */
- }
-
DP_ERR(dev, "gsi post send: failed to transmit (rc=%d)\n", rc);
rc = -EAGAIN;
*bad_wr = wr;
@@ -504,10 +634,8 @@ int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
{
struct qedr_dev *dev = get_qedr_dev(ibqp->device);
struct qedr_qp *qp = get_qedr_qp(ibqp);
- struct qed_roce_ll2_buffer buf;
unsigned long flags;
- int status = 0;
- int rc;
+ int rc = 0;
if ((qp->state != QED_ROCE_QP_STATE_RTR) &&
(qp->state != QED_ROCE_QP_STATE_RTS)) {
@@ -518,8 +646,6 @@ int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
return -EINVAL;
}
- memset(&buf, 0, sizeof(buf));
-
spin_lock_irqsave(&qp->q_lock, flags);
while (wr) {
@@ -530,10 +656,12 @@ int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
goto err;
}
- buf.baddr = wr->sg_list[0].addr;
- buf.len = wr->sg_list[0].length;
-
- rc = dev->ops->roce_ll2_post_rx_buffer(dev->cdev, &buf, 0, 1);
+ rc = dev->ops->ll2_post_rx_buffer(dev->rdma_ctx,
+ dev->gsi_ll2_handle,
+ wr->sg_list[0].addr,
+ wr->sg_list[0].length,
+ 0 /* cookie */,
+ 1 /* notify_fw */);
if (rc) {
DP_ERR(dev,
"gsi post recv: failed to post rx buffer (rc=%d)\n",
@@ -553,7 +681,7 @@ int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
spin_unlock_irqrestore(&qp->q_lock, flags);
- return status;
+ return rc;
err:
spin_unlock_irqrestore(&qp->q_lock, flags);
*bad_wr = wr;
diff --git a/drivers/infiniband/hw/qedr/qedr_cm.h b/drivers/infiniband/hw/qedr/qedr_cm.h
index 9ba6e15cd93f..a55916323ea9 100644
--- a/drivers/infiniband/hw/qedr/qedr_cm.h
+++ b/drivers/infiniband/hw/qedr/qedr_cm.h
@@ -37,10 +37,9 @@
#define QEDR_GSI_MAX_RECV_SGE (1) /* LL2 FW limitation */
-#define ETH_P_ROCE (0x8915)
#define QEDR_ROCE_V2_UDP_SPORT (0000)
-static inline u32 qedr_get_ipv4_from_gid(u8 *gid)
+static inline u32 qedr_get_ipv4_from_gid(const u8 *gid)
{
return *(u32 *)(void *)&gid[12];
}
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index c7d6c9a783bd..2ae71b8f1ba8 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -43,7 +43,8 @@
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
-#include "qedr_hsi.h"
+#include <linux/qed/common_hsi.h>
+#include "qedr_hsi_rdma.h"
#include <linux/qed/qed_if.h>
#include "qedr.h"
#include "verbs.h"
@@ -52,6 +53,14 @@
#define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
+static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
+ size_t len)
+{
+ size_t min_len = min_t(size_t, len, udata->outlen);
+
+ return ib_copy_to_udata(udata, src, min_len);
+}
+
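The qedr_ib_copy_to_udata() wrapper above clamps the copy length to udata->outlen, so a response is never copied beyond the buffer a (possibly older) user library provided. A minimal sketch of a caller, assuming a hypothetical response struct; qedr_example_uresp and qedr_example_copy are made up for illustration and are not part of the patch:

/* Hypothetical example: an older user library may pass an outlen that
 * only covers field_a; the wrapper then copies just that prefix instead
 * of writing past the user-supplied buffer.
 */
struct qedr_example_uresp {
	u32 field_a;
	u32 field_b;	/* newer field unknown to older user libraries */
};

static int qedr_example_copy(struct ib_udata *udata)
{
	struct qedr_example_uresp uresp = { .field_a = 1, .field_b = 2 };

	return qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
}

With this clamp, appending new trailing fields to a response struct stays backward compatible: old libraries simply receive the prefix they asked for.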
int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
{
if (index > QEDR_ROCE_PKEY_TABLE_LEN)
@@ -238,8 +247,8 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
}
rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
- memset(attr, 0, sizeof(*attr));
+ /* *attr is zeroed by the caller, so avoid zeroing it again here */
if (rdma_port->port_state == QED_RDMA_PORT_UP) {
attr->state = IB_PORT_ACTIVE;
attr->phys_state = 5;
@@ -377,7 +386,7 @@ struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
uresp.max_cqes = QEDR_MAX_CQES;
- rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
if (rc)
goto err;
@@ -498,7 +507,7 @@ struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
uresp.pd_id = pd_id;
- rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
if (rc) {
DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
@@ -652,14 +661,15 @@ static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
struct qedr_pbl *pbl,
- struct qedr_pbl_info *pbl_info)
+ struct qedr_pbl_info *pbl_info, u32 pg_shift)
{
int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
+ u32 fw_pg_cnt, fw_pg_per_umem_pg;
struct qedr_pbl *pbl_tbl;
struct scatterlist *sg;
struct regpair *pbe;
+ u64 pg_addr;
int entry;
- u32 addr;
if (!pbl_info->num_pbes)
return;
@@ -680,31 +690,37 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
pbe_cnt = 0;
- shift = ilog2(umem->page_size);
+ shift = umem->page_shift;
+
+ fw_pg_per_umem_pg = BIT(umem->page_shift - pg_shift);
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
pages = sg_dma_len(sg) >> shift;
+ pg_addr = sg_dma_address(sg);
for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
- /* store the page address in pbe */
- pbe->lo = cpu_to_le32(sg_dma_address(sg) +
- umem->page_size * pg_cnt);
- addr = upper_32_bits(sg_dma_address(sg) +
- umem->page_size * pg_cnt);
- pbe->hi = cpu_to_le32(addr);
- pbe_cnt++;
- total_num_pbes++;
- pbe++;
-
- if (total_num_pbes == pbl_info->num_pbes)
- return;
-
- /* If the given pbl is full storing the pbes,
- * move to next pbl.
- */
- if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
- pbl_tbl++;
- pbe = (struct regpair *)pbl_tbl->va;
- pbe_cnt = 0;
+ for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
+ pbe->lo = cpu_to_le32(pg_addr);
+ pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
+
+ pg_addr += BIT(pg_shift);
+ pbe_cnt++;
+ total_num_pbes++;
+ pbe++;
+
+ if (total_num_pbes == pbl_info->num_pbes)
+ return;
+
+ /* If the given pbl is full storing the pbes,
+ * move to next pbl.
+ */
+ if (pbe_cnt ==
+ (pbl_info->pbl_size / sizeof(u64))) {
+ pbl_tbl++;
+ pbe = (struct regpair *)pbl_tbl->va;
+ pbe_cnt = 0;
+ }
+
+ fw_pg_cnt++;
}
}
}
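The reworked loop above writes one PBE per firmware-sized page, so each umem page expands into BIT(umem->page_shift - pg_shift) entries spaced BIT(pg_shift) bytes apart. Below is a standalone arithmetic sketch of that expansion; the page sizes and the base DMA address are made-up example values, not taken from the patch:

/* Illustrative only: 64 KiB umem pages split into 4 KiB firmware pages
 * yield 16 PBEs per umem page, each 4 KiB apart.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned int umem_page_shift = 16;	/* 64 KiB umem page */
	unsigned int fw_page_shift = 12;	/* 4 KiB firmware page */
	unsigned int fw_pg_per_umem_pg = 1u << (umem_page_shift - fw_page_shift);
	uint64_t pg_addr = 0xabcd0000ULL;	/* hypothetical DMA address */
	unsigned int i;

	for (i = 0; i < fw_pg_per_umem_pg; i++) {
		printf("pbe[%2u]: hi=0x%08x lo=0x%08x\n", i,
		       (uint32_t)(pg_addr >> 32), (uint32_t)pg_addr);
		pg_addr += 1u << fw_page_shift;
	}
	return 0;	/* prints 16 entries, 4 KiB apart */
}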
@@ -721,7 +737,7 @@ static int qedr_copy_cq_uresp(struct qedr_dev *dev,
uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
uresp.icid = cq->icid;
- rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
if (rc)
DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
@@ -753,7 +769,7 @@ static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
u64 buf_addr, size_t buf_len,
int access, int dmasync)
{
- int page_cnt;
+ u32 fw_pages;
int rc;
q->buf_addr = buf_addr;
@@ -765,16 +781,21 @@ static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
return PTR_ERR(q->umem);
}
- page_cnt = ib_umem_page_count(q->umem);
- rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, page_cnt, 0);
+ fw_pages = ib_umem_page_count(q->umem) <<
+ (q->umem->page_shift - FW_PAGE_SHIFT);
+
+ rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
if (rc)
goto err0;
q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
- if (IS_ERR_OR_NULL(q->pbl_tbl))
+ if (IS_ERR(q->pbl_tbl)) {
+ rc = PTR_ERR(q->pbl_tbl);
goto err0;
+ }
- qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info);
+ qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
+ FW_PAGE_SHIFT);
return 0;
@@ -819,6 +840,17 @@ int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
struct qedr_cq *cq = get_qedr_cq(ibcq);
unsigned long sflags;
+ struct qedr_dev *dev;
+
+ dev = get_qedr_dev(ibcq->device);
+
+ if (cq->destroyed) {
+ DP_ERR(dev,
+ "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
+ cq, cq->icid);
+ return -EINVAL;
+ }
+
if (cq->cq_type == QEDR_CQ_TYPE_GSI)
return 0;
@@ -911,7 +943,7 @@ struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
QED_CHAIN_CNT_TYPE_U32,
chain_entries,
sizeof(union rdma_cqe),
- &cq->pbl);
+ &cq->pbl, NULL);
if (rc)
goto err1;
@@ -984,35 +1016,82 @@ int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
return 0;
}
+#define QEDR_DESTROY_CQ_MAX_ITERATIONS (10)
+#define QEDR_DESTROY_CQ_ITER_DURATION (10)
+
int qedr_destroy_cq(struct ib_cq *ibcq)
{
struct qedr_dev *dev = get_qedr_dev(ibcq->device);
struct qed_rdma_destroy_cq_out_params oparams;
struct qed_rdma_destroy_cq_in_params iparams;
struct qedr_cq *cq = get_qedr_cq(ibcq);
+ int iter;
+ int rc;
+
+ DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
- DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq: cq_id %d", cq->icid);
+ cq->destroyed = 1;
/* GSIs CQs are handled by driver, so they don't exist in the FW */
- if (cq->cq_type != QEDR_CQ_TYPE_GSI) {
- int rc;
+ if (cq->cq_type == QEDR_CQ_TYPE_GSI)
+ goto done;
- iparams.icid = cq->icid;
- rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams,
- &oparams);
- if (rc)
- return rc;
- dev->ops->common->chain_free(dev->cdev, &cq->pbl);
- }
+ iparams.icid = cq->icid;
+ rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
+ if (rc)
+ return rc;
+
+ dev->ops->common->chain_free(dev->cdev, &cq->pbl);
if (ibcq->uobject && ibcq->uobject->context) {
qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
ib_umem_release(cq->q.umem);
}
+ /* We don't want the IRQ handler to handle a non-existing CQ, so we
+ * wait until all CNQ interrupts, if any, have been received. This will
+ * always happen, and will always happen very fast; if not, a serious
+ * error has occurred, which is why we can afford a long delay.
+ * We spin for a short time so we don't lose time on context switching
+ * in case all the completions are handled within that span. Otherwise
+ * we sleep for a while and check again. Since the CNQ may be
+ * associated with (only) the current CPU, we use msleep to allow the
+ * current CPU to be freed.
+ * The CNQ notification counter is increased in qedr_irq_handler().
+ */
+ iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
+ while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
+ udelay(QEDR_DESTROY_CQ_ITER_DURATION);
+ iter--;
+ }
+
+ iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
+ while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
+ msleep(QEDR_DESTROY_CQ_ITER_DURATION);
+ iter--;
+ }
+
+ if (oparams.num_cq_notif != cq->cnq_notif)
+ goto err;
+
+ /* Note that we don't need explicit code to wait for the completion
+ * of the event handler, because it is invoked from the EQ. Since the
+ * destroy CQ ramrod has also been received on the EQ, we can be
+ * certain that no event handler is still in progress.
+ */
+done:
+ cq->sig = ~cq->sig;
+
kfree(cq);
return 0;
+
+err:
+ DP_ERR(dev,
+ "CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n",
+ cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif);
+
+ return -EINVAL;
}
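The destroy path above first polls with udelay() and then falls back to msleep() so the CPU that services the CNQ can make progress. A user-space analogue of this spin-then-sleep wait is sketched below; the names and the plain counter standing in for cq->cnq_notif are made up for illustration:

/* Illustrative analogue only; the kernel code compares
 * oparams.num_cq_notif against cq->cnq_notif rather than a flag, and
 * uses udelay()/msleep() instead of usleep().
 */
#include <stdbool.h>
#include <unistd.h>

#define MAX_ITERATIONS	10
#define SPIN_DELAY_US	10
#define SLEEP_DELAY_MS	10

static bool wait_for_count(const volatile int *seen, int expected)
{
	int iter;

	/* Poll with a very short delay first; the notifications normally
	 * arrive within microseconds, so this usually avoids sleeping.
	 */
	for (iter = 0; iter < MAX_ITERATIONS && *seen != expected; iter++)
		usleep(SPIN_DELAY_US);

	/* Back off and sleep so the CPU handling the interrupts can run. */
	for (iter = 0; iter < MAX_ITERATIONS && *seen != expected; iter++)
		usleep(SLEEP_DELAY_MS * 1000);

	return *seen == expected;
}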
static inline int get_gid_info_from_table(struct ib_qp *ibqp,
@@ -1023,13 +1102,15 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
{
enum rdma_network_type nw_type;
struct ib_gid_attr gid_attr;
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
union ib_gid gid;
u32 ipv4_addr;
int rc = 0;
int i;
- rc = ib_get_cached_gid(ibqp->device, attr->ah_attr.port_num,
- attr->ah_attr.grh.sgid_index, &gid, &gid_attr);
+ rc = ib_get_cached_gid(ibqp->device,
+ rdma_ah_get_port_num(&attr->ah_attr),
+ grh->sgid_index, &gid, &gid_attr);
if (rc)
return rc;
@@ -1046,7 +1127,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
sizeof(qp_params->sgid));
memcpy(&qp_params->dgid.bytes[0],
- &attr->ah_attr.grh.dgid,
+ &grh->dgid,
sizeof(qp_params->dgid));
qp_params->roce_mode = ROCE_V2_IPV6;
SET_FIELD(qp_params->modify_flags,
@@ -1056,7 +1137,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
sizeof(qp_params->sgid));
memcpy(&qp_params->dgid.bytes[0],
- &attr->ah_attr.grh.dgid,
+ &grh->dgid,
sizeof(qp_params->dgid));
qp_params->roce_mode = ROCE_V1;
break;
@@ -1066,7 +1147,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
ipv4_addr = qedr_get_ipv4_from_gid(gid.raw);
qp_params->sgid.ipv4_addr = ipv4_addr;
ipv4_addr =
- qedr_get_ipv4_from_gid(attr->ah_attr.grh.dgid.raw);
+ qedr_get_ipv4_from_gid(grh->dgid.raw);
qp_params->dgid.ipv4_addr = ipv4_addr;
SET_FIELD(qp_params->modify_flags,
QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
@@ -1086,30 +1167,6 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
return 0;
}
-static void qedr_cleanup_user_sq(struct qedr_dev *dev, struct qedr_qp *qp)
-{
- qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl);
- ib_umem_release(qp->usq.umem);
-}
-
-static void qedr_cleanup_user_rq(struct qedr_dev *dev, struct qedr_qp *qp)
-{
- qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl);
- ib_umem_release(qp->urq.umem);
-}
-
-static void qedr_cleanup_kernel_sq(struct qedr_dev *dev, struct qedr_qp *qp)
-{
- dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
- kfree(qp->wqe_wr_id);
-}
-
-static void qedr_cleanup_kernel_rq(struct qedr_dev *dev, struct qedr_qp *qp)
-{
- dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
- kfree(qp->rqe_wr_id);
-}
-
static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
struct ib_qp_init_attr *attrs)
{
@@ -1189,7 +1246,7 @@ static int qedr_copy_qp_uresp(struct qedr_dev *dev,
uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
uresp.qp_id = qp->qp_id;
- rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
if (rc)
DP_ERR(dev,
"create qp: failed a copy to user space with qp icid=0x%x.\n",
@@ -1198,15 +1255,13 @@ static int qedr_copy_qp_uresp(struct qedr_dev *dev,
return rc;
}
-static void qedr_set_qp_init_params(struct qedr_dev *dev,
- struct qedr_qp *qp,
- struct qedr_pd *pd,
- struct ib_qp_init_attr *attrs)
+static void qedr_set_common_qp_params(struct qedr_dev *dev,
+ struct qedr_qp *qp,
+ struct qedr_pd *pd,
+ struct ib_qp_init_attr *attrs)
{
- qp->pd = pd;
-
spin_lock_init(&qp->q_lock);
-
+ qp->pd = pd;
qp->qp_type = attrs->qp_type;
qp->max_inline_data = attrs->cap.max_inline_data;
qp->sq.max_sges = attrs->cap.max_send_sge;
@@ -1215,272 +1270,278 @@ static void qedr_set_qp_init_params(struct qedr_dev *dev,
qp->sq_cq = get_qedr_cq(attrs->send_cq);
qp->rq_cq = get_qedr_cq(attrs->recv_cq);
qp->dev = dev;
+ qp->rq.max_sges = attrs->cap.max_recv_sge;
DP_DEBUG(dev, QEDR_MSG_QP,
+ "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
+ qp->rq.max_sges, qp->rq_cq->icid);
+ DP_DEBUG(dev, QEDR_MSG_QP,
"QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
pd->pd_id, qp->qp_type, qp->max_inline_data,
qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
DP_DEBUG(dev, QEDR_MSG_QP,
"SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
qp->sq.max_sges, qp->sq_cq->icid);
- qp->rq.max_sges = attrs->cap.max_recv_sge;
- DP_DEBUG(dev, QEDR_MSG_QP,
- "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
- qp->rq.max_sges, qp->rq_cq->icid);
}
-static inline void
-qedr_init_qp_user_params(struct qed_rdma_create_qp_in_params *params,
- struct qedr_create_qp_ureq *ureq)
-{
- /* QP handle to be written in CQE */
- params->qp_handle_lo = ureq->qp_handle_lo;
- params->qp_handle_hi = ureq->qp_handle_hi;
-}
-
-static inline void
-qedr_init_qp_kernel_doorbell_sq(struct qedr_dev *dev, struct qedr_qp *qp)
+static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
{
qp->sq.db = dev->db_addr +
DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
qp->sq.db_data.data.icid = qp->icid + 1;
+ qp->rq.db = dev->db_addr +
+ DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
+ qp->rq.db_data.data.icid = qp->icid;
}
static inline void
-qedr_init_qp_kernel_doorbell_rq(struct qedr_dev *dev, struct qedr_qp *qp)
+qedr_init_common_qp_in_params(struct qedr_dev *dev,
+ struct qedr_pd *pd,
+ struct qedr_qp *qp,
+ struct ib_qp_init_attr *attrs,
+ bool fmr_and_reserved_lkey,
+ struct qed_rdma_create_qp_in_params *params)
{
- qp->rq.db = dev->db_addr +
- DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
- qp->rq.db_data.data.icid = qp->icid;
+ /* QP handle to be written in an async event */
+ params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
+ params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
+
+ params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
+ params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
+ params->pd = pd->pd_id;
+ params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
+ params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
+ params->stats_queue = 0;
+ params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
+ params->srq_id = 0;
+ params->use_srq = false;
}
-static inline int
-qedr_init_qp_kernel_params_rq(struct qedr_dev *dev,
- struct qedr_qp *qp, struct ib_qp_init_attr *attrs)
+static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
{
- /* Allocate driver internal RQ array */
- qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
- GFP_KERNEL);
- if (!qp->rqe_wr_id)
- return -ENOMEM;
+ DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
+ "qp=%p. "
+ "sq_addr=0x%llx, "
+ "sq_len=%zd, "
+ "rq_addr=0x%llx, "
+ "rq_len=%zd"
+ "\n",
+ qp,
+ qp->usq.buf_addr,
+ qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
+}
- DP_DEBUG(dev, QEDR_MSG_QP, "RQ max_wr set to %d.\n", qp->rq.max_wr);
+static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
+{
+ if (qp->usq.umem)
+ ib_umem_release(qp->usq.umem);
+ qp->usq.umem = NULL;
- return 0;
+ if (qp->urq.umem)
+ ib_umem_release(qp->urq.umem);
+ qp->urq.umem = NULL;
}
-static inline int
-qedr_init_qp_kernel_params_sq(struct qedr_dev *dev,
- struct qedr_qp *qp,
- struct ib_qp_init_attr *attrs,
- struct qed_rdma_create_qp_in_params *params)
+static int qedr_create_user_qp(struct qedr_dev *dev,
+ struct qedr_qp *qp,
+ struct ib_pd *ibpd,
+ struct ib_udata *udata,
+ struct ib_qp_init_attr *attrs)
{
- u32 temp_max_wr;
+ struct qed_rdma_create_qp_in_params in_params;
+ struct qed_rdma_create_qp_out_params out_params;
+ struct qedr_pd *pd = get_qedr_pd(ibpd);
+ struct ib_ucontext *ib_ctx = NULL;
+ struct qedr_ucontext *ctx = NULL;
+ struct qedr_create_qp_ureq ureq;
+ int rc = -EINVAL;
- /* Allocate driver internal SQ array */
- temp_max_wr = attrs->cap.max_send_wr * dev->wq_multiplier;
- temp_max_wr = min_t(u32, temp_max_wr, dev->attr.max_sqe);
+ ib_ctx = ibpd->uobject->context;
+ ctx = get_qedr_ucontext(ib_ctx);
- /* temp_max_wr < attr->max_sqe < u16 so the casting is safe */
- qp->sq.max_wr = (u16)temp_max_wr;
- qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
- GFP_KERNEL);
- if (!qp->wqe_wr_id)
- return -ENOMEM;
+ memset(&ureq, 0, sizeof(ureq));
+ rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
+ if (rc) {
+ DP_ERR(dev, "Problem copying data from user space\n");
+ return rc;
+ }
+
+ /* SQ - read access only (0), dma sync not required (0) */
+ rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr,
+ ureq.sq_len, 0, 0);
+ if (rc)
+ return rc;
- DP_DEBUG(dev, QEDR_MSG_QP, "SQ max_wr set to %d.\n", qp->sq.max_wr);
+ /* RQ - read access only (0), dma sync not required (0) */
+ rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
+ ureq.rq_len, 0, 0);
- /* QP handle to be written in CQE */
- params->qp_handle_lo = lower_32_bits((uintptr_t)qp);
- params->qp_handle_hi = upper_32_bits((uintptr_t)qp);
+ if (rc)
+ return rc;
+
+ memset(&in_params, 0, sizeof(in_params));
+ qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
+ in_params.qp_handle_lo = ureq.qp_handle_lo;
+ in_params.qp_handle_hi = ureq.qp_handle_hi;
+ in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
+ in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
+ in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
+ in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
+
+ qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
+ &in_params, &out_params);
+
+ if (!qp->qed_qp) {
+ rc = -ENOMEM;
+ goto err1;
+ }
+
+ qp->qp_id = out_params.qp_id;
+ qp->icid = out_params.icid;
+
+ rc = qedr_copy_qp_uresp(dev, qp, udata);
+ if (rc)
+ goto err;
+
+ qedr_qp_user_print(dev, qp);
return 0;
+err:
+ rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
+ if (rc)
+ DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);
+
+err1:
+ qedr_cleanup_user(dev, qp);
+ return rc;
}
-static inline int qedr_init_qp_kernel_sq(struct qedr_dev *dev,
- struct qedr_qp *qp,
- struct ib_qp_init_attr *attrs)
+static int
+qedr_roce_create_kernel_qp(struct qedr_dev *dev,
+ struct qedr_qp *qp,
+ struct qed_rdma_create_qp_in_params *in_params,
+ u32 n_sq_elems, u32 n_rq_elems)
{
- u32 n_sq_elems, n_sq_entries;
+ struct qed_rdma_create_qp_out_params out_params;
int rc;
- /* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
- * the ring. The ring should allow at least a single WR, even if the
- * user requested none, due to allocation issues.
- */
- n_sq_entries = attrs->cap.max_send_wr;
- n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
- n_sq_entries = max_t(u32, n_sq_entries, 1);
- n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
rc = dev->ops->common->chain_alloc(dev->cdev,
QED_CHAIN_USE_TO_PRODUCE,
QED_CHAIN_MODE_PBL,
QED_CHAIN_CNT_TYPE_U32,
n_sq_elems,
QEDR_SQE_ELEMENT_SIZE,
- &qp->sq.pbl);
- if (rc) {
- DP_ERR(dev, "failed to allocate QP %p SQ\n", qp);
+ &qp->sq.pbl, NULL);
+
+ if (rc)
return rc;
- }
- DP_DEBUG(dev, QEDR_MSG_SQ,
- "SQ Pbl base addr = %llx max_send_wr=%d max_wr=%d capacity=%d, rc=%d\n",
- qed_chain_get_pbl_phys(&qp->sq.pbl), attrs->cap.max_send_wr,
- n_sq_entries, qed_chain_get_capacity(&qp->sq.pbl), rc);
- return 0;
-}
+ in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
+ in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
-static inline int qedr_init_qp_kernel_rq(struct qedr_dev *dev,
- struct qedr_qp *qp,
- struct ib_qp_init_attr *attrs)
-{
- u32 n_rq_elems, n_rq_entries;
- int rc;
-
- /* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
- * the ring. There ring should allow at least a single WR, even if the
- * user requested none, due to allocation issues.
- */
- n_rq_entries = max_t(u32, attrs->cap.max_recv_wr, 1);
- n_rq_elems = n_rq_entries * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
rc = dev->ops->common->chain_alloc(dev->cdev,
QED_CHAIN_USE_TO_CONSUME_PRODUCE,
QED_CHAIN_MODE_PBL,
QED_CHAIN_CNT_TYPE_U32,
n_rq_elems,
QEDR_RQE_ELEMENT_SIZE,
- &qp->rq.pbl);
-
- if (rc) {
- DP_ERR(dev, "failed to allocate memory for QP %p RQ\n", qp);
- return -ENOMEM;
- }
+ &qp->rq.pbl, NULL);
+ if (rc)
+ return rc;
- DP_DEBUG(dev, QEDR_MSG_RQ,
- "RQ Pbl base addr = %llx max_recv_wr=%d max_wr=%d capacity=%d, rc=%d\n",
- qed_chain_get_pbl_phys(&qp->rq.pbl), attrs->cap.max_recv_wr,
- n_rq_entries, qed_chain_get_capacity(&qp->rq.pbl), rc);
+ in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
+ in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
- /* n_rq_entries < u16 so the casting is safe */
- qp->rq.max_wr = (u16)n_rq_entries;
+ qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
+ in_params, &out_params);
- return 0;
-}
+ if (!qp->qed_qp)
+ return -EINVAL;
-static inline void
-qedr_init_qp_in_params_sq(struct qedr_dev *dev,
- struct qedr_pd *pd,
- struct qedr_qp *qp,
- struct ib_qp_init_attr *attrs,
- struct ib_udata *udata,
- struct qed_rdma_create_qp_in_params *params)
-{
- /* QP handle to be written in an async event */
- params->qp_handle_async_lo = lower_32_bits((uintptr_t)qp);
- params->qp_handle_async_hi = upper_32_bits((uintptr_t)qp);
+ qp->qp_id = out_params.qp_id;
+ qp->icid = out_params.icid;
- params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
- params->fmr_and_reserved_lkey = !udata;
- params->pd = pd->pd_id;
- params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
- params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
- params->max_sq_sges = 0;
- params->stats_queue = 0;
+ qedr_set_roce_db_info(dev, qp);
- if (udata) {
- params->sq_num_pages = qp->usq.pbl_info.num_pbes;
- params->sq_pbl_ptr = qp->usq.pbl_tbl->pa;
- } else {
- params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
- params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
- }
+ return 0;
}
-static inline void
-qedr_init_qp_in_params_rq(struct qedr_qp *qp,
- struct ib_qp_init_attr *attrs,
- struct ib_udata *udata,
- struct qed_rdma_create_qp_in_params *params)
+static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
{
- params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
- params->srq_id = 0;
- params->use_srq = false;
+ dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
+ kfree(qp->wqe_wr_id);
- if (udata) {
- params->rq_num_pages = qp->urq.pbl_info.num_pbes;
- params->rq_pbl_ptr = qp->urq.pbl_tbl->pa;
- } else {
- params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
- params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
- }
+ dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
+ kfree(qp->rqe_wr_id);
}
-static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
+static int qedr_create_kernel_qp(struct qedr_dev *dev,
+ struct qedr_qp *qp,
+ struct ib_pd *ibpd,
+ struct ib_qp_init_attr *attrs)
{
- DP_DEBUG(dev, QEDR_MSG_QP,
- "create qp: successfully created user QP. qp=%p, sq_addr=0x%llx, sq_len=%zd, rq_addr=0x%llx, rq_len=%zd\n",
- qp, qp->usq.buf_addr, qp->usq.buf_len, qp->urq.buf_addr,
- qp->urq.buf_len);
-}
+ struct qed_rdma_create_qp_in_params in_params;
+ struct qedr_pd *pd = get_qedr_pd(ibpd);
+ int rc = -EINVAL;
+ u32 n_rq_elems;
+ u32 n_sq_elems;
+ u32 n_sq_entries;
-static inline int qedr_init_user_qp(struct ib_ucontext *ib_ctx,
- struct qedr_dev *dev,
- struct qedr_qp *qp,
- struct qedr_create_qp_ureq *ureq)
-{
- int rc;
+ memset(&in_params, 0, sizeof(in_params));
- /* SQ - read access only (0), dma sync not required (0) */
- rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq->sq_addr,
- ureq->sq_len, 0, 0);
- if (rc)
- return rc;
+ /* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
+ * the ring. The ring should allow at least a single WR, even if the
+ * user requested none, due to allocation issues.
+ * We should add an extra WR since the prod and cons indices of
+ * wqe_wr_id are managed in such a way that the WQ is considered full
+ * when (prod+1)%max_wr==cons. We currently don't do that because we
+ * double the number of entries due to an iSER issue that pushes far
+ * more WRs than indicated. If we declined its ib_post_send() we would
+ * get error prints in dmesg that we'd like to avoid.
+ */
+ qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
+ dev->attr.max_sqe);
- /* RQ - read access only (0), dma sync not required (0) */
- rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq->rq_addr,
- ureq->rq_len, 0, 0);
+ qp->wqe_wr_id = kzalloc(qp->sq.max_wr * sizeof(*qp->wqe_wr_id),
+ GFP_KERNEL);
+ if (!qp->wqe_wr_id) {
+ DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
+ return -ENOMEM;
+ }
- if (rc)
- qedr_cleanup_user_sq(dev, qp);
- return rc;
-}
+ /* QP handle to be written in CQE */
+ in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
+ in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
-static inline int
-qedr_init_kernel_qp(struct qedr_dev *dev,
- struct qedr_qp *qp,
- struct ib_qp_init_attr *attrs,
- struct qed_rdma_create_qp_in_params *params)
-{
- int rc;
+ /* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
+ * the ring. The ring should allow at least a single WR, even if the
+ * user requested none, due to allocation issues.
+ */
+ qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
- rc = qedr_init_qp_kernel_sq(dev, qp, attrs);
- if (rc) {
- DP_ERR(dev, "failed to init kernel QP %p SQ\n", qp);
- return rc;
+ /* Allocate driver internal RQ array */
+ qp->rqe_wr_id = kzalloc(qp->rq.max_wr * sizeof(*qp->rqe_wr_id),
+ GFP_KERNEL);
+ if (!qp->rqe_wr_id) {
+ DP_ERR(dev,
+ "create qp: failed RQ shadow memory allocation\n");
+ kfree(qp->wqe_wr_id);
+ return -ENOMEM;
}
- rc = qedr_init_qp_kernel_params_sq(dev, qp, attrs, params);
- if (rc) {
- dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
- DP_ERR(dev, "failed to init kernel QP %p SQ params\n", qp);
- return rc;
- }
+ qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
- rc = qedr_init_qp_kernel_rq(dev, qp, attrs);
- if (rc) {
- qedr_cleanup_kernel_sq(dev, qp);
- DP_ERR(dev, "failed to init kernel QP %p RQ\n", qp);
- return rc;
- }
+ n_sq_entries = attrs->cap.max_send_wr;
+ n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
+ n_sq_entries = max_t(u32, n_sq_entries, 1);
+ n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
- rc = qedr_init_qp_kernel_params_rq(dev, qp, attrs);
- if (rc) {
- DP_ERR(dev, "failed to init kernel QP %p RQ params\n", qp);
- qedr_cleanup_kernel_sq(dev, qp);
- dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
- return rc;
- }
+ n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
+
+ rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
+ n_sq_elems, n_rq_elems);
+ if (rc)
+ qedr_cleanup_kernel(dev, qp);
return rc;
}
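qedr_create_kernel_qp() above sizes the SQ shadow array from the multiplied, clamped WR count, but sizes the ring itself from the un-multiplied request. The following small arithmetic sketch uses assumed values throughout; the multiplier, device limit and elements-per-WQE constant are illustrative, not taken from the patch:

/* Illustrative arithmetic only. */
#include <stdio.h>

#define MIN(a, b)	((a) < (b) ? (a) : (b))
#define MAX(a, b)	((a) > (b) ? (a) : (b))

int main(void)
{
	unsigned int max_send_wr = 128;		/* requested by the caller */
	unsigned int wq_multiplier = 2;		/* assumed device multiplier */
	unsigned int max_sqe = 8192;		/* assumed device limit */
	unsigned int elems_per_wqe = 4;		/* assumed elements per SQ WQE */

	/* Shadow array: multiplied for the iSER workaround, clamped to HW. */
	unsigned int sq_max_wr = MIN(max_send_wr * wq_multiplier, max_sqe);

	/* Ring: sized from the un-multiplied request, never below one WR. */
	unsigned int n_sq_entries = MAX(MIN(max_send_wr, max_sqe), 1u);
	unsigned int n_sq_elems = n_sq_entries * elems_per_wqe;

	printf("sq_max_wr=%u n_sq_entries=%u n_sq_elems=%u\n",
	       sq_max_wr, n_sq_entries, n_sq_elems);
	return 0;	/* prints: sq_max_wr=256 n_sq_entries=128 n_sq_elems=512 */
}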
@@ -1490,12 +1551,7 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
struct ib_udata *udata)
{
struct qedr_dev *dev = get_qedr_dev(ibpd->device);
- struct qed_rdma_create_qp_out_params out_params;
- struct qed_rdma_create_qp_in_params in_params;
struct qedr_pd *pd = get_qedr_pd(ibpd);
- struct ib_ucontext *ib_ctx = NULL;
- struct qedr_ucontext *ctx = NULL;
- struct qedr_create_qp_ureq ureq;
struct qedr_qp *qp;
struct ib_qp *ibqp;
int rc = 0;
@@ -1510,101 +1566,42 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
if (attrs->srq)
return ERR_PTR(-EINVAL);
- qp = kzalloc(sizeof(*qp), GFP_KERNEL);
- if (!qp)
- return ERR_PTR(-ENOMEM);
-
DP_DEBUG(dev, QEDR_MSG_QP,
- "create qp: sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
+ "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
+ udata ? "user library" : "kernel", attrs->event_handler, pd,
get_qedr_cq(attrs->send_cq),
get_qedr_cq(attrs->send_cq)->icid,
get_qedr_cq(attrs->recv_cq),
get_qedr_cq(attrs->recv_cq)->icid);
- qedr_set_qp_init_params(dev, qp, pd, attrs);
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ if (!qp) {
+ DP_ERR(dev, "create qp: failed allocating memory\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ qedr_set_common_qp_params(dev, qp, pd, attrs);
if (attrs->qp_type == IB_QPT_GSI) {
- if (udata) {
- DP_ERR(dev,
- "create qp: unexpected udata when creating GSI QP\n");
- goto err0;
- }
ibqp = qedr_create_gsi_qp(dev, attrs, qp);
if (IS_ERR(ibqp))
kfree(qp);
return ibqp;
}
- memset(&in_params, 0, sizeof(in_params));
-
- if (udata) {
- if (!(udata && ibpd->uobject && ibpd->uobject->context))
- goto err0;
-
- ib_ctx = ibpd->uobject->context;
- ctx = get_qedr_ucontext(ib_ctx);
-
- memset(&ureq, 0, sizeof(ureq));
- if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
- DP_ERR(dev,
- "create qp: problem copying data from user space\n");
- goto err0;
- }
-
- rc = qedr_init_user_qp(ib_ctx, dev, qp, &ureq);
- if (rc)
- goto err0;
-
- qedr_init_qp_user_params(&in_params, &ureq);
- } else {
- rc = qedr_init_kernel_qp(dev, qp, attrs, &in_params);
- if (rc)
- goto err0;
- }
-
- qedr_init_qp_in_params_sq(dev, pd, qp, attrs, udata, &in_params);
- qedr_init_qp_in_params_rq(qp, attrs, udata, &in_params);
-
- qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
- &in_params, &out_params);
+ if (udata)
+ rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
+ else
+ rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
- if (!qp->qed_qp)
- goto err1;
+ if (rc)
+ goto err;
- qp->qp_id = out_params.qp_id;
- qp->icid = out_params.icid;
qp->ibqp.qp_num = qp->qp_id;
- if (udata) {
- rc = qedr_copy_qp_uresp(dev, qp, udata);
- if (rc)
- goto err2;
-
- qedr_qp_user_print(dev, qp);
- } else {
- qedr_init_qp_kernel_doorbell_sq(dev, qp);
- qedr_init_qp_kernel_doorbell_rq(dev, qp);
- }
-
- DP_DEBUG(dev, QEDR_MSG_QP, "created %s space QP %p\n",
- udata ? "user" : "kernel", qp);
-
return &qp->ibqp;
-err2:
- rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
- if (rc)
- DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);
-err1:
- if (udata) {
- qedr_cleanup_user_sq(dev, qp);
- qedr_cleanup_user_rq(dev, qp);
- } else {
- qedr_cleanup_kernel_sq(dev, qp);
- qedr_cleanup_kernel_rq(dev, qp);
- }
-
-err0:
+err:
kfree(qp);
return ERR_PTR(-EFAULT);
@@ -1772,6 +1769,7 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct qedr_qp *qp = get_qedr_qp(ibqp);
struct qed_rdma_modify_qp_in_params qp_params = { 0 };
struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
enum ib_qp_state old_qp_state, new_qp_state;
int rc = 0;
@@ -1854,17 +1852,17 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
SET_FIELD(qp_params.modify_flags,
QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
- qp_params.traffic_class_tos = attr->ah_attr.grh.traffic_class;
- qp_params.flow_label = attr->ah_attr.grh.flow_label;
- qp_params.hop_limit_ttl = attr->ah_attr.grh.hop_limit;
+ qp_params.traffic_class_tos = grh->traffic_class;
+ qp_params.flow_label = grh->flow_label;
+ qp_params.hop_limit_ttl = grh->hop_limit;
- qp->sgid_idx = attr->ah_attr.grh.sgid_index;
+ qp->sgid_idx = grh->sgid_index;
rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
if (rc) {
DP_ERR(dev,
"modify qp: problems with GID index %d (rc=%d)\n",
- attr->ah_attr.grh.sgid_index, rc);
+ grh->sgid_index, rc);
return rc;
}
@@ -2049,25 +2047,21 @@ int qedr_query_qp(struct ib_qp *ibqp,
qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
qp_init_attr->cap = qp_attr->cap;
- memcpy(&qp_attr->ah_attr.grh.dgid.raw[0], &params.dgid.bytes[0],
- sizeof(qp_attr->ah_attr.grh.dgid.raw));
-
- qp_attr->ah_attr.grh.flow_label = params.flow_label;
- qp_attr->ah_attr.grh.sgid_index = qp->sgid_idx;
- qp_attr->ah_attr.grh.hop_limit = params.hop_limit_ttl;
- qp_attr->ah_attr.grh.traffic_class = params.traffic_class_tos;
-
- qp_attr->ah_attr.ah_flags = IB_AH_GRH;
- qp_attr->ah_attr.port_num = 1;
- qp_attr->ah_attr.sl = 0;
+ qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
+ rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
+ params.flow_label, qp->sgid_idx,
+ params.hop_limit_ttl, params.traffic_class_tos);
+ rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
+ rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
+ rdma_ah_set_sl(&qp_attr->ah_attr, 0);
qp_attr->timeout = params.timeout;
qp_attr->rnr_retry = params.rnr_retry;
qp_attr->retry_cnt = params.retry_cnt;
qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
qp_attr->pkey_index = params.pkey_index;
qp_attr->port_num = 1;
- qp_attr->ah_attr.src_path_bits = 0;
- qp_attr->ah_attr.static_rate = 0;
+ rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
+ rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
qp_attr->alt_pkey_index = 0;
qp_attr->alt_port_num = 0;
qp_attr->alt_timeout = 0;
@@ -2085,6 +2079,24 @@ err:
return rc;
}
+int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp)
+{
+ int rc = 0;
+
+ if (qp->qp_type != IB_QPT_GSI) {
+ rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
+ if (rc)
+ return rc;
+ }
+
+ if (qp->ibqp.uobject && qp->ibqp.uobject->context)
+ qedr_cleanup_user(dev, qp);
+ else
+ qedr_cleanup_kernel(dev, qp);
+
+ return 0;
+}
+
int qedr_destroy_qp(struct ib_qp *ibqp)
{
struct qedr_qp *qp = get_qedr_qp(ibqp);
@@ -2107,28 +2119,17 @@ int qedr_destroy_qp(struct ib_qp *ibqp)
qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
}
- if (qp->qp_type != IB_QPT_GSI) {
- rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
- if (rc)
- return rc;
- } else {
+ if (qp->qp_type == IB_QPT_GSI)
qedr_destroy_gsi_qp(dev);
- }
- if (ibqp->uobject && ibqp->uobject->context) {
- qedr_cleanup_user_sq(dev, qp);
- qedr_cleanup_user_rq(dev, qp);
- } else {
- qedr_cleanup_kernel_sq(dev, qp);
- qedr_cleanup_kernel_rq(dev, qp);
- }
+ qedr_free_qp_resources(dev, qp);
kfree(qp);
return rc;
}
-struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
+struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
struct ib_udata *udata)
{
struct qedr_ah *ah;
@@ -2182,8 +2183,8 @@ static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
goto done;
info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
- if (!info->pbl_table) {
- rc = -ENOMEM;
+ if (IS_ERR(info->pbl_table)) {
+ rc = PTR_ERR(info->pbl_table);
goto done;
}
@@ -2194,7 +2195,7 @@ static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
* list and allocating another one
*/
tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
- if (!tmp) {
+ if (IS_ERR(tmp)) {
DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
goto done;
}
@@ -2243,7 +2244,7 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
goto err1;
qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
- &mr->info.pbl_info);
+ &mr->info.pbl_info, mr->umem->page_shift);
rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
if (rc) {
@@ -2264,7 +2265,7 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
- mr->hw_mr.page_size_log = ilog2(mr->umem->page_size);
+ mr->hw_mr.page_size_log = mr->umem->page_shift;
mr->hw_mr.fbo = ib_umem_offset(mr->umem);
mr->hw_mr.length = len;
mr->hw_mr.vaddr = usr_addr;
@@ -2699,6 +2700,8 @@ static int qedr_prepare_reg(struct qedr_qp *qp,
fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
fwqe1->l_key = wr->key;
+ fwqe2->access_ctrl = 0;
+
SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
!!(wr->access & IB_ACCESS_REMOTE_READ));
SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
@@ -3224,6 +3227,10 @@ static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
case IB_WC_REG_MR:
qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
break;
+ case IB_WC_RDMA_READ:
+ case IB_WC_SEND:
+ wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
+ break;
default:
break;
}
@@ -3345,57 +3352,81 @@ static int qedr_poll_cq_req(struct qedr_dev *dev,
return cnt;
}
-static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
- struct qedr_cq *cq, struct ib_wc *wc,
- struct rdma_cqe_responder *resp, u64 wr_id)
+static inline int qedr_cqe_resp_status_to_ib(u8 status)
{
- enum ib_wc_status wc_status = IB_WC_SUCCESS;
- u8 flags;
-
- wc->opcode = IB_WC_RECV;
- wc->wc_flags = 0;
-
- switch (resp->status) {
+ switch (status) {
case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
- wc_status = IB_WC_LOC_ACCESS_ERR;
- break;
+ return IB_WC_LOC_ACCESS_ERR;
case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
- wc_status = IB_WC_LOC_LEN_ERR;
- break;
+ return IB_WC_LOC_LEN_ERR;
case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
- wc_status = IB_WC_LOC_QP_OP_ERR;
- break;
+ return IB_WC_LOC_QP_OP_ERR;
case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
- wc_status = IB_WC_LOC_PROT_ERR;
- break;
+ return IB_WC_LOC_PROT_ERR;
case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
- wc_status = IB_WC_MW_BIND_ERR;
- break;
+ return IB_WC_MW_BIND_ERR;
case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
- wc_status = IB_WC_REM_INV_RD_REQ_ERR;
- break;
+ return IB_WC_REM_INV_RD_REQ_ERR;
case RDMA_CQE_RESP_STS_OK:
- wc_status = IB_WC_SUCCESS;
- wc->byte_len = le32_to_cpu(resp->length);
+ return IB_WC_SUCCESS;
+ default:
+ return IB_WC_GENERAL_ERR;
+ }
+}
+
+static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
+ struct ib_wc *wc)
+{
+ wc->status = IB_WC_SUCCESS;
+ wc->byte_len = le32_to_cpu(resp->length);
- flags = resp->flags & QEDR_RESP_RDMA_IMM;
+ if (resp->flags & QEDR_RESP_IMM) {
+ wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
+ wc->wc_flags |= IB_WC_WITH_IMM;
- if (flags == QEDR_RESP_RDMA_IMM)
+ if (resp->flags & QEDR_RESP_RDMA)
wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
- if (flags == QEDR_RESP_RDMA_IMM || flags == QEDR_RESP_IMM) {
- wc->ex.imm_data =
- le32_to_cpu(resp->imm_data_or_inv_r_Key);
- wc->wc_flags |= IB_WC_WITH_IMM;
- }
- break;
- default:
- wc->status = IB_WC_GENERAL_ERR;
- DP_ERR(dev, "Invalid CQE status detected\n");
+ if (resp->flags & QEDR_RESP_INV)
+ return -EINVAL;
+
+ } else if (resp->flags & QEDR_RESP_INV) {
+ wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+
+ if (resp->flags & QEDR_RESP_RDMA)
+ return -EINVAL;
+
+ } else if (resp->flags & QEDR_RESP_RDMA) {
+ return -EINVAL;
}
- /* fill WC */
- wc->status = wc_status;
+ return 0;
+}
+
+static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
+ struct qedr_cq *cq, struct ib_wc *wc,
+ struct rdma_cqe_responder *resp, u64 wr_id)
+{
+ /* Must fill fields before qedr_set_ok_cqe_resp_wc() */
+ wc->opcode = IB_WC_RECV;
+ wc->wc_flags = 0;
+
+ if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
+ if (qedr_set_ok_cqe_resp_wc(resp, wc))
+ DP_ERR(dev,
+ "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
+ cq, cq->icid, resp->flags);
+
+ } else {
+ wc->status = qedr_cqe_resp_status_to_ib(resp->status);
+ if (wc->status == IB_WC_GENERAL_ERR)
+ DP_ERR(dev,
+ "CQ %p (icid=%d) contains an invalid CQE status %d\n",
+ cq, cq->icid, resp->status);
+ }
+
+ /* Fill the rest of the WC */
wc->vendor_err = 0;
wc->src_qp = qp->id;
wc->qp = &qp->ibqp;
@@ -3490,6 +3521,13 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
int update = 0;
int done = 0;
+ if (cq->destroyed) {
+ DP_ERR(dev,
+ "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
+ cq, cq->icid);
+ return 0;
+ }
+
if (cq->cq_type == QEDR_CQ_TYPE_GSI)
return qedr_gsi_poll_cq(ibcq, num_entries, wc);
@@ -3569,14 +3607,15 @@ int qedr_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
- err = qedr_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
+ RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
- RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 070677ca4d19..0f8ab49d5a1a 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -70,7 +70,7 @@ int qedr_query_qp(struct ib_qp *, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_qp_init_attr *);
int qedr_destroy_qp(struct ib_qp *ibqp);
-struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
+struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
struct ib_udata *udata);
int qedr_destroy_ah(struct ib_ah *ibah);
diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h
index 1d6e63eb1146..a4a1f56ce824 100644
--- a/drivers/infiniband/hw/qib/qib_common.h
+++ b/drivers/infiniband/hw/qib/qib_common.h
@@ -742,11 +742,7 @@ struct qib_tid_session_member {
#define SIZE_OF_CRC 1
#define QIB_DEFAULT_P_KEY 0xFFFF
-#define QIB_AETH_CREDIT_SHIFT 24
-#define QIB_AETH_CREDIT_MASK 0x1F
-#define QIB_AETH_CREDIT_INVAL 0x1F
#define QIB_PSN_MASK 0xFFFFFF
-#define QIB_MSN_MASK 0xFFFFFF
#define QIB_EAGER_TID_ID QLOGIC_IB_I_TID_MASK
#define QIB_MULTICAST_QPN 0xFFFFFF
diff --git a/drivers/infiniband/hw/qib/qib_dma.c b/drivers/infiniband/hw/qib/qib_dma.c
deleted file mode 100644
index 59fe092b4b0f..000000000000
--- a/drivers/infiniband/hw/qib/qib_dma.c
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- * Copyright (c) 2006, 2009, 2010 QLogic, Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/types.h>
-#include <linux/scatterlist.h>
-
-#include "qib_verbs.h"
-
-#define BAD_DMA_ADDRESS ((u64) 0)
-
-/*
- * The following functions implement driver specific replacements
- * for the ib_dma_*() functions.
- *
- * These functions return kernel virtual addresses instead of
- * device bus addresses since the driver uses the CPU to copy
- * data instead of using hardware DMA.
- */
-
-static int qib_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
- return dma_addr == BAD_DMA_ADDRESS;
-}
-
-static u64 qib_dma_map_single(struct ib_device *dev, void *cpu_addr,
- size_t size, enum dma_data_direction direction)
-{
- BUG_ON(!valid_dma_direction(direction));
- return (u64) cpu_addr;
-}
-
-static void qib_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
- enum dma_data_direction direction)
-{
- BUG_ON(!valid_dma_direction(direction));
-}
-
-static u64 qib_dma_map_page(struct ib_device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction direction)
-{
- u64 addr;
-
- BUG_ON(!valid_dma_direction(direction));
-
- if (offset + size > PAGE_SIZE) {
- addr = BAD_DMA_ADDRESS;
- goto done;
- }
-
- addr = (u64) page_address(page);
- if (addr)
- addr += offset;
- /* TODO: handle highmem pages */
-
-done:
- return addr;
-}
-
-static void qib_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
- enum dma_data_direction direction)
-{
- BUG_ON(!valid_dma_direction(direction));
-}
-
-static int qib_map_sg(struct ib_device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction direction)
-{
- struct scatterlist *sg;
- u64 addr;
- int i;
- int ret = nents;
-
- BUG_ON(!valid_dma_direction(direction));
-
- for_each_sg(sgl, sg, nents, i) {
- addr = (u64) page_address(sg_page(sg));
- /* TODO: handle highmem pages */
- if (!addr) {
- ret = 0;
- break;
- }
- sg->dma_address = addr + sg->offset;
-#ifdef CONFIG_NEED_SG_DMA_LENGTH
- sg->dma_length = sg->length;
-#endif
- }
- return ret;
-}
-
-static void qib_unmap_sg(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction)
-{
- BUG_ON(!valid_dma_direction(direction));
-}
-
-static void qib_sync_single_for_cpu(struct ib_device *dev, u64 addr,
- size_t size, enum dma_data_direction dir)
-{
-}
-
-static void qib_sync_single_for_device(struct ib_device *dev, u64 addr,
- size_t size,
- enum dma_data_direction dir)
-{
-}
-
-static void *qib_dma_alloc_coherent(struct ib_device *dev, size_t size,
- u64 *dma_handle, gfp_t flag)
-{
- struct page *p;
- void *addr = NULL;
-
- p = alloc_pages(flag, get_order(size));
- if (p)
- addr = page_address(p);
- if (dma_handle)
- *dma_handle = (u64) addr;
- return addr;
-}
-
-static void qib_dma_free_coherent(struct ib_device *dev, size_t size,
- void *cpu_addr, u64 dma_handle)
-{
- free_pages((unsigned long) cpu_addr, get_order(size));
-}
-
-struct ib_dma_mapping_ops qib_dma_mapping_ops = {
- .mapping_error = qib_mapping_error,
- .map_single = qib_dma_map_single,
- .unmap_single = qib_dma_unmap_single,
- .map_page = qib_dma_map_page,
- .unmap_page = qib_dma_unmap_page,
- .map_sg = qib_map_sg,
- .unmap_sg = qib_unmap_sg,
- .sync_single_for_cpu = qib_sync_single_for_cpu,
- .sync_single_for_device = qib_sync_single_for_device,
- .alloc_coherent = qib_dma_alloc_coherent,
- .free_coherent = qib_dma_free_coherent
-};
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 2d1eacf1dfed..9396c1807cc3 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -893,7 +893,7 @@ bail:
/*
* qib_file_vma_fault - handle a VMA page fault.
*/
-static int qib_file_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int qib_file_vma_fault(struct vm_fault *vmf)
{
struct page *page;
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index f1e66efea98a..1d940a2885c9 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -512,7 +512,7 @@ static int qibfs_fill_super(struct super_block *sb, void *data, int silent)
unsigned long flags;
int ret;
- static struct tree_descr files[] = {
+ static const struct tree_descr files[] = {
[2] = {"driver_stats", &driver_ops[0], S_IRUGO},
[3] = {"driver_stats_names", &driver_ops[1], S_IRUGO},
{""},
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index 92399d3ffd15..e423b71e6ea0 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -707,7 +707,7 @@ static void qib_6120_clear_freeze(struct qib_devdata *dd)
/* disable error interrupts, to avoid confusion */
qib_write_kreg(dd, kr_errmask, 0ULL);
- /* also disable interrupts; errormask is sometimes overwriten */
+ /* also disable interrupts; errormask is sometimes overwritten */
qib_6120_set_intr_state(dd, 0);
qib_cancel_sends(dd->pport);
@@ -3295,13 +3295,11 @@ static int init_6120_variables(struct qib_devdata *dd)
dd->rhdrhead_intr_off = 1ULL << 32;
/* setup the stats timer; the add_timer is done at end of init */
- init_timer(&dd->stats_timer);
- dd->stats_timer.function = qib_get_6120_faststats;
- dd->stats_timer.data = (unsigned long) dd;
+ setup_timer(&dd->stats_timer, qib_get_6120_faststats,
+ (unsigned long)dd);
- init_timer(&dd->cspec->pma_timer);
- dd->cspec->pma_timer.function = pma_6120_timer;
- dd->cspec->pma_timer.data = (unsigned long) ppd;
+ setup_timer(&dd->cspec->pma_timer, pma_6120_timer,
+ (unsigned long)ppd);
dd->ureg_align = qib_read_kreg32(dd, kr_palign);
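
Editor's note: the timer hunks in this and the following qib chip files are a mechanical conversion from the three-statement init_timer()/.function/.data setup to the setup_timer() helper (this is still the pre-timer_setup() API, so the callback keeps its unsigned long cookie). Sketch with illustrative callback names:

    #include <linux/timer.h>
    #include <linux/jiffies.h>

    static void example_poll(unsigned long data)
    {
            struct qib_devdata *dd = (struct qib_devdata *)data;

            /* periodic work would go here, then re-arm */
            mod_timer(&dd->stats_timer, jiffies + HZ);
    }

    static void example_poll_start(struct qib_devdata *dd)
    {
            setup_timer(&dd->stats_timer, example_poll, (unsigned long)dd);
            mod_timer(&dd->stats_timer, jiffies + HZ);
    }
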
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index e55e31a69195..c3679c48e61c 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -1259,7 +1259,7 @@ static void qib_7220_clear_freeze(struct qib_devdata *dd)
/* disable error interrupts, to avoid confusion */
qib_write_kreg(dd, kr_errmask, 0ULL);
- /* also disable interrupts; errormask is sometimes overwriten */
+ /* also disable interrupts; errormask is sometimes overwritten */
qib_7220_set_intr_state(dd, 0);
qib_cancel_sends(dd->pport);
@@ -4074,9 +4074,8 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
if (!qib_mini_init)
qib_write_kreg(dd, kr_rcvbthqp, QIB_KD_QP);
- init_timer(&ppd->cpspec->chase_timer);
- ppd->cpspec->chase_timer.function = reenable_7220_chase;
- ppd->cpspec->chase_timer.data = (unsigned long)ppd;
+ setup_timer(&ppd->cpspec->chase_timer, reenable_7220_chase,
+ (unsigned long)ppd);
qib_num_cfg_vls = 1; /* if any 7220's, only one VL */
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index c4a3616062f1..bb2439fff8fa 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -2053,7 +2053,7 @@ static void qib_7322_clear_freeze(struct qib_devdata *dd)
qib_write_kreg_port(dd->pport + pidx, krp_errmask,
0ULL);
- /* also disable interrupts; errormask is sometimes overwriten */
+ /* also disable interrupts; errormask is sometimes overwritten */
qib_7322_set_intr_state(dd, 0);
/* clear the freeze, and be sure chip saw it */
@@ -2893,7 +2893,6 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd)
dd->cspec->gpio_mask &= ~mask;
qib_write_kreg(dd, kr_gpio_mask, dd->cspec->gpio_mask);
spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags);
- qib_qsfp_deinit(&dd->pport[i].cpspec->qsfp_data);
}
}
}
@@ -6612,9 +6611,8 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
if (!qib_mini_init)
write_7322_init_portregs(ppd);
- init_timer(&cp->chase_timer);
- cp->chase_timer.function = reenable_chase;
- cp->chase_timer.data = (unsigned long)ppd;
+ setup_timer(&cp->chase_timer, reenable_chase,
+ (unsigned long)ppd);
ppd++;
}
@@ -6640,9 +6638,8 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
(u64) rcv_int_count << IBA7322_HDRHEAD_PKTINT_SHIFT;
/* setup the stats timer; the add_timer is done at end of init */
- init_timer(&dd->stats_timer);
- dd->stats_timer.function = qib_get_7322_faststats;
- dd->stats_timer.data = (unsigned long) dd;
+ setup_timer(&dd->stats_timer, qib_get_7322_faststats,
+ (unsigned long)dd);
dd->ureg_align = 0x10000; /* 64KB alignment */
@@ -7069,7 +7066,7 @@ static void qib_7322_txchk_change(struct qib_devdata *dd, u32 start,
unsigned long flags;
while (wait) {
- unsigned long shadow;
+ unsigned long shadow = 0;
int cstart, previ = -1;
/*
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index b50240b1d5a4..6c16ba1107ba 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -233,9 +233,8 @@ int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
spin_lock_init(&ppd->cc_shadow_lock);
init_waitqueue_head(&ppd->state_wait);
- init_timer(&ppd->symerr_clear_timer);
- ppd->symerr_clear_timer.function = qib_clear_symerror_on_linkup;
- ppd->symerr_clear_timer.data = (unsigned long)ppd;
+ setup_timer(&ppd->symerr_clear_timer, qib_clear_symerror_on_linkup,
+ (unsigned long)ppd);
ppd->qib_wq = NULL;
ppd->ibport_data.pmastats =
@@ -429,9 +428,8 @@ static int loadtime_init(struct qib_devdata *dd)
qib_get_eeprom_info(dd);
/* setup time (don't start yet) to verify we got interrupt */
- init_timer(&dd->intrchk_timer);
- dd->intrchk_timer.function = verify_interrupt;
- dd->intrchk_timer.data = (unsigned long) dd;
+ setup_timer(&dd->intrchk_timer, verify_interrupt,
+ (unsigned long)dd);
done:
return ret;
}
@@ -755,9 +753,8 @@ done:
continue;
if (dd->flags & QIB_HAS_SEND_DMA)
ret = qib_setup_sdma(ppd);
- init_timer(&ppd->hol_timer);
- ppd->hol_timer.function = qib_hol_event;
- ppd->hol_timer.data = (unsigned long)ppd;
+ setup_timer(&ppd->hol_timer, qib_hol_event,
+ (unsigned long)ppd);
ppd->hol_state = QIB_HOL_UP;
}
diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c
index 2c3c93572c17..8fdf79f8d4e4 100644
--- a/drivers/infiniband/hw/qib/qib_keys.c
+++ b/drivers/infiniband/hw/qib/qib_keys.c
@@ -158,10 +158,7 @@ int qib_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
unsigned n, m;
size_t off;
- /*
- * We use RKEY == zero for kernel virtual addresses
- * (see qib_get_dma_mr and qib_dma.c).
- */
+ /* We use RKEY == zero for kernel virtual addresses */
rcu_read_lock();
if (rkey == 0) {
struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd);
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index d2ac29861af5..da295e0392ed 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -717,9 +717,10 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
spin_lock_irqsave(&ibp->rvp.lock, flags);
if (ibp->rvp.sm_ah) {
if (smlid != ibp->rvp.sm_lid)
- ibp->rvp.sm_ah->attr.dlid = smlid;
+ rdma_ah_set_dlid(&ibp->rvp.sm_ah->attr,
+ smlid);
if (msl != ibp->rvp.sm_sl)
- ibp->rvp.sm_ah->attr.sl = msl;
+ rdma_ah_set_sl(&ibp->rvp.sm_ah->attr, msl);
}
spin_unlock_irqrestore(&ibp->rvp.lock, flags);
if (smlid != ibp->rvp.sm_lid)
@@ -2500,5 +2501,5 @@ void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx)
del_timer_sync(&dd->pport[port_idx].cong_stats.timer);
if (dd->pport[port_idx].ibport_data.smi_ah)
- ib_destroy_ah(&dd->pport[port_idx].ibport_data.smi_ah->ibah);
+ rdma_destroy_ah(&dd->pport[port_idx].ibport_data.smi_ah->ibah);
}
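
Editor's note: the qib_mad.c hunks are part of the tree-wide move from poking struct ib_ah_attr fields to the rdma_ah_attr accessor API, and from ib_create_ah()/ib_destroy_ah() to the rdma_*_ah() names. A hedged sketch of accessor-style address-handle setup (wrapper name and parameter values are illustrative):

    #include <rdma/ib_verbs.h>

    static struct ib_ah *example_make_sm_ah(struct ib_pd *pd,
                                            struct ib_device *ibdev,
                                            u8 port_num, u16 dlid, u8 sl)
    {
            struct rdma_ah_attr attr = { };

            attr.type = rdma_ah_find_type(ibdev, port_num);
            rdma_ah_set_dlid(&attr, dlid);
            rdma_ah_set_sl(&attr, sl);
            rdma_ah_set_port_num(&attr, port_num);

            return rdma_create_ah(pd, &attr);
            /* teardown later with rdma_destroy_ah(ah); */
    }
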
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
index 6abe1c621aa4..c379b8342a09 100644
--- a/drivers/infiniband/hw/qib/qib_pcie.c
+++ b/drivers/infiniband/hw/qib/qib_pcie.c
@@ -682,13 +682,6 @@ qib_pci_slot_reset(struct pci_dev *pdev)
return PCI_ERS_RESULT_CAN_RECOVER;
}
-static pci_ers_result_t
-qib_pci_link_reset(struct pci_dev *pdev)
-{
- qib_devinfo(pdev, "QIB link_reset function called, ignored\n");
- return PCI_ERS_RESULT_CAN_RECOVER;
-}
-
static void
qib_pci_resume(struct pci_dev *pdev)
{
@@ -707,7 +700,6 @@ qib_pci_resume(struct pci_dev *pdev)
const struct pci_error_handlers qib_pci_err_handler = {
.error_detected = qib_pci_error_detected,
.mmio_enabled = qib_pci_mmio_enabled,
- .link_reset = qib_pci_link_reset,
.slot_reset = qib_pci_slot_reset,
.resume = qib_pci_resume,
};
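
Editor's note: the .link_reset stub is dropped because the PCI core's AER recovery path does not invoke that callback, so a log-and-ignore handler adds nothing. The handler table keeps only the stages the driver services; the usual recovery order is error_detected, then mmio_enabled and/or slot_reset, then resume. Sketch with illustrative callbacks:

    #include <linux/pci.h>

    static const struct pci_error_handlers example_err_handler = {
            .error_detected = example_error_detected, /* uncorrectable error seen */
            .mmio_enabled   = example_mmio_enabled,   /* MMIO re-enabled, pre-reset */
            .slot_reset     = example_slot_reset,     /* after bus/slot reset */
            .resume         = example_resume,         /* traffic may restart */
    };
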
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 99d31efe4c2f..a343e3b5d4cb 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -61,43 +61,6 @@ static inline unsigned find_next_offset(struct rvt_qpn_table *qpt,
return off;
}
-/*
- * Convert the AETH credit code into the number of credits.
- */
-static u32 credit_table[31] = {
- 0, /* 0 */
- 1, /* 1 */
- 2, /* 2 */
- 3, /* 3 */
- 4, /* 4 */
- 6, /* 5 */
- 8, /* 6 */
- 12, /* 7 */
- 16, /* 8 */
- 24, /* 9 */
- 32, /* A */
- 48, /* B */
- 64, /* C */
- 96, /* D */
- 128, /* E */
- 192, /* F */
- 256, /* 10 */
- 384, /* 11 */
- 512, /* 12 */
- 768, /* 13 */
- 1024, /* 14 */
- 1536, /* 15 */
- 2048, /* 16 */
- 3072, /* 17 */
- 4096, /* 18 */
- 6144, /* 19 */
- 8192, /* 1A */
- 12288, /* 1B */
- 16384, /* 1C */
- 24576, /* 1D */
- 32768 /* 1E */
-};
-
const struct rvt_operation_params qib_post_parms[RVT_OPERATION_MAX] = {
[IB_WR_RDMA_WRITE] = {
.length = sizeof(struct ib_rdma_wr),
@@ -141,10 +104,9 @@ const struct rvt_operation_params qib_post_parms[RVT_OPERATION_MAX] = {
};
-static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map,
- gfp_t gfp)
+static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map)
{
- unsigned long page = get_zeroed_page(gfp);
+ unsigned long page = get_zeroed_page(GFP_KERNEL);
/*
* Free the page if someone raced with us installing it.
@@ -163,7 +125,7 @@ static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map,
* zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
*/
int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
- enum ib_qp_type type, u8 port, gfp_t gfp)
+ enum ib_qp_type type, u8 port)
{
u32 i, offset, max_scan, qpn;
struct rvt_qpn_map *map;
@@ -197,7 +159,7 @@ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
max_scan = qpt->nmaps - !offset;
for (i = 0;;) {
if (unlikely(!map->page)) {
- get_map_page(qpt, map, gfp);
+ get_map_page(qpt, map);
if (unlikely(!map->page))
break;
}
@@ -354,76 +316,16 @@ u32 qib_mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu)
return ib_mtu_enum_to_int(pmtu);
}
-/**
- * qib_compute_aeth - compute the AETH (syndrome + MSN)
- * @qp: the queue pair to compute the AETH for
- *
- * Returns the AETH.
- */
-__be32 qib_compute_aeth(struct rvt_qp *qp)
-{
- u32 aeth = qp->r_msn & QIB_MSN_MASK;
-
- if (qp->ibqp.srq) {
- /*
- * Shared receive queues don't generate credits.
- * Set the credit field to the invalid value.
- */
- aeth |= QIB_AETH_CREDIT_INVAL << QIB_AETH_CREDIT_SHIFT;
- } else {
- u32 min, max, x;
- u32 credits;
- struct rvt_rwq *wq = qp->r_rq.wq;
- u32 head;
- u32 tail;
-
- /* sanity check pointers before trusting them */
- head = wq->head;
- if (head >= qp->r_rq.size)
- head = 0;
- tail = wq->tail;
- if (tail >= qp->r_rq.size)
- tail = 0;
- /*
- * Compute the number of credits available (RWQEs).
- * XXX Not holding the r_rq.lock here so there is a small
- * chance that the pair of reads are not atomic.
- */
- credits = head - tail;
- if ((int)credits < 0)
- credits += qp->r_rq.size;
- /*
- * Binary search the credit table to find the code to
- * use.
- */
- min = 0;
- max = 31;
- for (;;) {
- x = (min + max) / 2;
- if (credit_table[x] == credits)
- break;
- if (credit_table[x] > credits)
- max = x;
- else if (min == x)
- break;
- else
- min = x;
- }
- aeth |= x << QIB_AETH_CREDIT_SHIFT;
- }
- return cpu_to_be32(aeth);
-}
-
-void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp)
+void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
struct qib_qp_priv *priv;
- priv = kzalloc(sizeof(*priv), gfp);
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
return ERR_PTR(-ENOMEM);
priv->owner = qp;
- priv->s_hdr = kzalloc(sizeof(*priv->s_hdr), gfp);
+ priv->s_hdr = kzalloc(sizeof(*priv->s_hdr), GFP_KERNEL);
if (!priv->s_hdr) {
kfree(priv);
return ERR_PTR(-ENOMEM);
@@ -448,7 +350,6 @@ void qib_stop_send_queue(struct rvt_qp *qp)
struct qib_qp_priv *priv = qp->priv;
cancel_work_sync(&priv->s_work);
- del_timer_sync(&qp->s_timer);
}
void qib_quiesce_qp(struct rvt_qp *qp)
@@ -474,43 +375,6 @@ void qib_flush_qp_waiters(struct rvt_qp *qp)
}
/**
- * qib_get_credit - flush the send work queue of a QP
- * @qp: the qp who's send work queue to flush
- * @aeth: the Acknowledge Extended Transport Header
- *
- * The QP s_lock should be held.
- */
-void qib_get_credit(struct rvt_qp *qp, u32 aeth)
-{
- u32 credit = (aeth >> QIB_AETH_CREDIT_SHIFT) & QIB_AETH_CREDIT_MASK;
-
- /*
- * If the credit is invalid, we can send
- * as many packets as we like. Otherwise, we have to
- * honor the credit field.
- */
- if (credit == QIB_AETH_CREDIT_INVAL) {
- if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
- qp->s_flags |= RVT_S_UNLIMITED_CREDIT;
- if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
- qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
- qib_schedule_send(qp);
- }
- }
- } else if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
- /* Compute new LSN (i.e., MSN + credit) */
- credit = (aeth + credit_table[credit]) & QIB_MSN_MASK;
- if (qib_cmp24(credit, qp->s_lsn) > 0) {
- qp->s_lsn = credit;
- if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
- qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
- qib_schedule_send(qp);
- }
- }
- }
-}
-
-/**
* qib_check_send_wqe - validate wr/wqe
* @qp - The qp
* @wqe - The built wqe
@@ -624,7 +488,7 @@ void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter)
qp->s_last, qp->s_acked, qp->s_cur,
qp->s_tail, qp->s_head, qp->s_size,
qp->remote_qpn,
- qp->remote_ah_attr.dlid);
+ rdma_ah_get_dlid(&qp->remote_ah_attr));
}
#endif
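
Editor's note: the large deletions in qib_qp.c (credit_table, qib_compute_aeth(), qib_get_credit()) are not lost functionality; the AETH build/parse and SSN-credit bookkeeping now live in rdmavt, and the qib_rc.c hunks below switch the call sites to rvt_compute_aeth()/rvt_get_credit(). The gfp_t plumbing also disappears from the QPN and priv allocators: rdmavt no longer threads a caller-chosen GFP flag through, so GFP_KERNEL is used directly. Sketch of the replacement calls (the wrapper function is illustrative):

    #include <rdma/rdmavt_qp.h>
    #include <rdma/ib_hdrs.h>

    static void example_aeth_paths(struct rvt_qp *qp,
                                   struct ib_other_headers *ohdr, u32 aeth)
    {
            /* responder side: build the AETH (credit syndrome + MSN) */
            ohdr->u.aeth = rvt_compute_aeth(qp);

            /* requester side: a plain ACK (NAK field clear) releases
             * credits and may re-schedule the send engine */
            if ((aeth >> IB_AETH_NAK_SHIFT) == 0)
                    rvt_get_credit(qp, aeth);
    }
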
diff --git a/drivers/infiniband/hw/qib/qib_qsfp.c b/drivers/infiniband/hw/qib/qib_qsfp.c
index 4c7c3c84a741..295d40a83bb6 100644
--- a/drivers/infiniband/hw/qib/qib_qsfp.c
+++ b/drivers/infiniband/hw/qib/qib_qsfp.c
@@ -485,16 +485,6 @@ void qib_qsfp_init(struct qib_qsfp_data *qd,
dd->f_gpio_mod(dd, mask, mask, mask);
}
-void qib_qsfp_deinit(struct qib_qsfp_data *qd)
-{
- /*
- * There is nothing to do here for now. our work is scheduled
- * with queue_work(), and flush_workqueue() from remove_one
- * will block until all work setup with queue_work()
- * completes.
- */
-}
-
int qib_qsfp_dump(struct qib_pportdata *ppd, char *buf, int len)
{
struct qib_qsfp_cache cd;
diff --git a/drivers/infiniband/hw/qib/qib_qsfp.h b/drivers/infiniband/hw/qib/qib_qsfp.h
index 91908f533a2b..ad8dbd6ac0cf 100644
--- a/drivers/infiniband/hw/qib/qib_qsfp.h
+++ b/drivers/infiniband/hw/qib/qib_qsfp.h
@@ -186,4 +186,3 @@ extern int qib_refresh_qsfp_cache(struct qib_pportdata *ppd,
extern int qib_qsfp_mod_present(struct qib_pportdata *ppd);
extern void qib_qsfp_init(struct qib_qsfp_data *qd,
void (*fevent)(struct work_struct *));
-extern void qib_qsfp_deinit(struct qib_qsfp_data *qd);
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 031433cb7206..4ddbcac5eabe 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -38,7 +38,6 @@
/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_RC_##x
-static void rc_timeout(unsigned long arg);
static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
u32 psn, u32 pmtu)
@@ -50,19 +49,10 @@ static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
ss->sg_list = wqe->sg_list + 1;
ss->num_sge = wqe->wr.num_sge;
ss->total_len = wqe->length;
- qib_skip_sge(ss, len, 0);
+ rvt_skip_sge(ss, len, false);
return wqe->length - len;
}
-static void start_timer(struct rvt_qp *qp)
-{
- qp->s_flags |= RVT_S_TIMER;
- qp->s_timer.function = rc_timeout;
- /* 4.096 usec. * (1 << qp->timeout) */
- qp->s_timer.expires = jiffies + qp->timeout_jiffies;
- add_timer(&qp->s_timer);
-}
-
/**
* qib_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
* @dev: the device for this QP
@@ -144,7 +134,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp,
qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
e->sent = 1;
}
- ohdr->u.aeth = qib_compute_aeth(qp);
+ ohdr->u.aeth = rvt_compute_aeth(qp);
hwords++;
qp->s_ack_rdma_psn = e->psn;
bth2 = qp->s_ack_rdma_psn++ & QIB_PSN_MASK;
@@ -153,7 +143,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp,
qp->s_cur_sge = NULL;
len = 0;
qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
- ohdr->u.at.aeth = qib_compute_aeth(qp);
+ ohdr->u.at.aeth = rvt_compute_aeth(qp);
ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth);
hwords += sizeof(ohdr->u.at) / sizeof(u32);
bth2 = e->psn & QIB_PSN_MASK;
@@ -174,7 +164,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp,
if (len > pmtu)
len = pmtu;
else {
- ohdr->u.aeth = qib_compute_aeth(qp);
+ ohdr->u.aeth = rvt_compute_aeth(qp);
hwords++;
qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
@@ -197,11 +187,11 @@ normal:
qp->s_cur_sge = NULL;
if (qp->s_nak_state)
ohdr->u.aeth =
- cpu_to_be32((qp->r_msn & QIB_MSN_MASK) |
+ cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
(qp->s_nak_state <<
- QIB_AETH_CREDIT_SHIFT));
+ IB_AETH_CREDIT_SHIFT));
else
- ohdr->u.aeth = qib_compute_aeth(qp);
+ ohdr->u.aeth = rvt_compute_aeth(qp);
hwords++;
len = 0;
bth0 = OP(ACKNOWLEDGE) << 24;
@@ -244,7 +234,7 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
int delta;
ohdr = &priv->s_hdr->u.oth;
- if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
ohdr = &priv->s_hdr->u.l.oth;
/* Sending responses has higher priority over sending requests. */
@@ -257,7 +247,7 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
goto bail;
/* We are in the error state, flush the work request. */
smp_read_barrier_depends(); /* see post_one_send() */
- if (qp->s_last == ACCESS_ONCE(qp->s_head))
+ if (qp->s_last == READ_ONCE(qp->s_head))
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
if (atomic_read(&priv->s_dma_busy)) {
@@ -303,7 +293,8 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
newreq = 0;
if (qp->s_cur == qp->s_tail) {
/* Check if send work queue is empty. */
- if (qp->s_tail == qp->s_head)
+ smp_read_barrier_depends(); /* see post_one_send() */
+ if (qp->s_tail == READ_ONCE(qp->s_head))
goto bail;
/*
* If a fence is requested, wait for previous
@@ -330,7 +321,7 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
case IB_WR_SEND_WITH_IMM:
/* If no credit, return. */
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
- qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
+ rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
goto bail;
}
@@ -361,7 +352,7 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
case IB_WR_RDMA_WRITE_WITH_IMM:
/* If no credit, return. */
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
- qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
+ rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
goto bail;
}
@@ -646,9 +637,11 @@ void qib_send_rc_ack(struct rvt_qp *qp)
lrh0 = QIB_LRH_BTH;
/* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
hwords = 6;
- if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
+ if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
+ IB_AH_GRH)) {
hwords += qib_make_grh(ibp, &hdr.u.l.grh,
- &qp->remote_ah_attr.grh, hwords, 0);
+ rdma_ah_read_grh(&qp->remote_ah_attr),
+ hwords, 0);
ohdr = &hdr.u.l.oth;
lrh0 = QIB_LRH_GRH;
}
@@ -657,17 +650,18 @@ void qib_send_rc_ack(struct rvt_qp *qp)
if (qp->s_mig_state == IB_MIG_MIGRATED)
bth0 |= IB_BTH_MIG_REQ;
if (qp->r_nak_state)
- ohdr->u.aeth = cpu_to_be32((qp->r_msn & QIB_MSN_MASK) |
+ ohdr->u.aeth = cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
(qp->r_nak_state <<
- QIB_AETH_CREDIT_SHIFT));
+ IB_AETH_CREDIT_SHIFT));
else
- ohdr->u.aeth = qib_compute_aeth(qp);
- lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 |
- qp->remote_ah_attr.sl << 4;
+ ohdr->u.aeth = rvt_compute_aeth(qp);
+ lrh0 |= ibp->sl_to_vl[rdma_ah_get_sl(&qp->remote_ah_attr)] << 12 |
+ rdma_ah_get_sl(&qp->remote_ah_attr) << 4;
hdr.lrh[0] = cpu_to_be16(lrh0);
- hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+ hdr.lrh[1] = cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr));
hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
- hdr.lrh[3] = cpu_to_be16(ppd->lid | qp->remote_ah_attr.src_path_bits);
+ hdr.lrh[3] = cpu_to_be16(ppd->lid |
+ rdma_ah_get_path_bits(&qp->remote_ah_attr));
ohdr->bth[0] = cpu_to_be32(bth0);
ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & QIB_PSN_MASK);
@@ -836,7 +830,7 @@ done:
* Back up requester to resend the last un-ACKed request.
* The QP r_lock and s_lock should be held and interrupts disabled.
*/
-static void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
+void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
{
struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
struct qib_ibport *ibp;
@@ -869,46 +863,6 @@ static void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
}
/*
- * This is called from s_timer for missing responses.
- */
-static void rc_timeout(unsigned long arg)
-{
- struct rvt_qp *qp = (struct rvt_qp *)arg;
- struct qib_ibport *ibp;
- unsigned long flags;
-
- spin_lock_irqsave(&qp->r_lock, flags);
- spin_lock(&qp->s_lock);
- if (qp->s_flags & RVT_S_TIMER) {
- ibp = to_iport(qp->ibqp.device, qp->port_num);
- ibp->rvp.n_rc_timeouts++;
- qp->s_flags &= ~RVT_S_TIMER;
- del_timer(&qp->s_timer);
- qib_restart_rc(qp, qp->s_last_psn + 1, 1);
- qib_schedule_send(qp);
- }
- spin_unlock(&qp->s_lock);
- spin_unlock_irqrestore(&qp->r_lock, flags);
-}
-
-/*
- * This is called from s_timer for RNR timeouts.
- */
-void qib_rc_rnr_retry(unsigned long arg)
-{
- struct rvt_qp *qp = (struct rvt_qp *)arg;
- unsigned long flags;
-
- spin_lock_irqsave(&qp->s_lock, flags);
- if (qp->s_flags & RVT_S_WAIT_RNR) {
- qp->s_flags &= ~RVT_S_WAIT_RNR;
- del_timer(&qp->s_timer);
- qib_schedule_send(qp);
- }
- spin_unlock_irqrestore(&qp->s_lock, flags);
-}
-
-/*
* Set qp->s_sending_psn to the next PSN after the given one.
* This would be psn+1 except when RDMA reads are present.
*/
@@ -944,7 +898,7 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
u32 opcode;
u32 psn;
- if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
+ if (!(ib_rvt_state_ops[qp->state] & RVT_SEND_OR_FLUSH_OR_RECV_OK))
return;
/* Find out where the BTH is */
@@ -971,7 +925,7 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
!(qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
- start_timer(qp);
+ rvt_add_retry_timer(qp);
while (qp->s_last != qp->s_acked) {
u32 s_last;
@@ -987,7 +941,10 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
/* see post_send() */
barrier();
rvt_put_swqe(wqe);
- rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ ib_qib_wc_opcode[wqe->wr.opcode],
+ IB_WC_SUCCESS);
}
/*
* If we were waiting for sends to complete before resending,
@@ -1032,7 +989,10 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
qp->s_last = s_last;
/* see post_send() */
barrier();
- rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ ib_qib_wc_opcode[wqe->wr.opcode],
+ IB_WC_SUCCESS);
} else
this_cpu_inc(*ibp->rvp.rc_delayed_comp);
@@ -1084,12 +1044,6 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
u32 ack_psn;
int diff;
- /* Remove QP from retry timer */
- if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
- qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
- del_timer(&qp->s_timer);
- }
-
/*
* Note that NAKs implicitly ACK outstanding SEND and RDMA write
* requests and implicitly NAK RDMA read and atomic requests issued
@@ -1097,7 +1051,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
* request but will include an ACK'ed request(s).
*/
ack_psn = psn;
- if (aeth >> 29)
+ if (aeth >> IB_AETH_NAK_SHIFT)
ack_psn--;
wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
ibp = to_iport(qp->ibqp.device, qp->port_num);
@@ -1177,7 +1131,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
break;
}
- switch (aeth >> 29) {
+ switch (aeth >> IB_AETH_NAK_SHIFT) {
case 0: /* ACK */
this_cpu_inc(*ibp->rvp.rc_acks);
if (qp->s_acked != qp->s_tail) {
@@ -1185,27 +1139,30 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
* We are expecting more ACKs so
* reset the retransmit timer.
*/
- start_timer(qp);
+ rvt_mod_retry_timer(qp);
/*
* We can stop resending the earlier packets and
* continue with the next packet the receiver wants.
*/
if (qib_cmp24(qp->s_psn, psn) <= 0)
reset_psn(qp, psn + 1);
- } else if (qib_cmp24(qp->s_psn, psn) <= 0) {
- qp->s_state = OP(SEND_LAST);
- qp->s_psn = psn + 1;
+ } else {
+ /* No more acks - kill all timers */
+ rvt_stop_rc_timers(qp);
+ if (qib_cmp24(qp->s_psn, psn) <= 0) {
+ qp->s_state = OP(SEND_LAST);
+ qp->s_psn = psn + 1;
+ }
}
if (qp->s_flags & RVT_S_WAIT_ACK) {
qp->s_flags &= ~RVT_S_WAIT_ACK;
qib_schedule_send(qp);
}
- qib_get_credit(qp, aeth);
+ rvt_get_credit(qp, aeth);
qp->s_rnr_retry = qp->s_rnr_retry_cnt;
qp->s_retry = qp->s_retry_cnt;
update_last_psn(qp, psn);
- ret = 1;
- goto bail;
+ return 1;
case 1: /* RNR NAK */
ibp->rvp.n_rnr_naks++;
@@ -1228,21 +1185,17 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
reset_psn(qp, psn);
qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK);
- qp->s_flags |= RVT_S_WAIT_RNR;
- qp->s_timer.function = qib_rc_rnr_retry;
- qp->s_timer.expires = jiffies + usecs_to_jiffies(
- ib_qib_rnr_table[(aeth >> QIB_AETH_CREDIT_SHIFT) &
- QIB_AETH_CREDIT_MASK]);
- add_timer(&qp->s_timer);
- goto bail;
+ rvt_stop_rc_timers(qp);
+ rvt_add_rnr_timer(qp, aeth);
+ return 0;
case 3: /* NAK */
if (qp->s_acked == qp->s_tail)
goto bail;
/* The last valid PSN is the previous PSN. */
update_last_psn(qp, psn - 1);
- switch ((aeth >> QIB_AETH_CREDIT_SHIFT) &
- QIB_AETH_CREDIT_MASK) {
+ switch ((aeth >> IB_AETH_CREDIT_SHIFT) &
+ IB_AETH_CREDIT_MASK) {
case 0: /* PSN sequence error */
ibp->rvp.n_seq_naks++;
/*
@@ -1290,6 +1243,7 @@ reserved:
}
bail:
+ rvt_stop_rc_timers(qp);
return ret;
}
@@ -1303,10 +1257,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn,
struct rvt_swqe *wqe;
/* Remove QP from retry timer */
- if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
- qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
- del_timer(&qp->s_timer);
- }
+ rvt_stop_rc_timers(qp);
wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
@@ -1390,7 +1341,7 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
/* Ignore invalid responses. */
smp_read_barrier_depends(); /* see post_one_send */
- if (qib_cmp24(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0)
+ if (qib_cmp24(psn, READ_ONCE(qp->s_next_psn)) >= 0)
goto ack_done;
/* Ignore duplicate responses. */
@@ -1399,8 +1350,8 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
/* Update credits for "ghost" ACKs */
if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
aeth = be32_to_cpu(ohdr->u.aeth);
- if ((aeth >> 29) == 0)
- qib_get_credit(qp, aeth);
+ if ((aeth >> IB_AETH_NAK_SHIFT) == 0)
+ rvt_get_credit(qp, aeth);
}
goto ack_done;
}
@@ -1461,8 +1412,7 @@ read_middle:
* We got a response so update the timeout.
* 4.096 usec. * (1 << qp->timeout)
*/
- qp->s_flags |= RVT_S_TIMER;
- mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies);
+ rvt_mod_retry_timer(qp);
if (qp->s_flags & RVT_S_WAIT_ACK) {
qp->s_flags &= ~RVT_S_WAIT_ACK;
qib_schedule_send(qp);
@@ -1764,25 +1714,6 @@ send_ack:
return 0;
}
-void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err)
-{
- unsigned long flags;
- int lastwqe;
-
- spin_lock_irqsave(&qp->s_lock, flags);
- lastwqe = rvt_error_qp(qp, err);
- spin_unlock_irqrestore(&qp->s_lock, flags);
-
- if (lastwqe) {
- struct ib_event ev;
-
- ev.device = qp->ibqp.device;
- ev.element.qp = &qp->ibqp;
- ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
- qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
- }
-}
-
static inline void qib_update_ack_queue(struct rvt_qp *qp, unsigned n)
{
unsigned next;
@@ -1894,17 +1825,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
break;
}
- if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST)) {
- qp->r_flags |= RVT_R_COMM_EST;
- if (qp->ibqp.event_handler) {
- struct ib_event ev;
-
- ev.device = qp->ibqp.device;
- ev.element.qp = &qp->ibqp;
- ev.event = IB_EVENT_COMM_EST;
- qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
- }
- }
+ if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
+ rvt_comm_est(qp);
/* OK, process the packet. */
switch (opcode) {
@@ -1985,8 +1907,8 @@ send_last:
wc.opcode = IB_WC_RECV;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.slid = qp->remote_ah_attr.dlid;
- wc.sl = qp->remote_ah_attr.sl;
+ wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
+ wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
/* zero fields that are N/A */
wc.vendor_err = 0;
wc.pkey_index = 0;
@@ -2034,8 +1956,10 @@ send_last:
ret = qib_get_rwqe(qp, 1);
if (ret < 0)
goto nack_op_err;
- if (!ret)
+ if (!ret) {
+ rvt_put_ss(&qp->r_sge);
goto rnr_nak;
+ }
wc.ex.imm_data = ohdr->u.rc.imm_data;
hdrsize += 4;
wc.wc_flags = IB_WC_WITH_IMM;
@@ -2196,7 +2120,7 @@ rnr_nak:
return;
nack_op_err:
- qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
qp->r_nak_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
qp->r_ack_psn = qp->r_psn;
/* Queue NAK for later */
@@ -2210,7 +2134,7 @@ nack_op_err:
nack_inv_unlck:
spin_unlock_irqrestore(&qp->s_lock, flags);
nack_inv:
- qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
qp->r_nak_state = IB_NAK_INVALID_REQUEST;
qp->r_ack_psn = qp->r_psn;
/* Queue NAK for later */
@@ -2224,7 +2148,7 @@ nack_inv:
nack_acc_unlck:
spin_unlock_irqrestore(&qp->s_lock, flags);
nack_acc:
- qib_rc_error(qp, IB_WC_LOC_PROT_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
qp->r_ack_psn = qp->r_psn;
send_ack:
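
Editor's note: the qib_rc.c changes fold the driver-private RC timers into rdmavt: rc_timeout(), start_timer() and qib_rc_rnr_retry() go away, and the requester path uses rvt_add_retry_timer(), rvt_mod_retry_timer(), rvt_stop_rc_timers() and rvt_add_rnr_timer() instead. Alongside that, ACCESS_ONCE() becomes READ_ONCE(), the local AETH masks become the shared IB_AETH_*/IB_MSN_MASK definitions, and completions pass an explicit WC opcode to rvt_qp_swqe_complete(). A hedged sketch of the timer helpers as used on the ACK path (wrapper and flag are illustrative):

    #include <rdma/rdmavt_qp.h>
    #include <rdma/ib_hdrs.h>

    static void example_rc_ack_timers(struct rvt_qp *qp, u32 aeth,
                                      bool more_acks_expected)
    {
            if (more_acks_expected)
                    rvt_mod_retry_timer(qp);        /* push the deadline out */
            else
                    rvt_stop_rc_timers(qp);         /* nothing outstanding */

            if ((aeth >> IB_AETH_NAK_SHIFT) == 1)   /* RNR NAK */
                    rvt_add_rnr_timer(qp, aeth);    /* sleep for encoded delay */
    }
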
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index e54a2feeeb10..bd09de7c6e56 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -38,44 +38,6 @@
#include "qib_mad.h"
/*
- * Convert the AETH RNR timeout code into the number of microseconds.
- */
-const u32 ib_qib_rnr_table[32] = {
- 655360, /* 00: 655.36 */
- 10, /* 01: .01 */
- 20, /* 02 .02 */
- 30, /* 03: .03 */
- 40, /* 04: .04 */
- 60, /* 05: .06 */
- 80, /* 06: .08 */
- 120, /* 07: .12 */
- 160, /* 08: .16 */
- 240, /* 09: .24 */
- 320, /* 0A: .32 */
- 480, /* 0B: .48 */
- 640, /* 0C: .64 */
- 960, /* 0D: .96 */
- 1280, /* 0E: 1.28 */
- 1920, /* 0F: 1.92 */
- 2560, /* 10: 2.56 */
- 3840, /* 11: 3.84 */
- 5120, /* 12: 5.12 */
- 7680, /* 13: 7.68 */
- 10240, /* 14: 10.24 */
- 15360, /* 15: 15.36 */
- 20480, /* 16: 20.48 */
- 30720, /* 17: 30.72 */
- 40960, /* 18: 40.96 */
- 61440, /* 19: 61.44 */
- 81920, /* 1A: 81.92 */
- 122880, /* 1B: 122.88 */
- 163840, /* 1C: 163.84 */
- 245760, /* 1D: 245.76 */
- 327680, /* 1E: 327.68 */
- 491520 /* 1F: 491.52 */
-};
-
-/*
* Validate a RWQE and fill in the SGE state.
* Return 1 if OK.
*/
@@ -233,7 +195,7 @@ void qib_migrate_qp(struct rvt_qp *qp)
qp->s_mig_state = IB_MIG_MIGRATED;
qp->remote_ah_attr = qp->alt_ah_attr;
- qp->port_num = qp->alt_ah_attr.port_num;
+ qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
qp->s_pkey_index = qp->s_alt_pkey_index;
ev.device = qp->ibqp.device;
@@ -273,18 +235,23 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr,
if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
if (!has_grh) {
- if (qp->alt_ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
+ IB_AH_GRH)
goto err;
} else {
- if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH))
+ const struct ib_global_route *grh;
+
+ if (!(rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
+ IB_AH_GRH))
goto err;
- guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index);
+ grh = rdma_ah_read_grh(&qp->alt_ah_attr);
+ guid = get_sguid(ibp, grh->sgid_index);
if (!gid_ok(&hdr->u.l.grh.dgid,
ibp->rvp.gid_prefix, guid))
goto err;
if (!gid_ok(&hdr->u.l.grh.sgid,
- qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
- qp->alt_ah_attr.grh.dgid.global.interface_id))
+ grh->dgid.global.subnet_prefix,
+ grh->dgid.global.interface_id))
goto err;
}
if (!qib_pkey_ok((u16)bth0,
@@ -297,27 +264,33 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr,
goto err;
}
/* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
- if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid ||
- ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num)
+ if ((be16_to_cpu(hdr->lrh[3]) !=
+ rdma_ah_get_dlid(&qp->alt_ah_attr)) ||
+ ppd_from_ibp(ibp)->port !=
+ rdma_ah_get_port_num(&qp->alt_ah_attr))
goto err;
spin_lock_irqsave(&qp->s_lock, flags);
qib_migrate_qp(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
} else {
if (!has_grh) {
- if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
+ IB_AH_GRH)
goto err;
} else {
- if (!(qp->remote_ah_attr.ah_flags & IB_AH_GRH))
+ const struct ib_global_route *grh;
+
+ if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
+ IB_AH_GRH))
goto err;
- guid = get_sguid(ibp,
- qp->remote_ah_attr.grh.sgid_index);
+ grh = rdma_ah_read_grh(&qp->remote_ah_attr);
+ guid = get_sguid(ibp, grh->sgid_index);
if (!gid_ok(&hdr->u.l.grh.dgid,
ibp->rvp.gid_prefix, guid))
goto err;
if (!gid_ok(&hdr->u.l.grh.sgid,
- qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
- qp->remote_ah_attr.grh.dgid.global.interface_id))
+ grh->dgid.global.subnet_prefix,
+ grh->dgid.global.interface_id))
goto err;
}
if (!qib_pkey_ok((u16)bth0,
@@ -330,7 +303,8 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr,
goto err;
}
/* Validate the SLID. See Ch. 9.6.1.5 */
- if (be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid ||
+ if (be16_to_cpu(hdr->lrh[3]) !=
+ rdma_ah_get_dlid(&qp->remote_ah_attr) ||
ppd_from_ibp(ibp)->port != qp->port_num)
goto err;
if (qp->s_mig_state == IB_MIG_REARM &&
@@ -566,8 +540,8 @@ again:
wc.byte_len = wqe->length;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.slid = qp->remote_ah_attr.dlid;
- wc.sl = qp->remote_ah_attr.sl;
+ wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
+ wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
@@ -599,11 +573,8 @@ rnr_nak:
spin_lock_irqsave(&sqp->s_lock, flags);
if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
goto clr_busy;
- sqp->s_flags |= RVT_S_WAIT_RNR;
- sqp->s_timer.function = qib_rc_rnr_retry;
- sqp->s_timer.expires = jiffies +
- usecs_to_jiffies(ib_qib_rnr_table[qp->r_min_rnr_timer]);
- add_timer(&sqp->s_timer);
+ rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
+ IB_AETH_CREDIT_SHIFT);
goto clr_busy;
op_err:
@@ -621,7 +592,7 @@ acc_err:
wc.status = IB_WC_LOC_PROT_ERR;
err:
/* responder goes to error state */
- qib_rc_error(qp, wc.status);
+ rvt_rc_error(qp, wc.status);
serr:
spin_lock_irqsave(&sqp->s_lock, flags);
@@ -660,7 +631,7 @@ done:
* Return the size of the header in 32 bit words.
*/
u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
- struct ib_global_route *grh, u32 hwords, u32 nwords)
+ const struct ib_global_route *grh, u32 hwords, u32 nwords)
{
hdr->version_tclass_flow =
cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) |
@@ -693,20 +664,23 @@ void qib_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
extra_bytes = -qp->s_cur_size & 3;
nwords = (qp->s_cur_size + extra_bytes) >> 2;
lrh0 = QIB_LRH_BTH;
- if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
- qp->s_hdrwords += qib_make_grh(ibp, &priv->s_hdr->u.l.grh,
- &qp->remote_ah_attr.grh,
- qp->s_hdrwords, nwords);
+ if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) {
+ qp->s_hdrwords +=
+ qib_make_grh(ibp, &priv->s_hdr->u.l.grh,
+ rdma_ah_read_grh(&qp->remote_ah_attr),
+ qp->s_hdrwords, nwords);
lrh0 = QIB_LRH_GRH;
}
- lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 |
- qp->remote_ah_attr.sl << 4;
+ lrh0 |= ibp->sl_to_vl[rdma_ah_get_sl(&qp->remote_ah_attr)] << 12 |
+ rdma_ah_get_sl(&qp->remote_ah_attr) << 4;
priv->s_hdr->lrh[0] = cpu_to_be16(lrh0);
- priv->s_hdr->lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+ priv->s_hdr->lrh[1] =
+ cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr));
priv->s_hdr->lrh[2] =
cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
- priv->s_hdr->lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
- qp->remote_ah_attr.src_path_bits);
+ priv->s_hdr->lrh[3] =
+ cpu_to_be16(ppd_from_ibp(ibp)->lid |
+ rdma_ah_get_path_bits(&qp->remote_ah_attr));
bth0 |= qib_get_pkey(ibp, qp->s_pkey_index);
bth0 |= extra_bytes << 20;
if (qp->s_mig_state == IB_MIG_MIGRATED)
@@ -744,7 +718,8 @@ void qib_do_send(struct rvt_qp *qp)
if ((qp->ibqp.qp_type == IB_QPT_RC ||
qp->ibqp.qp_type == IB_QPT_UC) &&
- (qp->remote_ah_attr.dlid & ~((1 << ppd->lmc) - 1)) == ppd->lid) {
+ (rdma_ah_get_dlid(&qp->remote_ah_attr) &
+ ~((1 << ppd->lmc) - 1)) == ppd->lid) {
qib_ruc_loopback(qp);
return;
}
@@ -810,7 +785,10 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
qp->ibqp.qp_type == IB_QPT_GSI)
atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
- rvt_qp_swqe_complete(qp, wqe, status);
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ ib_qib_wc_opcode[wqe->wr.opcode],
+ status);
if (qp->s_acked == old_last)
qp->s_acked = last;
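
Editor's note: in qib_ruc.c the GRH comparisons now go through rdma_ah_read_grh(), which hands back a const struct ib_global_route pointer, and qib_make_grh() is constified to match. A sketch of the read-only access pattern (gid_ok() is the driver's existing helper; the wrapper assumes the qib headers and is illustrative):

    static bool example_sgid_matches(struct rvt_qp *qp, struct ib_header *hdr)
    {
            const struct ib_global_route *grh =
                    rdma_ah_read_grh(&qp->remote_ah_attr);

            return gid_ok(&hdr->u.l.grh.sgid,
                          grh->dgid.global.subnet_prefix,
                          grh->dgid.global.interface_id);
    }
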
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index 5b2d483451ad..498e2202e72c 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -74,7 +74,7 @@ int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags)
}
ohdr = &priv->s_hdr->u.oth;
- if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
ohdr = &priv->s_hdr->u.l.oth;
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
@@ -325,17 +325,8 @@ inv:
goto inv;
}
- if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST)) {
- qp->r_flags |= RVT_R_COMM_EST;
- if (qp->ibqp.event_handler) {
- struct ib_event ev;
-
- ev.device = qp->ibqp.device;
- ev.element.qp = &qp->ibqp;
- ev.event = IB_EVENT_COMM_EST;
- qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
- }
- }
+ if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
+ rvt_comm_est(qp);
/* OK, process the packet. */
switch (opcode) {
@@ -403,8 +394,8 @@ last_imm:
wc.status = IB_WC_SUCCESS;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.slid = qp->remote_ah_attr.dlid;
- wc.sl = qp->remote_ah_attr.sl;
+ wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
+ wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
/* zero fields that are N/A */
wc.vendor_err = 0;
wc.pkey_index = 0;
@@ -527,7 +518,7 @@ drop:
return;
op_err:
- qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
return;
}
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index f45cad1198b0..341a123ee95c 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -54,7 +54,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
struct qib_devdata *dd = ppd->dd;
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
struct rvt_qp *qp;
- struct ib_ah_attr *ah_attr;
+ struct rdma_ah_attr *ah_attr;
unsigned long flags;
struct rvt_sge_state ssge;
struct rvt_sge *sge;
@@ -92,13 +92,13 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
pkey1 = qib_get_pkey(ibp, sqp->s_pkey_index);
pkey2 = qib_get_pkey(ibp, qp->s_pkey_index);
if (unlikely(!qib_pkey_ok(pkey1, pkey2))) {
- lid = ppd->lid | (ah_attr->src_path_bits &
+ lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
((1 << ppd->lmc) - 1));
qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, pkey1,
- ah_attr->sl,
+ rdma_ah_get_sl(ah_attr),
sqp->ibqp.qp_num, qp->ibqp.qp_num,
cpu_to_be16(lid),
- cpu_to_be16(ah_attr->dlid));
+ cpu_to_be16(rdma_ah_get_dlid(ah_attr)));
goto drop;
}
}
@@ -116,13 +116,13 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (unlikely(qkey != qp->qkey)) {
u16 lid;
- lid = ppd->lid | (ah_attr->src_path_bits &
+ lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
((1 << ppd->lmc) - 1));
qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_QKEY, qkey,
- ah_attr->sl,
+ rdma_ah_get_sl(ah_attr),
sqp->ibqp.qp_num, qp->ibqp.qp_num,
cpu_to_be16(lid),
- cpu_to_be16(ah_attr->dlid));
+ cpu_to_be16(rdma_ah_get_dlid(ah_attr)));
goto drop;
}
}
@@ -152,7 +152,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
ret = qib_get_rwqe(qp, 0);
if (ret < 0) {
- qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
goto bail_unlock;
}
if (!ret) {
@@ -168,16 +168,16 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
goto bail_unlock;
}
- if (ah_attr->ah_flags & IB_AH_GRH) {
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
struct ib_grh grh;
- struct ib_global_route grd = ah_attr->grh;
+ const struct ib_global_route *grd = rdma_ah_read_grh(ah_attr);
- qib_make_grh(ibp, &grh, &grd, 0, 0);
+ qib_make_grh(ibp, &grh, grd, 0, 0);
qib_copy_sge(&qp->r_sge, &grh,
sizeof(grh), 1);
wc.wc_flags |= IB_WC_GRH;
} else
- qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
+ rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
ssge.sg_list = swqe->sg_list + 1;
ssge.sge = *swqe->sg_list;
ssge.num_sge = swqe->wr.num_sge;
@@ -220,9 +220,10 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
wc.src_qp = sqp->ibqp.qp_num;
wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ?
swqe->ud_wr.pkey_index : 0;
- wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1));
- wc.sl = ah_attr->sl;
- wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
+ wc.slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
+ ((1 << ppd->lmc) - 1));
+ wc.sl = rdma_ah_get_sl(ah_attr);
+ wc.dlid_path_bits = rdma_ah_get_dlid(ah_attr) & ((1 << ppd->lmc) - 1);
wc.port_num = qp->port_num;
/* Signal completion event if the solicited bit is set. */
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
@@ -246,7 +247,7 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
{
struct qib_qp_priv *priv = qp->priv;
struct ib_other_headers *ohdr;
- struct ib_ah_attr *ah_attr;
+ struct rdma_ah_attr *ah_attr;
struct qib_pportdata *ppd;
struct qib_ibport *ibp;
struct rvt_swqe *wqe;
@@ -289,14 +290,15 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
ibp = to_iport(qp->ibqp.device, qp->port_num);
ppd = ppd_from_ibp(ibp);
ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
- if (ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
- if (ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE))
+ if (rdma_ah_get_dlid(ah_attr) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
+ if (rdma_ah_get_dlid(ah_attr) !=
+ be16_to_cpu(IB_LID_PERMISSIVE))
this_cpu_inc(ibp->pmastats->n_multicast_xmit);
else
this_cpu_inc(ibp->pmastats->n_unicast_xmit);
} else {
this_cpu_inc(ibp->pmastats->n_unicast_xmit);
- lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
+ lid = rdma_ah_get_dlid(ah_attr) & ~((1 << ppd->lmc) - 1);
if (unlikely(lid == ppd->lid)) {
unsigned long tflags = *flags;
/*
@@ -328,17 +330,17 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
qp->s_hdrwords = 7;
qp->s_cur_size = wqe->length;
qp->s_cur_sge = &qp->s_sge;
- qp->s_srate = ah_attr->static_rate;
+ qp->s_srate = rdma_ah_get_static_rate(ah_attr);
qp->s_wqe = wqe;
qp->s_sge.sge = wqe->sg_list[0];
qp->s_sge.sg_list = wqe->sg_list + 1;
qp->s_sge.num_sge = wqe->wr.num_sge;
qp->s_sge.total_len = wqe->length;
- if (ah_attr->ah_flags & IB_AH_GRH) {
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
/* Header size in 32-bit words. */
qp->s_hdrwords += qib_make_grh(ibp, &priv->s_hdr->u.l.grh,
- &ah_attr->grh,
+ rdma_ah_read_grh(ah_attr),
qp->s_hdrwords, nwords);
lrh0 = QIB_LRH_GRH;
ohdr = &priv->s_hdr->u.l.oth;
@@ -357,18 +359,20 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
} else
bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
- lrh0 |= ah_attr->sl << 4;
+ lrh0 |= rdma_ah_get_sl(ah_attr) << 4;
if (qp->ibqp.qp_type == IB_QPT_SMI)
lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
else
- lrh0 |= ibp->sl_to_vl[ah_attr->sl] << 12;
+ lrh0 |= ibp->sl_to_vl[rdma_ah_get_sl(ah_attr)] << 12;
priv->s_hdr->lrh[0] = cpu_to_be16(lrh0);
- priv->s_hdr->lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */
+ priv->s_hdr->lrh[1] =
+ cpu_to_be16(rdma_ah_get_dlid(ah_attr)); /* DEST LID */
priv->s_hdr->lrh[2] =
cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
lid = ppd->lid;
if (lid) {
- lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1);
+ lid |= rdma_ah_get_path_bits(ah_attr) &
+ ((1 << ppd->lmc) - 1);
priv->s_hdr->lrh[3] = cpu_to_be16(lid);
} else
priv->s_hdr->lrh[3] = IB_LID_PERMISSIVE;
@@ -382,8 +386,9 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
/*
* Use the multicast QP if the destination LID is a multicast LID.
*/
- ohdr->bth[1] = ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) &&
- ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) ?
+ ohdr->bth[1] = rdma_ah_get_dlid(ah_attr) >=
+ be16_to_cpu(IB_MULTICAST_LID_BASE) &&
+ rdma_ah_get_dlid(ah_attr) != be16_to_cpu(IB_LID_PERMISSIVE) ?
cpu_to_be32(QIB_MULTICAST_QPN) :
cpu_to_be32(wqe->ud_wr.remote_qpn);
ohdr->bth[2] = cpu_to_be32(wqe->psn & QIB_PSN_MASK);
@@ -548,7 +553,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
ret = qib_get_rwqe(qp, 0);
if (ret < 0) {
- qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
return;
}
if (!ret) {
@@ -567,7 +572,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
sizeof(struct ib_grh), 1);
wc.wc_flags |= IB_WC_GRH;
} else
- qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
+ rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1);
rvt_put_ss(&qp->r_sge);
if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index 75f08624ac05..ce83ba9a12ef 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -32,6 +32,7 @@
*/
#include <linux/mm.h>
+#include <linux/sched/signal.h>
#include <linux/device.h>
#include "qib.h"
diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
index 3e0677c51276..926f3c8eba69 100644
--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -144,8 +144,8 @@ qib_user_sdma_rb_search(struct rb_root *root, pid_t pid)
struct rb_node *node = root->rb_node;
while (node) {
- sdma_rb_node = container_of(node,
- struct qib_user_sdma_rb_node, node);
+ sdma_rb_node = rb_entry(node, struct qib_user_sdma_rb_node,
+ node);
if (pid < sdma_rb_node->pid)
node = node->rb_left;
else if (pid > sdma_rb_node->pid)
@@ -164,7 +164,7 @@ qib_user_sdma_rb_insert(struct rb_root *root, struct qib_user_sdma_rb_node *new)
struct qib_user_sdma_rb_node *got;
while (*node) {
- got = container_of(*node, struct qib_user_sdma_rb_node, node);
+ got = rb_entry(*node, struct qib_user_sdma_rb_node, node);
parent = *node;
if (new->pid < got->pid)
node = &((*node)->rb_left);
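
Editor's note: the qib_user_sdma.c change swaps container_of() for rb_entry(), the rbtree-specific wrapper with identical semantics; it only makes the intent clearer. Sketch with an illustrative node type:

    #include <linux/rbtree.h>
    #include <linux/types.h>

    struct example_node {
            struct rb_node rb;
            pid_t pid;
    };

    static struct example_node *example_search(struct rb_root *root, pid_t pid)
    {
            struct rb_node *n = root->rb_node;

            while (n) {
                    struct example_node *e =
                            rb_entry(n, struct example_node, rb);

                    if (pid < e->pid)
                            n = n->rb_left;
                    else if (pid > e->pid)
                            n = n->rb_right;
                    else
                            return e;
            }
            return NULL;
    }
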
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 4b54c0ddd08a..ac42dce7e281 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -114,6 +114,19 @@ module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(disable_sma, "Disable the SMA");
/*
+ * Translate ib_wr_opcode into ib_wc_opcode.
+ */
+const enum ib_wc_opcode ib_qib_wc_opcode[] = {
+ [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
+ [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
+ [IB_WR_SEND] = IB_WC_SEND,
+ [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
+ [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
+ [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
+};
+
+/*
* System image GUID.
*/
__be64 ib_qib_sys_image_guid;
@@ -129,78 +142,16 @@ void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, int release)
struct rvt_sge *sge = &ss->sge;
while (length) {
- u32 len = sge->length;
+ u32 len = rvt_get_sge_length(sge, length);
- if (len > length)
- len = length;
- if (len > sge->sge_length)
- len = sge->sge_length;
- BUG_ON(len == 0);
+ WARN_ON_ONCE(len == 0);
memcpy(sge->vaddr, data, len);
- sge->vaddr += len;
- sge->length -= len;
- sge->sge_length -= len;
- if (sge->sge_length == 0) {
- if (release)
- rvt_put_mr(sge->mr);
- if (--ss->num_sge)
- *sge = *ss->sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- break;
- sge->n = 0;
- }
- sge->vaddr =
- sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length =
- sge->mr->map[sge->m]->segs[sge->n].length;
- }
+ rvt_update_sge(ss, len, release);
data += len;
length -= len;
}
}
-/**
- * qib_skip_sge - skip over SGE memory - XXX almost dup of prev func
- * @ss: the SGE state
- * @length: the number of bytes to skip
- */
-void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release)
-{
- struct rvt_sge *sge = &ss->sge;
-
- while (length) {
- u32 len = sge->length;
-
- if (len > length)
- len = length;
- if (len > sge->sge_length)
- len = sge->sge_length;
- BUG_ON(len == 0);
- sge->vaddr += len;
- sge->length -= len;
- sge->sge_length -= len;
- if (sge->sge_length == 0) {
- if (release)
- rvt_put_mr(sge->mr);
- if (--ss->num_sge)
- *sge = *ss->sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- break;
- sge->n = 0;
- }
- sge->vaddr =
- sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length =
- sge->mr->map[sge->m]->segs[sge->n].length;
- }
- length -= len;
- }
-}
-
/*
* Count the number of DMA descriptors needed to send length bytes of data.
* Don't modify the qib_sge_state to get the count.
@@ -405,7 +356,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
if (lnh != QIB_LRH_GRH)
goto drop;
- mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid);
+ mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid, lid);
if (mcast == NULL)
goto drop;
this_cpu_inc(ibp->pmastats->n_multicast_rcv);
@@ -468,27 +419,6 @@ static void mem_timer(unsigned long data)
}
}
-static void update_sge(struct rvt_sge_state *ss, u32 length)
-{
- struct rvt_sge *sge = &ss->sge;
-
- sge->vaddr += length;
- sge->length -= length;
- sge->sge_length -= length;
- if (sge->sge_length == 0) {
- if (--ss->num_sge)
- *sge = *ss->sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- return;
- sge->n = 0;
- }
- sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
- }
-}
-
#ifdef __LITTLE_ENDIAN
static inline u32 get_upper_bits(u32 data, u32 shift)
{
@@ -646,11 +576,11 @@ static void copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss,
data = clear_upper_bytes(v, extra, 0);
}
}
- update_sge(ss, len);
+ rvt_update_sge(ss, len, false);
length -= len;
}
/* Update address before sending packet. */
- update_sge(ss, length);
+ rvt_update_sge(ss, length, false);
if (flush_wc) {
/* must flush early everything before trigger word */
qib_flush_wc();
@@ -1069,7 +999,7 @@ static int qib_verbs_send_pio(struct rvt_qp *qp, struct ib_header *ibhdr,
u32 *addr = (u32 *) ss->sge.vaddr;
/* Update address before sending packet. */
- update_sge(ss, len);
+ rvt_update_sge(ss, len, false);
if (flush_wc) {
qib_pio_copy(piobuf, addr, dwords - 1);
/* must flush early everything before trigger word */
@@ -1303,6 +1233,7 @@ static int qib_query_port(struct rvt_dev_info *rdi, u8 port_num,
enum ib_mtu mtu;
u16 lid = ppd->lid;
+ /* props being zeroed by the caller, avoid zeroing it here */
props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
props->lmc = ppd->lmc;
props->state = dd->f_iblink_state(ppd->lastibcstat);
@@ -1405,16 +1336,16 @@ static int qib_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
return 0;
}
-int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
+int qib_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr)
{
- if (ah_attr->sl > 15)
+ if (rdma_ah_get_sl(ah_attr) > 15)
return -EINVAL;
return 0;
}
static void qib_notify_new_ah(struct ib_device *ibdev,
- struct ib_ah_attr *ah_attr,
+ struct rdma_ah_attr *ah_attr,
struct rvt_ah *ah)
{
struct qib_ibport *ibp;
@@ -1425,25 +1356,29 @@ static void qib_notify_new_ah(struct ib_device *ibdev,
* done being setup. We can however modify things which we need to set.
*/
- ibp = to_iport(ibdev, ah_attr->port_num);
+ ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr));
ppd = ppd_from_ibp(ibp);
- ah->vl = ibp->sl_to_vl[ah->attr.sl];
+ ah->vl = ibp->sl_to_vl[rdma_ah_get_sl(&ah->attr)];
ah->log_pmtu = ilog2(ppd->ibmtu);
}
struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid)
{
- struct ib_ah_attr attr;
+ struct rdma_ah_attr attr;
struct ib_ah *ah = ERR_PTR(-EINVAL);
struct rvt_qp *qp0;
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ struct qib_devdata *dd = dd_from_ppd(ppd);
+ u8 port_num = ppd->port;
memset(&attr, 0, sizeof(attr));
- attr.dlid = dlid;
- attr.port_num = ppd_from_ibp(ibp)->port;
+ attr.type = rdma_ah_find_type(&dd->verbs_dev.rdi.ibdev, port_num);
+ rdma_ah_set_dlid(&attr, dlid);
+ rdma_ah_set_port_num(&attr, port_num);
rcu_read_lock();
qp0 = rcu_dereference(ibp->rvp.qp[0]);
if (qp0)
- ah = ib_create_ah(qp0->ibqp.pd, &attr);
+ ah = rdma_create_ah(qp0->ibqp.pd, &attr);
rcu_read_unlock();
return ah;
}
@@ -1632,7 +1567,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
ibdev->owner = THIS_MODULE;
ibdev->node_guid = ppd->guid;
ibdev->phys_port_cnt = dd->num_pports;
- ibdev->dma_device = &dd->pcidev->dev;
+ ibdev->dev.parent = &dd->pcidev->dev;
ibdev->modify_device = qib_modify_device;
ibdev->process_mad = qib_process_mad;
@@ -1659,6 +1594,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
dd->verbs_dev.rdi.driver_f.stop_send_queue = qib_stop_send_queue;
dd->verbs_dev.rdi.driver_f.flush_qp_waiters = qib_flush_qp_waiters;
dd->verbs_dev.rdi.driver_f.notify_error_qp = qib_notify_error_qp;
+ dd->verbs_dev.rdi.driver_f.notify_restart_rc = qib_restart_rc;
dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = qib_mtu_to_path_mtu;
dd->verbs_dev.rdi.driver_f.mtu_from_qp = qib_mtu_from_qp;
dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = qib_get_pmtu_from_attr;
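
Editor's note: the qib_verbs.c hunks have three themes: a new ib_qib_wc_opcode[] table because rvt_qp_swqe_complete() now takes the work-completion opcode explicitly (see the qib_rc.c/qib_ruc.c call sites above); the hand-rolled SGE copy/skip/update loops collapse onto rdmavt's rvt_get_sge_length()/rvt_update_sge()/rvt_skip_sge(); and ibdev->dma_device is replaced by setting ibdev->dev.parent. A hedged sketch of the slimmed-down copy loop (wrapper name is illustrative):

    static void example_copy_sge(struct rvt_sge_state *ss, u8 *data,
                                 u32 length, bool release)
    {
            while (length) {
                    u32 len = rvt_get_sge_length(&ss->sge, length);

                    memcpy(ss->sge.vaddr, data, len);
                    rvt_update_sge(ss, len, release); /* advance, drop MR refs */
                    data += len;
                    length -= len;
            }
    }
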
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 94fd30fdedac..a52fc67b40d7 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -270,18 +270,16 @@ int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords,
int qib_get_counters(struct qib_pportdata *ppd,
struct qib_verbs_counters *cntrs);
-__be32 qib_compute_aeth(struct rvt_qp *qp);
-
/*
* Functions provided by qib driver for rdmavt to use
*/
unsigned qib_free_all_qps(struct rvt_dev_info *rdi);
-void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp);
+void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp);
void qib_qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);
void qib_notify_qp_reset(struct rvt_qp *qp);
int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
- enum ib_qp_type type, u8 port, gfp_t gfp);
-
+ enum ib_qp_type type, u8 port);
+void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait);
#ifdef CONFIG_DEBUG_FS
struct qib_qp_iter;
@@ -294,8 +292,6 @@ void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter);
#endif
-void qib_get_credit(struct rvt_qp *qp, u32 aeth);
-
unsigned qib_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult);
void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail);
@@ -308,15 +304,13 @@ int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr,
void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
int release);
-void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release);
-
void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
-int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
+int qib_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
@@ -326,8 +320,6 @@ void qib_rc_rnr_retry(unsigned long arg);
void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr);
-void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
-
int qib_post_ud_send(struct rvt_qp *qp, struct ib_send_wr *wr);
void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
@@ -343,7 +335,7 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr,
int has_grh, struct rvt_qp *qp, u32 bth0);
u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
- struct ib_global_route *grh, u32 hwords, u32 nwords);
+ const struct ib_global_route *grh, u32 hwords, u32 nwords);
void qib_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth2);
diff --git a/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h b/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h
index 596e0ed49a8e..bf7d197a9f42 100644
--- a/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h
+++ b/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h
@@ -34,7 +34,6 @@
#ifndef USNIC_CMN_PKT_HDR_H
#define USNIC_CMN_PKT_HDR_H
-#define USNIC_ROCE_ETHERTYPE (0x8915)
#define USNIC_ROCE_GRH_VER (8)
#define USNIC_PROTO_VER (1)
#define USNIC_ROCE_GRH_VER_SHIFT (4)
diff --git a/drivers/infiniband/hw/usnic/usnic_common_util.h b/drivers/infiniband/hw/usnic/usnic_common_util.h
index b54986de5f0c..ddd81294fa46 100644
--- a/drivers/infiniband/hw/usnic/usnic_common_util.h
+++ b/drivers/infiniband/hw/usnic/usnic_common_util.h
@@ -34,21 +34,7 @@
#ifndef USNIC_CMN_UTIL_H
#define USNIC_CMN_UTIL_H
-static inline void
-usnic_mac_to_gid(const char *const mac, char *raw_gid)
-{
- raw_gid[0] = 0xfe;
- raw_gid[1] = 0x80;
- memset(&raw_gid[2], 0, 6);
- raw_gid[8] = mac[0]^2;
- raw_gid[9] = mac[1];
- raw_gid[10] = mac[2];
- raw_gid[11] = 0xff;
- raw_gid[12] = 0xfe;
- raw_gid[13] = mac[3];
- raw_gid[14] = mac[4];
- raw_gid[15] = mac[5];
-}
+#include <net/addrconf.h>
static inline void
usnic_mac_ip_to_gid(const char *const mac, const __be32 inaddr, char *raw_gid)
@@ -57,27 +43,7 @@ usnic_mac_ip_to_gid(const char *const mac, const __be32 inaddr, char *raw_gid)
raw_gid[1] = 0x80;
memset(&raw_gid[2], 0, 2);
memcpy(&raw_gid[4], &inaddr, 4);
- raw_gid[8] = mac[0]^2;
- raw_gid[9] = mac[1];
- raw_gid[10] = mac[2];
- raw_gid[11] = 0xff;
- raw_gid[12] = 0xfe;
- raw_gid[13] = mac[3];
- raw_gid[14] = mac[4];
- raw_gid[15] = mac[5];
-}
-
-static inline void
-usnic_write_gid_if_id_from_mac(char *mac, char *raw_gid)
-{
- raw_gid[8] = mac[0]^2;
- raw_gid[9] = mac[1];
- raw_gid[10] = mac[2];
- raw_gid[11] = 0xff;
- raw_gid[12] = 0xfe;
- raw_gid[13] = mac[3];
- raw_gid[14] = mac[4];
- raw_gid[15] = mac[5];
+ addrconf_addr_eui48(&raw_gid[8], mac);
}
#endif /* USNIC_COMMON_UTIL_H */
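The hunks above drop the open-coded EUI-64 expansion in favor of addrconf_addr_eui48() from <net/addrconf.h>. A minimal sketch of building a link-local GID with that helper; the example_mac_to_gid() name is hypothetical, only addrconf_addr_eui48() comes from the patch:

#include <linux/types.h>
#include <linux/string.h>
#include <net/addrconf.h>

/* Illustrative: fe80::/64 prefix plus a MAC-derived interface ID. */
static inline void example_mac_to_gid(const char *mac, u8 raw_gid[16])
{
	raw_gid[0] = 0xfe;
	raw_gid[1] = 0x80;
	memset(&raw_gid[2], 0, 6);
	/* Fills bytes 8..15: flips the locally-administered bit and inserts
	 * the 0xff/0xfe padding, as the removed code did by hand. */
	addrconf_addr_eui48(&raw_gid[8], mac);
}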
diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.h b/drivers/infiniband/hw/usnic/usnic_fwd.h
index 3a8add9ddf46..b2ac22be0731 100644
--- a/drivers/infiniband/hw/usnic/usnic_fwd.h
+++ b/drivers/infiniband/hw/usnic/usnic_fwd.h
@@ -36,6 +36,7 @@
#include <linux/if.h>
#include <linux/netdevice.h>
+#include <linux/if_ether.h>
#include <linux/pci.h>
#include <linux/in.h>
@@ -97,7 +98,7 @@ static inline void usnic_fwd_init_usnic_filter(struct filter *filter,
uint32_t usnic_id)
{
filter->type = FILTER_USNIC_ID;
- filter->u.usnic.ethtype = USNIC_ROCE_ETHERTYPE;
+ filter->u.usnic.ethtype = ETH_P_IBOE;
filter->u.usnic.flags = FILTER_FIELD_USNIC_ETHTYPE |
FILTER_FIELD_USNIC_ID |
FILTER_FIELD_USNIC_PROTO;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index 0a89a955550b..c0c1e8b027b1 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -321,7 +321,9 @@ static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
- err = usnic_ib_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_USNIC;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
@@ -380,7 +382,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
us_ibdev->ib_dev.node_type = RDMA_NODE_USNIC_UDP;
us_ibdev->ib_dev.phys_port_cnt = USNIC_IB_PORT_CNT;
us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS;
- us_ibdev->ib_dev.dma_device = &dev->dev;
+ us_ibdev->ib_dev.dev.parent = &dev->dev;
us_ibdev->ib_dev.uverbs_abi_ver = USNIC_UVERBS_ABI_VERSION;
strlcpy(us_ibdev->ib_dev.name, "usnic_%d", IB_DEVICE_NAME_MAX);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
index 80ef3f8998c8..32956f9f5715 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
@@ -44,6 +44,7 @@
#include "usnic_vnic.h"
#include "usnic_ib_verbs.h"
#include "usnic_log.h"
+#include "usnic_ib_sysfs.h"
static ssize_t usnic_ib_show_board(struct device *device,
struct device_attribute *attr,
@@ -80,7 +81,7 @@ usnic_ib_show_config(struct device *device, struct device_attribute *attr,
left = PAGE_SIZE;
mutex_lock(&us_ibdev->usdev_lock);
- if (atomic_read(&us_ibdev->vf_cnt.refcount) > 0) {
+ if (kref_read(&us_ibdev->vf_cnt) > 0) {
char *busname;
/*
@@ -99,7 +100,7 @@ usnic_ib_show_config(struct device *device, struct device_attribute *attr,
PCI_FUNC(us_ibdev->pdev->devfn),
netdev_name(us_ibdev->netdev),
us_ibdev->ufdev->mac,
- atomic_read(&us_ibdev->vf_cnt.refcount));
+ kref_read(&us_ibdev->vf_cnt));
UPDATE_PTR_LEFT(n, ptr, left);
for (res_type = USNIC_VNIC_RES_TYPE_EOL;
@@ -147,7 +148,7 @@ usnic_ib_show_max_vf(struct device *device, struct device_attribute *attr,
us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
return scnprintf(buf, PAGE_SIZE, "%u\n",
- atomic_read(&us_ibdev->vf_cnt.refcount));
+ kref_read(&us_ibdev->vf_cnt));
}
static ssize_t
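The sysfs conversions above stop reading the kref's internal atomic and use kref_read() instead. A hedged one-liner sketch of the pattern (the wrapper name is illustrative; vf_cnt is the driver field being converted):

#include <linux/kref.h>

/* Before: atomic_read(&obj->vf_cnt.refcount)  -- pokes at kref internals.
 * After:  kref_read(&obj->vf_cnt)             -- same value, stable API.  */
static unsigned int example_vf_count(struct kref *vf_cnt)
{
	return kref_read(vf_cnt);
}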
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 74819a7951e2..4996984885c2 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -46,6 +46,7 @@
#include "usnic_log.h"
#include "usnic_uiom.h"
#include "usnic_transport.h"
+#include "usnic_ib_verbs.h"
#define USNIC_DEFAULT_TRANSPORT USNIC_TRANSPORT_ROCE_CUSTOM
@@ -151,7 +152,7 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
struct usnic_vnic *vnic;
struct usnic_ib_qp_grp *qp_grp;
struct device *dev, **dev_list;
- int i, found = 0;
+ int i;
BUG_ON(!mutex_is_locked(&us_ibdev->usdev_lock));
@@ -173,8 +174,13 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
us_ibdev->ib_dev.name,
pci_name(usnic_vnic_get_pdev(
vnic)));
- found = 1;
- break;
+ qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev,
+ vf, pd,
+ res_spec,
+ trans_spec);
+
+ spin_unlock(&vf->lock);
+ goto qp_grp_check;
}
spin_unlock(&vf->lock);
@@ -182,34 +188,30 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
usnic_uiom_free_dev_list(dev_list);
}
- if (!found) {
- /* Try to find resources on an unused vf */
- list_for_each_entry(vf, &us_ibdev->vf_dev_list, link) {
- spin_lock(&vf->lock);
- vnic = vf->vnic;
- if (vf->qp_grp_ref_cnt == 0 &&
- usnic_vnic_check_room(vnic, res_spec) == 0) {
- found = 1;
- break;
- }
+ /* Try to find resources on an unused vf */
+ list_for_each_entry(vf, &us_ibdev->vf_dev_list, link) {
+ spin_lock(&vf->lock);
+ vnic = vf->vnic;
+ if (vf->qp_grp_ref_cnt == 0 &&
+ usnic_vnic_check_room(vnic, res_spec) == 0) {
+ qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev, vf,
+ pd, res_spec,
+ trans_spec);
+
spin_unlock(&vf->lock);
+ goto qp_grp_check;
}
+ spin_unlock(&vf->lock);
}
- if (!found) {
- usnic_info("No free qp grp found on %s\n",
- us_ibdev->ib_dev.name);
- return ERR_PTR(-ENOMEM);
- }
+ usnic_info("No free qp grp found on %s\n", us_ibdev->ib_dev.name);
+ return ERR_PTR(-ENOMEM);
- qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev, vf, pd, res_spec,
- trans_spec);
- spin_unlock(&vf->lock);
+qp_grp_check:
if (IS_ERR_OR_NULL(qp_grp)) {
usnic_err("Failed to allocate qp_grp\n");
return ERR_PTR(qp_grp ? PTR_ERR(qp_grp) : -ENOMEM);
}
-
return qp_grp;
}
@@ -291,11 +293,11 @@ int usnic_ib_query_device(struct ib_device *ibdev,
qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ],
us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]);
props->max_qp = qp_per_vf *
- atomic_read(&us_ibdev->vf_cnt.refcount);
+ kref_read(&us_ibdev->vf_cnt);
props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
props->max_cq = us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ] *
- atomic_read(&us_ibdev->vf_cnt.refcount);
+ kref_read(&us_ibdev->vf_cnt);
props->max_pd = USNIC_UIOM_MAX_PD_CNT;
props->max_mr = USNIC_UIOM_MAX_MR_CNT;
props->local_ca_ack_delay = 0;
@@ -330,7 +332,7 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
mutex_lock(&us_ibdev->usdev_lock);
__ethtool_get_link_ksettings(us_ibdev->netdev, &cmd);
- memset(props, 0, sizeof(*props));
+ /* props is zeroed by the caller, avoid zeroing it here */
props->lid = 0;
props->lmc = 1;
@@ -738,7 +740,7 @@ int usnic_ib_mmap(struct ib_ucontext *context,
/* In ib callbacks section - Start of stub funcs */
struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr,
+ struct rdma_ah_attr *ah_attr,
struct ib_udata *udata)
{
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
index 0ed8e072329e..172e43b6fa95 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -75,7 +75,7 @@ int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext);
int usnic_ib_mmap(struct ib_ucontext *context,
struct vm_area_struct *vma);
struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr,
+ struct rdma_ah_attr *ah_attr,
struct ib_udata *udata);
int usnic_ib_destroy_ah(struct ib_ah *ah);
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 1ccee6ea5bc3..c49db7c33979 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -34,7 +34,8 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
#include <linux/hugetlb.h>
#include <linux/iommu.h>
#include <linux/workqueue.h>
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
index 71e1d55d69d6..8e2f0a11690f 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
@@ -69,6 +69,9 @@
*/
#define PCI_DEVICE_ID_VMWARE_PVRDMA 0x0820
+#define PVRDMA_NUM_RING_PAGES 4
+#define PVRDMA_QP_NUM_HEADER_PAGES 1
+
struct pvrdma_dev;
struct pvrdma_page_dir {
@@ -196,13 +199,7 @@ struct pvrdma_dev {
spinlock_t cmd_lock; /* Command lock. */
struct semaphore cmd_sema;
struct completion cmd_done;
- struct {
- enum pvrdma_intr_type type; /* Intr type */
- struct msix_entry msix_entry[PVRDMA_MAX_INTERRUPTS];
- irq_handler_t handler[PVRDMA_MAX_INTERRUPTS];
- u8 enabled[PVRDMA_MAX_INTERRUPTS];
- u8 size;
- } intr;
+ unsigned int nr_vectors;
/* RDMA-related device information. */
union ib_gid *sgid_tbl;
@@ -443,10 +440,10 @@ void pvrdma_global_route_to_ib(struct ib_global_route *dst,
const struct pvrdma_global_route *src);
void ib_global_route_to_pvrdma(struct pvrdma_global_route *dst,
const struct ib_global_route *src);
-void pvrdma_ah_attr_to_ib(struct ib_ah_attr *dst,
- const struct pvrdma_ah_attr *src);
-void ib_ah_attr_to_pvrdma(struct pvrdma_ah_attr *dst,
- const struct ib_ah_attr *src);
+void pvrdma_ah_attr_to_rdma(struct rdma_ah_attr *dst,
+ const struct pvrdma_ah_attr *src);
+void rdma_ah_attr_to_pvrdma(struct pvrdma_ah_attr *dst,
+ const struct rdma_ah_attr *src);
int pvrdma_uar_table_init(struct pvrdma_dev *dev);
void pvrdma_uar_table_cleanup(struct pvrdma_dev *dev);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
index e429ca5b16aa..69bda611d313 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
@@ -373,7 +373,7 @@ retry:
wc->sl = cqe->sl;
wc->dlid_path_bits = cqe->dlid_path_bits;
wc->port_num = cqe->port_num;
- wc->vendor_err = 0;
+ wc->vendor_err = cqe->vendor_err;
/* Update shared ring state */
pvrdma_idx_ring_inc(&cq->ring_state->rx.cons_head, cq->ibcq.cqe);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
index c06768635d65..09078ccfaec7 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
@@ -132,7 +132,7 @@ enum pvrdma_pci_resource {
enum pvrdma_device_ctl {
PVRDMA_DEVICE_CTL_ACTIVATE, /* Activate device. */
- PVRDMA_DEVICE_CTL_QUIESCE, /* Quiesce device. */
+ PVRDMA_DEVICE_CTL_UNQUIESCE, /* Unquiesce device. */
PVRDMA_DEVICE_CTL_RESET, /* Reset device. */
};
@@ -149,12 +149,6 @@ enum pvrdma_intr_cause {
PVRDMA_INTR_CAUSE_CQ = (1 << PVRDMA_INTR_VECTOR_CQ),
};
-enum pvrdma_intr_type {
- PVRDMA_INTR_TYPE_INTX, /* Legacy. */
- PVRDMA_INTR_TYPE_MSI, /* MSI. */
- PVRDMA_INTR_TYPE_MSIX, /* MSI-X. */
-};
-
enum pvrdma_gos_bits {
PVRDMA_GOS_BITS_UNK, /* Unknown. */
PVRDMA_GOS_BITS_32, /* 32-bit. */
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index bd8fbd3d2032..34ebc7615411 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -56,7 +56,7 @@
#include "pvrdma.h"
#define DRV_NAME "vmw_pvrdma"
-#define DRV_VERSION "1.0.0.0-k"
+#define DRV_VERSION "1.0.1.0-k"
static DEFINE_MUTEX(pvrdma_device_list_lock);
static LIST_HEAD(pvrdma_device_list);
@@ -132,13 +132,14 @@ static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
- err = pvrdma_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
}
@@ -172,7 +173,7 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
dev->flags = 0;
dev->ib_dev.owner = THIS_MODULE;
dev->ib_dev.num_comp_vectors = 1;
- dev->ib_dev.dma_device = &dev->pdev->dev;
+ dev->ib_dev.dev.parent = &dev->pdev->dev;
dev->ib_dev.uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION;
dev->ib_dev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
@@ -282,7 +283,7 @@ static irqreturn_t pvrdma_intr0_handler(int irq, void *dev_id)
dev_dbg(&dev->pdev->dev, "interrupt 0 (response) handler\n");
- if (dev->intr.type != PVRDMA_INTR_TYPE_MSIX) {
+ if (!dev->pdev->msix_enabled) {
/* Legacy intr */
icr = pvrdma_read_reg(dev, PVRDMA_REG_ICR);
if (icr == 0)
@@ -489,31 +490,13 @@ static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static void pvrdma_disable_msi_all(struct pvrdma_dev *dev)
-{
- if (dev->intr.type == PVRDMA_INTR_TYPE_MSIX)
- pci_disable_msix(dev->pdev);
- else if (dev->intr.type == PVRDMA_INTR_TYPE_MSI)
- pci_disable_msi(dev->pdev);
-}
-
static void pvrdma_free_irq(struct pvrdma_dev *dev)
{
int i;
dev_dbg(&dev->pdev->dev, "freeing interrupts\n");
-
- if (dev->intr.type == PVRDMA_INTR_TYPE_MSIX) {
- for (i = 0; i < dev->intr.size; i++) {
- if (dev->intr.enabled[i]) {
- free_irq(dev->intr.msix_entry[i].vector, dev);
- dev->intr.enabled[i] = 0;
- }
- }
- } else if (dev->intr.type == PVRDMA_INTR_TYPE_INTX ||
- dev->intr.type == PVRDMA_INTR_TYPE_MSI) {
- free_irq(dev->pdev->irq, dev);
- }
+ for (i = 0; i < dev->nr_vectors; i++)
+ free_irq(pci_irq_vector(dev->pdev, i), dev);
}
static void pvrdma_enable_intrs(struct pvrdma_dev *dev)
@@ -528,126 +511,48 @@ static void pvrdma_disable_intrs(struct pvrdma_dev *dev)
pvrdma_write_reg(dev, PVRDMA_REG_IMR, ~0);
}
-static int pvrdma_enable_msix(struct pci_dev *pdev, struct pvrdma_dev *dev)
-{
- int i;
- int ret;
-
- for (i = 0; i < PVRDMA_MAX_INTERRUPTS; i++) {
- dev->intr.msix_entry[i].entry = i;
- dev->intr.msix_entry[i].vector = i;
-
- switch (i) {
- case 0:
- /* CMD ring handler */
- dev->intr.handler[i] = pvrdma_intr0_handler;
- break;
- case 1:
- /* Async event ring handler */
- dev->intr.handler[i] = pvrdma_intr1_handler;
- break;
- default:
- /* Completion queue handler */
- dev->intr.handler[i] = pvrdma_intrx_handler;
- break;
- }
- }
-
- ret = pci_enable_msix(pdev, dev->intr.msix_entry,
- PVRDMA_MAX_INTERRUPTS);
- if (!ret) {
- dev->intr.type = PVRDMA_INTR_TYPE_MSIX;
- dev->intr.size = PVRDMA_MAX_INTERRUPTS;
- } else if (ret > 0) {
- ret = pci_enable_msix(pdev, dev->intr.msix_entry, ret);
- if (!ret) {
- dev->intr.type = PVRDMA_INTR_TYPE_MSIX;
- dev->intr.size = ret;
- } else {
- dev->intr.size = 0;
- }
- }
-
- dev_dbg(&pdev->dev, "using interrupt type %d, size %d\n",
- dev->intr.type, dev->intr.size);
-
- return ret;
-}
-
static int pvrdma_alloc_intrs(struct pvrdma_dev *dev)
{
- int ret = 0;
- int i;
+ struct pci_dev *pdev = dev->pdev;
+ int ret = 0, i;
- if (pci_find_capability(dev->pdev, PCI_CAP_ID_MSIX) &&
- pvrdma_enable_msix(dev->pdev, dev)) {
- /* Try MSI */
- ret = pci_enable_msi(dev->pdev);
- if (!ret) {
- dev->intr.type = PVRDMA_INTR_TYPE_MSI;
- } else {
- /* Legacy INTR */
- dev->intr.type = PVRDMA_INTR_TYPE_INTX;
- }
+ ret = pci_alloc_irq_vectors(pdev, 1, PVRDMA_MAX_INTERRUPTS,
+ PCI_IRQ_MSIX);
+ if (ret < 0) {
+ ret = pci_alloc_irq_vectors(pdev, 1, 1,
+ PCI_IRQ_MSI | PCI_IRQ_LEGACY);
+ if (ret < 0)
+ return ret;
}
+ dev->nr_vectors = ret;
- /* Request First IRQ */
- switch (dev->intr.type) {
- case PVRDMA_INTR_TYPE_INTX:
- case PVRDMA_INTR_TYPE_MSI:
- ret = request_irq(dev->pdev->irq, pvrdma_intr0_handler,
- IRQF_SHARED, DRV_NAME, dev);
- if (ret) {
- dev_err(&dev->pdev->dev,
- "failed to request interrupt\n");
- goto disable_msi;
- }
- break;
- case PVRDMA_INTR_TYPE_MSIX:
- ret = request_irq(dev->intr.msix_entry[0].vector,
- pvrdma_intr0_handler, 0, DRV_NAME, dev);
- if (ret) {
- dev_err(&dev->pdev->dev,
- "failed to request interrupt 0\n");
- goto disable_msi;
- }
- dev->intr.enabled[0] = 1;
- break;
- default:
- /* Not reached */
- break;
+ ret = request_irq(pci_irq_vector(dev->pdev, 0), pvrdma_intr0_handler,
+ pdev->msix_enabled ? 0 : IRQF_SHARED, DRV_NAME, dev);
+ if (ret) {
+ dev_err(&dev->pdev->dev,
+ "failed to request interrupt 0\n");
+ goto out_free_vectors;
}
- /* For MSIX: request intr for each vector */
- if (dev->intr.size > 1) {
- ret = request_irq(dev->intr.msix_entry[1].vector,
- pvrdma_intr1_handler, 0, DRV_NAME, dev);
+ for (i = 1; i < dev->nr_vectors; i++) {
+ ret = request_irq(pci_irq_vector(dev->pdev, i),
+ i == 1 ? pvrdma_intr1_handler :
+ pvrdma_intrx_handler,
+ 0, DRV_NAME, dev);
if (ret) {
dev_err(&dev->pdev->dev,
- "failed to request interrupt 1\n");
- goto free_irq;
- }
- dev->intr.enabled[1] = 1;
-
- for (i = 2; i < dev->intr.size; i++) {
- ret = request_irq(dev->intr.msix_entry[i].vector,
- pvrdma_intrx_handler, 0,
- DRV_NAME, dev);
- if (ret) {
- dev_err(&dev->pdev->dev,
- "failed to request interrupt %d\n", i);
- goto free_irq;
- }
- dev->intr.enabled[i] = 1;
+ "failed to request interrupt %d\n", i);
+ goto free_irqs;
}
}
return 0;
-free_irq:
- pvrdma_free_irq(dev);
-disable_msi:
- pvrdma_disable_msi_all(dev);
+free_irqs:
+ while (--i >= 0)
+ free_irq(pci_irq_vector(dev->pdev, i), dev);
+out_free_vectors:
+ pci_free_irq_vectors(pdev);
return ret;
}
@@ -755,7 +660,16 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
break;
case NETDEV_UP:
- pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
+ pvrdma_write_reg(dev, PVRDMA_REG_CTL,
+ PVRDMA_DEVICE_CTL_UNQUIESCE);
+
+ mb();
+
+ if (pvrdma_read_reg(dev, PVRDMA_REG_ERR))
+ dev_err(&dev->pdev->dev,
+ "failed to activate device during link up\n");
+ else
+ pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
break;
default:
dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n",
@@ -953,7 +867,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
dev->dsr->resp_slot_dma = (u64)slot_dma;
/* Async event ring */
- dev->dsr->async_ring_pages.num_pages = 4;
+ dev->dsr->async_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES;
ret = pvrdma_page_dir_init(dev, &dev->async_pdir,
dev->dsr->async_ring_pages.num_pages, true);
if (ret)
@@ -962,7 +876,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
dev->dsr->async_ring_pages.pdir_dma = dev->async_pdir.dir_dma;
/* CQ notification ring */
- dev->dsr->cq_ring_pages.num_pages = 4;
+ dev->dsr->cq_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES;
ret = pvrdma_page_dir_init(dev, &dev->cq_pdir,
dev->dsr->cq_ring_pages.num_pages, true);
if (ret)
@@ -1091,7 +1005,7 @@ err_free_uar_table:
pvrdma_uar_table_cleanup(dev);
err_free_intrs:
pvrdma_free_irq(dev);
- pvrdma_disable_msi_all(dev);
+ pci_free_irq_vectors(pdev);
err_free_cq_ring:
pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
err_free_async_ring:
@@ -1141,7 +1055,7 @@ static void pvrdma_pci_remove(struct pci_dev *pdev)
pvrdma_disable_intrs(dev);
pvrdma_free_irq(dev);
- pvrdma_disable_msi_all(dev);
+ pci_free_irq_vectors(pdev);
/* Deactivate pvrdma device */
pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_RESET);
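The interrupt rework above replaces the driver's MSI-X/MSI/INTx bookkeeping with the generic PCI IRQ-vector API. A condensed sketch of that allocate/request/teardown flow, assuming a single handler and hypothetical example_* names for brevity:

#include <linux/pci.h>
#include <linux/interrupt.h>

static int example_alloc_irqs(struct pci_dev *pdev, unsigned int max_vectors,
			      irq_handler_t handler, void *data)
{
	int nvec, i, ret;

	/* Prefer MSI-X, fall back to a single MSI or legacy vector. */
	nvec = pci_alloc_irq_vectors(pdev, 1, max_vectors,
				     PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
	if (nvec < 0)
		return nvec;

	for (i = 0; i < nvec; i++) {
		ret = request_irq(pci_irq_vector(pdev, i), handler,
				  pdev->msix_enabled ? 0 : IRQF_SHARED,
				  "example", data);
		if (ret)
			goto err_free;
	}
	return nvec;

err_free:
	while (--i >= 0)
		free_irq(pci_irq_vector(pdev, i), data);
	pci_free_irq_vectors(pdev);
	return ret;
}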
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c
index 948b5ccd2a70..ec6a4ca1eeb7 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c
@@ -194,7 +194,7 @@ int pvrdma_page_dir_insert_umem(struct pvrdma_page_dir *pdir,
len = sg_dma_len(sg) >> PAGE_SHIFT;
for (j = 0; j < len; j++) {
dma_addr_t addr = sg_dma_address(sg) +
- umem->page_size * j;
+ (j << umem->page_shift);
ret = pvrdma_page_dir_insert_dma(pdir, i, addr);
if (ret)
@@ -277,28 +277,29 @@ void ib_global_route_to_pvrdma(struct pvrdma_global_route *dst,
dst->traffic_class = src->traffic_class;
}
-void pvrdma_ah_attr_to_ib(struct ib_ah_attr *dst,
- const struct pvrdma_ah_attr *src)
+void pvrdma_ah_attr_to_rdma(struct rdma_ah_attr *dst,
+ const struct pvrdma_ah_attr *src)
{
- pvrdma_global_route_to_ib(&dst->grh, &src->grh);
- dst->dlid = src->dlid;
- dst->sl = src->sl;
- dst->src_path_bits = src->src_path_bits;
- dst->static_rate = src->static_rate;
- dst->ah_flags = src->ah_flags;
- dst->port_num = src->port_num;
- memcpy(&dst->dmac, &src->dmac, sizeof(dst->dmac));
+ dst->type = RDMA_AH_ATTR_TYPE_ROCE;
+ pvrdma_global_route_to_ib(rdma_ah_retrieve_grh(dst), &src->grh);
+ rdma_ah_set_dlid(dst, src->dlid);
+ rdma_ah_set_sl(dst, src->sl);
+ rdma_ah_set_path_bits(dst, src->src_path_bits);
+ rdma_ah_set_static_rate(dst, src->static_rate);
+ rdma_ah_set_ah_flags(dst, src->ah_flags);
+ rdma_ah_set_port_num(dst, src->port_num);
+ memcpy(dst->roce.dmac, &src->dmac, ETH_ALEN);
}
-void ib_ah_attr_to_pvrdma(struct pvrdma_ah_attr *dst,
- const struct ib_ah_attr *src)
+void rdma_ah_attr_to_pvrdma(struct pvrdma_ah_attr *dst,
+ const struct rdma_ah_attr *src)
{
- ib_global_route_to_pvrdma(&dst->grh, &src->grh);
- dst->dlid = src->dlid;
- dst->sl = src->sl;
- dst->src_path_bits = src->src_path_bits;
- dst->static_rate = src->static_rate;
- dst->ah_flags = src->ah_flags;
- dst->port_num = src->port_num;
- memcpy(&dst->dmac, &src->dmac, sizeof(dst->dmac));
+ ib_global_route_to_pvrdma(&dst->grh, rdma_ah_read_grh(src));
+ dst->dlid = rdma_ah_get_dlid(src);
+ dst->sl = rdma_ah_get_sl(src);
+ dst->src_path_bits = rdma_ah_get_path_bits(src);
+ dst->static_rate = rdma_ah_get_static_rate(src);
+ dst->ah_flags = rdma_ah_get_ah_flags(src);
+ dst->port_num = rdma_ah_get_port_num(src);
+ memcpy(&dst->dmac, src->roce.dmac, sizeof(dst->dmac));
}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index c8c01e558125..ed34d5a581fa 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -151,7 +151,7 @@ static int pvrdma_set_rq_size(struct pvrdma_dev *dev,
}
static int pvrdma_set_sq_size(struct pvrdma_dev *dev, struct ib_qp_cap *req_cap,
- enum ib_qp_type type, struct pvrdma_qp *qp)
+ struct pvrdma_qp *qp)
{
if (req_cap->max_send_wr > dev->dsr->caps.max_qp_wr ||
req_cap->max_send_sge > dev->dsr->caps.max_sge) {
@@ -170,8 +170,9 @@ static int pvrdma_set_sq_size(struct pvrdma_dev *dev, struct ib_qp_cap *req_cap,
sizeof(struct pvrdma_sge) *
qp->sq.max_sg);
/* Note: one extra page for the header. */
- qp->npages_send = 1 + (qp->sq.wqe_cnt * qp->sq.wqe_size +
- PAGE_SIZE - 1) / PAGE_SIZE;
+ qp->npages_send = PVRDMA_QP_NUM_HEADER_PAGES +
+ (qp->sq.wqe_cnt * qp->sq.wqe_size + PAGE_SIZE - 1) /
+ PAGE_SIZE;
return 0;
}
@@ -276,8 +277,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
qp->is_kernel = true;
ret = pvrdma_set_sq_size(to_vdev(pd->device),
- &init_attr->cap,
- init_attr->qp_type, qp);
+ &init_attr->cap, qp);
if (ret)
goto err_qp;
@@ -289,7 +289,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
qp->npages = qp->npages_send + qp->npages_recv;
/* Skip header page. */
- qp->sq.offset = PAGE_SIZE;
+ qp->sq.offset = PVRDMA_QP_NUM_HEADER_PAGES * PAGE_SIZE;
/* Recv queue pages are after send pages. */
qp->rq.offset = qp->npages_send * PAGE_SIZE;
@@ -342,7 +342,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
cmd->qp_type = ib_qp_type_to_pvrdma(init_attr->qp_type);
cmd->access_flags = IB_ACCESS_LOCAL_WRITE;
cmd->total_chunks = qp->npages;
- cmd->send_chunks = qp->npages_send - 1;
+ cmd->send_chunks = qp->npages_send - PVRDMA_QP_NUM_HEADER_PAGES;
cmd->pdir_dma = qp->pdir.dir_dma;
dev_dbg(&dev->pdev->dev, "create queuepair with %d, %d, %d, %d\n",
@@ -533,8 +533,8 @@ int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
cmd->attrs.alt_port_num = attr->alt_port_num;
cmd->attrs.alt_timeout = attr->alt_timeout;
ib_qp_cap_to_pvrdma(&cmd->attrs.cap, &attr->cap);
- ib_ah_attr_to_pvrdma(&cmd->attrs.ah_attr, &attr->ah_attr);
- ib_ah_attr_to_pvrdma(&cmd->attrs.alt_ah_attr, &attr->alt_ah_attr);
+ rdma_ah_attr_to_pvrdma(&cmd->attrs.ah_attr, &attr->ah_attr);
+ rdma_ah_attr_to_pvrdma(&cmd->attrs.alt_ah_attr, &attr->alt_ah_attr);
ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_MODIFY_QP_RESP);
if (ret < 0) {
@@ -555,13 +555,13 @@ out:
return ret;
}
-static inline void *get_sq_wqe(struct pvrdma_qp *qp, int n)
+static inline void *get_sq_wqe(struct pvrdma_qp *qp, unsigned int n)
{
return pvrdma_page_dir_get_ptr(&qp->pdir,
qp->sq.offset + n * qp->sq.wqe_size);
}
-static inline void *get_rq_wqe(struct pvrdma_qp *qp, int n)
+static inline void *get_rq_wqe(struct pvrdma_qp *qp, unsigned int n)
{
return pvrdma_page_dir_get_ptr(&qp->pdir,
qp->rq.offset + n * qp->rq.wqe_size);
@@ -599,9 +599,7 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
unsigned long flags;
struct pvrdma_sq_wqe_hdr *wqe_hdr;
struct pvrdma_sge *sge;
- int i, index;
- int nreq;
- int ret;
+ int i, ret;
/*
* In states lower than RTS, we can fail immediately. In other states,
@@ -614,9 +612,8 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
spin_lock_irqsave(&qp->sq.lock, flags);
- index = pvrdma_idx(&qp->sq.ring->prod_tail, qp->sq.wqe_cnt);
- for (nreq = 0; wr; nreq++, wr = wr->next) {
- unsigned int tail;
+ while (wr) {
+ unsigned int tail = 0;
if (unlikely(!pvrdma_idx_ring_has_space(
qp->sq.ring, qp->sq.wqe_cnt, &tail))) {
@@ -681,7 +678,7 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
}
- wqe_hdr = (struct pvrdma_sq_wqe_hdr *)get_sq_wqe(qp, index);
+ wqe_hdr = (struct pvrdma_sq_wqe_hdr *)get_sq_wqe(qp, tail);
memset(wqe_hdr, 0, sizeof(*wqe_hdr));
wqe_hdr->wr_id = wr->wr_id;
wqe_hdr->num_sge = wr->num_sge;
@@ -772,12 +769,11 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
/* Make sure wqe is written before index update */
smp_wmb();
- index++;
- if (unlikely(index >= qp->sq.wqe_cnt))
- index = 0;
/* Update shared sq ring */
pvrdma_idx_ring_inc(&qp->sq.ring->prod_tail,
qp->sq.wqe_cnt);
+
+ wr = wr->next;
}
ret = 0;
@@ -807,7 +803,6 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct pvrdma_qp *qp = to_vqp(ibqp);
struct pvrdma_rq_wqe_hdr *wqe_hdr;
struct pvrdma_sge *sge;
- int index, nreq;
int ret = 0;
int i;
@@ -822,9 +817,8 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
spin_lock_irqsave(&qp->rq.lock, flags);
- index = pvrdma_idx(&qp->rq.ring->prod_tail, qp->rq.wqe_cnt);
- for (nreq = 0; wr; nreq++, wr = wr->next) {
- unsigned int tail;
+ while (wr) {
+ unsigned int tail = 0;
if (unlikely(wr->num_sge > qp->rq.max_sg ||
wr->num_sge < 0)) {
@@ -844,7 +838,7 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
goto out;
}
- wqe_hdr = (struct pvrdma_rq_wqe_hdr *)get_rq_wqe(qp, index);
+ wqe_hdr = (struct pvrdma_rq_wqe_hdr *)get_rq_wqe(qp, tail);
wqe_hdr->wr_id = wr->wr_id;
wqe_hdr->num_sge = wr->num_sge;
wqe_hdr->total_len = 0;
@@ -860,12 +854,11 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
/* Make sure wqe is written before index update */
smp_wmb();
- index++;
- if (unlikely(index >= qp->rq.wqe_cnt))
- index = 0;
/* Update shared rq ring */
pvrdma_idx_ring_inc(&qp->rq.ring->prod_tail,
qp->rq.wqe_cnt);
+
+ wr = wr->next;
}
spin_unlock_irqrestore(&qp->rq.lock, flags);
@@ -945,8 +938,8 @@ int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr->alt_port_num = resp->attrs.alt_port_num;
attr->alt_timeout = resp->attrs.alt_timeout;
pvrdma_qp_cap_to_ib(&attr->cap, &resp->attrs.cap);
- pvrdma_ah_attr_to_ib(&attr->ah_attr, &resp->attrs.ah_attr);
- pvrdma_ah_attr_to_ib(&attr->alt_ah_attr, &resp->attrs.alt_ah_attr);
+ pvrdma_ah_attr_to_rdma(&attr->ah_attr, &resp->attrs.ah_attr);
+ pvrdma_ah_attr_to_rdma(&attr->alt_ah_attr, &resp->attrs.alt_ah_attr);
qp->state = attr->qp_state;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
index c2aa52638dcb..28517042011d 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
@@ -135,7 +135,7 @@ int pvrdma_query_port(struct ib_device *ibdev, u8 port,
return err;
}
- memset(props, 0, sizeof(*props));
+ /* props is zeroed by the caller, avoid zeroing it here */
props->state = pvrdma_port_state_to_ib(resp->attrs.state);
props->max_mtu = pvrdma_mtu_to_ib(resp->attrs.max_mtu);
@@ -275,7 +275,7 @@ int pvrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
}
mutex_lock(&vdev->port_mutex);
- ret = pvrdma_query_port(ibdev, port, &attr);
+ ret = ib_query_port(ibdev, port, &attr);
if (ret)
goto out;
@@ -520,20 +520,20 @@ int pvrdma_dealloc_pd(struct ib_pd *pd)
*
* @return: the ib_ah pointer on success, otherwise errno.
*/
-struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
struct ib_udata *udata)
{
struct pvrdma_dev *dev = to_vdev(pd->device);
struct pvrdma_ah *ah;
- enum rdma_link_layer ll;
+ const struct ib_global_route *grh;
+ u8 port_num = rdma_ah_get_port_num(ah_attr);
- if (!(ah_attr->ah_flags & IB_AH_GRH))
+ if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
return ERR_PTR(-EINVAL);
- ll = rdma_port_get_link_layer(pd->device, ah_attr->port_num);
-
- if (ll != IB_LINK_LAYER_ETHERNET ||
- rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw))
+ grh = rdma_ah_read_grh(ah_attr);
+ if ((ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE) ||
+ rdma_is_multicast_addr((struct in6_addr *)grh->dgid.raw))
return ERR_PTR(-EINVAL);
if (!atomic_add_unless(&dev->num_ahs, 1, dev->dsr->caps.max_ah))
@@ -545,15 +545,15 @@ struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
return ERR_PTR(-ENOMEM);
}
- ah->av.port_pd = to_vpd(pd)->pd_handle | (ah_attr->port_num << 24);
- ah->av.src_path_bits = ah_attr->src_path_bits;
+ ah->av.port_pd = to_vpd(pd)->pd_handle | (port_num << 24);
+ ah->av.src_path_bits = rdma_ah_get_path_bits(ah_attr);
ah->av.src_path_bits |= 0x80;
- ah->av.gid_index = ah_attr->grh.sgid_index;
- ah->av.hop_limit = ah_attr->grh.hop_limit;
- ah->av.sl_tclass_flowlabel = (ah_attr->grh.traffic_class << 20) |
- ah_attr->grh.flow_label;
- memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16);
- memcpy(ah->av.dmac, ah_attr->dmac, 6);
+ ah->av.gid_index = grh->sgid_index;
+ ah->av.hop_limit = grh->hop_limit;
+ ah->av.sl_tclass_flowlabel = (grh->traffic_class << 20) |
+ grh->flow_label;
+ memcpy(ah->av.dgid, grh->dgid.raw, 16);
+ memcpy(ah->av.dmac, ah_attr->roce.dmac, ETH_ALEN);
ah->ibah.device = pd->device;
ah->ibah.pd = pd;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
index bfbe96b56255..002a9b066e70 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
@@ -417,7 +417,7 @@ int pvrdma_resize_cq(struct ib_cq *ibcq, int entries,
int pvrdma_destroy_cq(struct ib_cq *cq);
int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
-struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
struct ib_udata *udata);
int pvrdma_destroy_ah(struct ib_ah *ah);
struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig
index 1da8d01a6855..fdd001ce13d8 100644
--- a/drivers/infiniband/sw/rdmavt/Kconfig
+++ b/drivers/infiniband/sw/rdmavt/Kconfig
@@ -1,5 +1,6 @@
config INFINIBAND_RDMAVT
tristate "RDMA verbs transport library"
depends on 64BIT
+ select DMA_VIRT_OPS
---help---
This is a common software verbs provider for RDMA networks.
diff --git a/drivers/infiniband/sw/rdmavt/Makefile b/drivers/infiniband/sw/rdmavt/Makefile
index ccaa7992ac97..78b276a90401 100644
--- a/drivers/infiniband/sw/rdmavt/Makefile
+++ b/drivers/infiniband/sw/rdmavt/Makefile
@@ -7,7 +7,7 @@
#
obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt.o
-rdmavt-y := vt.o ah.o cq.o dma.o mad.o mcast.o mmap.o mr.o pd.o qp.o srq.o \
- trace.o
+rdmavt-y := vt.o ah.o cq.o mad.o mcast.o mmap.o mr.o pd.o qp.o \
+ rc.o srq.o trace.o
CFLAGS_trace.o = -I$(src)
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c
index 16c446142c2a..a96d4aa80ae8 100644
--- a/drivers/infiniband/sw/rdmavt/ah.c
+++ b/drivers/infiniband/sw/rdmavt/ah.c
@@ -60,32 +60,35 @@
* Return: 0 on success
*/
int rvt_check_ah(struct ib_device *ibdev,
- struct ib_ah_attr *ah_attr)
+ struct rdma_ah_attr *ah_attr)
{
int err;
+ int port_num = rdma_ah_get_port_num(ah_attr);
struct ib_port_attr port_attr;
struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
- enum rdma_link_layer link = rdma_port_get_link_layer(ibdev,
- ah_attr->port_num);
+ enum rdma_link_layer link = rdma_port_get_link_layer(ibdev, port_num);
+ u32 dlid = rdma_ah_get_dlid(ah_attr);
+ u8 ah_flags = rdma_ah_get_ah_flags(ah_attr);
+ u8 static_rate = rdma_ah_get_static_rate(ah_attr);
- err = ib_query_port(ibdev, ah_attr->port_num, &port_attr);
+ err = ib_query_port(ibdev, port_num, &port_attr);
if (err)
return -EINVAL;
- if (ah_attr->port_num < 1 ||
- ah_attr->port_num > ibdev->phys_port_cnt)
+ if (port_num < 1 ||
+ port_num > ibdev->phys_port_cnt)
return -EINVAL;
- if (ah_attr->static_rate != IB_RATE_PORT_CURRENT &&
- ib_rate_to_mbps(ah_attr->static_rate) < 0)
+ if (static_rate != IB_RATE_PORT_CURRENT &&
+ ib_rate_to_mbps(static_rate) < 0)
return -EINVAL;
- if ((ah_attr->ah_flags & IB_AH_GRH) &&
- ah_attr->grh.sgid_index >= port_attr.gid_tbl_len)
+ if ((ah_flags & IB_AH_GRH) &&
+ rdma_ah_read_grh(ah_attr)->sgid_index >= port_attr.gid_tbl_len)
return -EINVAL;
if (link != IB_LINK_LAYER_ETHERNET) {
- if (ah_attr->dlid == 0)
+ if (dlid == 0)
return -EINVAL;
- if (ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) &&
- ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) &&
- !(ah_attr->ah_flags & IB_AH_GRH))
+ if (dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) &&
+ dlid != be16_to_cpu(IB_LID_PERMISSIVE) &&
+ !(ah_flags & IB_AH_GRH))
return -EINVAL;
}
if (rdi->driver_f.check_ah)
@@ -104,7 +107,7 @@ EXPORT_SYMBOL(rvt_check_ah);
* Return: newly allocated ah
*/
struct ib_ah *rvt_create_ah(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr)
+ struct rdma_ah_attr *ah_attr)
{
struct rvt_ah *ah;
struct rvt_dev_info *dev = ib_to_rvt(pd->device);
@@ -119,7 +122,7 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd,
spin_lock_irqsave(&dev->n_ahs_lock, flags);
if (dev->n_ahs_allocated == dev->dparms.props.max_ah) {
- spin_unlock(&dev->n_ahs_lock);
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
kfree(ah);
return ERR_PTR(-ENOMEM);
}
@@ -167,7 +170,7 @@ int rvt_destroy_ah(struct ib_ah *ibah)
*
* Return: 0 on success
*/
-int rvt_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+int rvt_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
{
struct rvt_ah *ah = ibah_to_rvtah(ibah);
@@ -186,7 +189,7 @@ int rvt_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
*
* Return: always 0
*/
-int rvt_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+int rvt_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
{
struct rvt_ah *ah = ibah_to_rvtah(ibah);
diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h
index e9c36be87d79..16105af99189 100644
--- a/drivers/infiniband/sw/rdmavt/ah.h
+++ b/drivers/infiniband/sw/rdmavt/ah.h
@@ -51,9 +51,9 @@
#include <rdma/rdma_vt.h>
struct ib_ah *rvt_create_ah(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr);
+ struct rdma_ah_attr *ah_attr);
int rvt_destroy_ah(struct ib_ah *ibah);
-int rvt_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
-int rvt_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
+int rvt_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
+int rvt_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
#endif /* DEF_RVTAH_H */
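The ib_ah_attr to rdma_ah_attr conversions above (here and in the pvrdma hunks) replace direct field access with accessor helpers from <rdma/ib_verbs.h>. A minimal sketch of that accessor style; example_fill_ah() is a hypothetical caller, the rdma_ah_* helpers are the ones used by the patch:

#include <rdma/ib_verbs.h>

/* Illustrative: address-handle attributes are set and read through
 * helpers rather than by touching struct members directly. */
static void example_fill_ah(struct rdma_ah_attr *attr, u8 port, u16 dlid, u8 sl)
{
	rdma_ah_set_port_num(attr, port);
	rdma_ah_set_dlid(attr, dlid);
	rdma_ah_set_sl(attr, sl);

	if (rdma_ah_get_ah_flags(attr) & IB_AH_GRH)
		(void)rdma_ah_read_grh(attr)->sgid_index;	/* read-side helper */
}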
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index 7aa7a4e312f1..0ae2ff8cf81e 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -50,6 +50,7 @@
#include <linux/kthread.h>
#include "cq.h"
#include "vt.h"
+#include "trace.h"
/**
* rvt_cq_enter - add a new entry to the completion queue
@@ -93,6 +94,7 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
}
return;
}
+ trace_rvt_cq_enter(cq, entry, head);
if (cq->ip) {
wc->uqueue[head].wr_id = entry->wr_id;
wc->uqueue[head].status = entry->status;
@@ -482,6 +484,7 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
if (tail == wc->head)
break;
/* The kernel doesn't need a RMB since it has the lock. */
+ trace_rvt_cq_poll(cq, &wc->kqueue[tail], npolled);
*entry = wc->kqueue[tail];
if (tail >= cq->ibcq.cqe)
tail = 0;
diff --git a/drivers/infiniband/sw/rdmavt/dma.c b/drivers/infiniband/sw/rdmavt/dma.c
deleted file mode 100644
index f2cefb0d9180..000000000000
--- a/drivers/infiniband/sw/rdmavt/dma.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-#include <linux/types.h>
-#include <linux/scatterlist.h>
-#include <rdma/ib_verbs.h>
-
-#include "dma.h"
-
-#define BAD_DMA_ADDRESS ((u64)0)
-
-/*
- * The following functions implement driver specific replacements
- * for the ib_dma_*() functions.
- *
- * These functions return kernel virtual addresses instead of
- * device bus addresses since the driver uses the CPU to copy
- * data instead of using hardware DMA.
- */
-
-static int rvt_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
- return dma_addr == BAD_DMA_ADDRESS;
-}
-
-static u64 rvt_dma_map_single(struct ib_device *dev, void *cpu_addr,
- size_t size, enum dma_data_direction direction)
-{
- if (WARN_ON(!valid_dma_direction(direction)))
- return BAD_DMA_ADDRESS;
-
- return (u64)cpu_addr;
-}
-
-static void rvt_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
- enum dma_data_direction direction)
-{
- /* This is a stub, nothing to be done here */
-}
-
-static u64 rvt_dma_map_page(struct ib_device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction direction)
-{
- u64 addr;
-
- if (WARN_ON(!valid_dma_direction(direction)))
- return BAD_DMA_ADDRESS;
-
- addr = (u64)page_address(page);
- if (addr)
- addr += offset;
-
- return addr;
-}
-
-static void rvt_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
- enum dma_data_direction direction)
-{
- /* This is a stub, nothing to be done here */
-}
-
-static int rvt_map_sg(struct ib_device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction direction)
-{
- struct scatterlist *sg;
- u64 addr;
- int i;
- int ret = nents;
-
- if (WARN_ON(!valid_dma_direction(direction)))
- return 0;
-
- for_each_sg(sgl, sg, nents, i) {
- addr = (u64)page_address(sg_page(sg));
- if (!addr) {
- ret = 0;
- break;
- }
- sg->dma_address = addr + sg->offset;
-#ifdef CONFIG_NEED_SG_DMA_LENGTH
- sg->dma_length = sg->length;
-#endif
- }
- return ret;
-}
-
-static void rvt_unmap_sg(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction)
-{
- /* This is a stub, nothing to be done here */
-}
-
-static int rvt_map_sg_attrs(struct ib_device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction direction,
- unsigned long attrs)
-{
- return rvt_map_sg(dev, sgl, nents, direction);
-}
-
-static void rvt_unmap_sg_attrs(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction,
- unsigned long attrs)
-{
- return rvt_unmap_sg(dev, sg, nents, direction);
-}
-
-static void rvt_sync_single_for_cpu(struct ib_device *dev, u64 addr,
- size_t size, enum dma_data_direction dir)
-{
-}
-
-static void rvt_sync_single_for_device(struct ib_device *dev, u64 addr,
- size_t size,
- enum dma_data_direction dir)
-{
-}
-
-static void *rvt_dma_alloc_coherent(struct ib_device *dev, size_t size,
- u64 *dma_handle, gfp_t flag)
-{
- struct page *p;
- void *addr = NULL;
-
- p = alloc_pages(flag, get_order(size));
- if (p)
- addr = page_address(p);
- if (dma_handle)
- *dma_handle = (u64)addr;
- return addr;
-}
-
-static void rvt_dma_free_coherent(struct ib_device *dev, size_t size,
- void *cpu_addr, u64 dma_handle)
-{
- free_pages((unsigned long)cpu_addr, get_order(size));
-}
-
-struct ib_dma_mapping_ops rvt_default_dma_mapping_ops = {
- .mapping_error = rvt_mapping_error,
- .map_single = rvt_dma_map_single,
- .unmap_single = rvt_dma_unmap_single,
- .map_page = rvt_dma_map_page,
- .unmap_page = rvt_dma_unmap_page,
- .map_sg = rvt_map_sg,
- .unmap_sg = rvt_unmap_sg,
- .map_sg_attrs = rvt_map_sg_attrs,
- .unmap_sg_attrs = rvt_unmap_sg_attrs,
- .sync_single_for_cpu = rvt_sync_single_for_cpu,
- .sync_single_for_device = rvt_sync_single_for_device,
- .alloc_coherent = rvt_dma_alloc_coherent,
- .free_coherent = rvt_dma_free_coherent
-};
diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c
index f6e99778d7ca..d6981dc04adb 100644
--- a/drivers/infiniband/sw/rdmavt/mad.c
+++ b/drivers/infiniband/sw/rdmavt/mad.c
@@ -74,9 +74,9 @@ int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
u16 *out_mad_pkey_index)
{
/*
- * MAD processing is quite different between hfi1 and qib. Therfore this
- * is expected to be provided by the driver. Other drivers in the future
- * may chose to implement this but it should not be made into a
+ * MAD processing is quite different between hfi1 and qib. Therefore
+ * this is expected to be provided by the driver. Other drivers in the
+ * future may choose to implement this but it should not be made into a
* requirement.
*/
if (ibport_num_to_idx(ibdev, port_num) < 0)
@@ -160,7 +160,7 @@ void rvt_free_mad_agents(struct rvt_dev_info *rdi)
ib_unregister_mad_agent(agent);
}
if (rvp->sm_ah) {
- ib_destroy_ah(&rvp->sm_ah->ibah);
+ rdma_destroy_ah(&rvp->sm_ah->ibah);
rvp->sm_ah = NULL;
}
diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c
index 05c8c2afb0e3..1f12b69a0d07 100644
--- a/drivers/infiniband/sw/rdmavt/mcast.c
+++ b/drivers/infiniband/sw/rdmavt/mcast.c
@@ -100,10 +100,11 @@ static void rvt_mcast_qp_free(struct rvt_mcast_qp *mqp)
/**
* mcast_alloc - allocate the multicast GID structure
* @mgid: the multicast GID
+ * @lid: the multicast LID (host order)
*
* A list of QPs will be attached to this structure.
*/
-static struct rvt_mcast *rvt_mcast_alloc(union ib_gid *mgid)
+static struct rvt_mcast *rvt_mcast_alloc(union ib_gid *mgid, u16 lid)
{
struct rvt_mcast *mcast;
@@ -111,7 +112,9 @@ static struct rvt_mcast *rvt_mcast_alloc(union ib_gid *mgid)
if (!mcast)
goto bail;
- mcast->mgid = *mgid;
+ mcast->mcast_addr.mgid = *mgid;
+ mcast->mcast_addr.lid = lid;
+
INIT_LIST_HEAD(&mcast->qp_list);
init_waitqueue_head(&mcast->wait);
atomic_set(&mcast->refcount, 0);
@@ -131,15 +134,19 @@ static void rvt_mcast_free(struct rvt_mcast *mcast)
}
/**
- * rvt_mcast_find - search the global table for the given multicast GID
+ * rvt_mcast_find - search the global table for the given multicast GID/LID
+ * NOTE: It is valid to have 1 MLID with multiple MGIDs. It is not valid
+ * to have 1 MGID with multiple MLIDs.
* @ibp: the IB port structure
* @mgid: the multicast GID to search for
+ * @lid: the multicast LID portion of the multicast address (host order)
*
* The caller is responsible for decrementing the reference count if found.
*
* Return: NULL if not found.
*/
-struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid)
+struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid,
+ u16 lid)
{
struct rb_node *n;
unsigned long flags;
@@ -153,15 +160,18 @@ struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid)
mcast = rb_entry(n, struct rvt_mcast, rb_node);
- ret = memcmp(mgid->raw, mcast->mgid.raw,
- sizeof(union ib_gid));
+ ret = memcmp(mgid->raw, mcast->mcast_addr.mgid.raw,
+ sizeof(*mgid));
if (ret < 0) {
n = n->rb_left;
} else if (ret > 0) {
n = n->rb_right;
} else {
- atomic_inc(&mcast->refcount);
- found = mcast;
+ /* MGID/MLID must match */
+ if (mcast->mcast_addr.lid == lid) {
+ atomic_inc(&mcast->refcount);
+ found = mcast;
+ }
break;
}
}
@@ -177,7 +187,8 @@ EXPORT_SYMBOL(rvt_mcast_find);
*
* Return: zero if both were added. Return EEXIST if the GID was already in
* the table but the QP was added. Return ESRCH if the QP was already
- * attached and neither structure was added.
+ * attached and neither structure was added. Return EINVAL if the MGID was
+ * found, but the MLID did NOT match.
*/
static int rvt_mcast_add(struct rvt_dev_info *rdi, struct rvt_ibport *ibp,
struct rvt_mcast *mcast, struct rvt_mcast_qp *mqp)
@@ -195,8 +206,9 @@ static int rvt_mcast_add(struct rvt_dev_info *rdi, struct rvt_ibport *ibp,
pn = *n;
tmcast = rb_entry(pn, struct rvt_mcast, rb_node);
- ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
- sizeof(union ib_gid));
+ ret = memcmp(mcast->mcast_addr.mgid.raw,
+ tmcast->mcast_addr.mgid.raw,
+ sizeof(mcast->mcast_addr.mgid));
if (ret < 0) {
n = &pn->rb_left;
continue;
@@ -206,6 +218,11 @@ static int rvt_mcast_add(struct rvt_dev_info *rdi, struct rvt_ibport *ibp,
continue;
}
+ if (tmcast->mcast_addr.lid != mcast->mcast_addr.lid) {
+ ret = EINVAL;
+ goto bail;
+ }
+
/* Search the QP list to see if this is already there. */
list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
if (p->qp == mqp->qp) {
@@ -276,7 +293,7 @@ int rvt_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
* Allocate data structures since its better to do this outside of
* spin locks and it will most likely be needed.
*/
- mcast = rvt_mcast_alloc(gid);
+ mcast = rvt_mcast_alloc(gid, lid);
if (!mcast)
return -ENOMEM;
@@ -296,6 +313,10 @@ int rvt_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
/* Exceeded the maximum number of mcast groups. */
ret = -ENOMEM;
goto bail_mqp;
+ case EINVAL:
+ /* Invalid MGID/MLID pair */
+ ret = -EINVAL;
+ goto bail_mqp;
default:
break;
}
@@ -344,14 +365,20 @@ int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
}
mcast = rb_entry(n, struct rvt_mcast, rb_node);
- ret = memcmp(gid->raw, mcast->mgid.raw,
- sizeof(union ib_gid));
- if (ret < 0)
+ ret = memcmp(gid->raw, mcast->mcast_addr.mgid.raw,
+ sizeof(*gid));
+ if (ret < 0) {
n = n->rb_left;
- else if (ret > 0)
+ } else if (ret > 0) {
n = n->rb_right;
- else
+ } else {
+ /* MGID/MLID must match */
+ if (mcast->mcast_addr.lid != lid) {
+ spin_unlock_irq(&ibp->lock);
+ return -EINVAL;
+ }
break;
+ }
}
/* Search the QP list. */
diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c
index e202b8142759..6b712eecbd37 100644
--- a/drivers/infiniband/sw/rdmavt/mmap.c
+++ b/drivers/infiniband/sw/rdmavt/mmap.c
@@ -170,9 +170,9 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi,
spin_lock_irq(&rdi->mmap_offset_lock);
if (rdi->mmap_offset == 0)
- rdi->mmap_offset = PAGE_SIZE;
+ rdi->mmap_offset = ALIGN(PAGE_SIZE, SHMLBA);
ip->offset = rdi->mmap_offset;
- rdi->mmap_offset += size;
+ rdi->mmap_offset += ALIGN(size, SHMLBA);
spin_unlock_irq(&rdi->mmap_offset_lock);
INIT_LIST_HEAD(&ip->pending_mmaps);
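The hunk above rounds mmap offsets and sizes up to SHMLBA so that user mappings of the same pages cannot alias in a virtually indexed cache. A small sketch of that offset bookkeeping with a hypothetical cursor variable (illustrative only):

#include <linux/mm.h>
#include <linux/kernel.h>
#include <asm/shmparam.h>

/* Illustrative: hand out SHMLBA-aligned offsets for driver mmap regions. */
static unsigned long example_next_offset(unsigned long *cursor, unsigned long size)
{
	unsigned long off;

	if (*cursor == 0)
		*cursor = ALIGN(PAGE_SIZE, SHMLBA);
	off = *cursor;
	*cursor += ALIGN(size, SHMLBA);
	return off;
}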
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 52fd15276ee6..aa5f9ea318e4 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -120,10 +120,19 @@ static void rvt_deinit_mregion(struct rvt_mregion *mr)
mr->mapsz = 0;
while (i)
kfree(mr->map[--i]);
+ percpu_ref_exit(&mr->refcount);
+}
+
+static void __rvt_mregion_complete(struct percpu_ref *ref)
+{
+ struct rvt_mregion *mr = container_of(ref, struct rvt_mregion,
+ refcount);
+
+ complete(&mr->comp);
}
static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd,
- int count)
+ int count, unsigned int percpu_flags)
{
int m, i = 0;
struct rvt_dev_info *dev = ib_to_rvt(pd->device);
@@ -133,19 +142,23 @@ static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd,
for (; i < m; i++) {
mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL,
dev->dparms.node);
- if (!mr->map[i]) {
- rvt_deinit_mregion(mr);
- return -ENOMEM;
- }
+ if (!mr->map[i])
+ goto bail;
mr->mapsz++;
}
init_completion(&mr->comp);
/* count returning the ptr to user */
- atomic_set(&mr->refcount, 1);
+ if (percpu_ref_init(&mr->refcount, &__rvt_mregion_complete,
+ percpu_flags, GFP_KERNEL))
+ goto bail;
+
atomic_set(&mr->lkey_invalid, 0);
mr->pd = pd;
mr->max_segs = count;
return 0;
+bail:
+ rvt_deinit_mregion(mr);
+ return -ENOMEM;
}
/**
@@ -178,10 +191,10 @@ static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
tmr = rcu_access_pointer(dev->dma_mr);
if (!tmr) {
- rcu_assign_pointer(dev->dma_mr, mr);
mr->lkey_published = 1;
- } else {
- rvt_put_mr(mr);
+ /* Ensure published is written first */
+ rcu_assign_pointer(dev->dma_mr, mr);
+ rvt_get_mr(mr);
}
goto success;
}
@@ -212,8 +225,9 @@ static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
mr->lkey |= 1 << 8;
rkt->gen++;
}
- rcu_assign_pointer(rkt->table[r], mr);
mr->lkey_published = 1;
+ /* Ensure published is written first */
+ rcu_assign_pointer(rkt->table[r], mr);
success:
spin_unlock_irqrestore(&rkt->lock, flags);
out:
@@ -239,22 +253,26 @@ static void rvt_free_lkey(struct rvt_mregion *mr)
int freed = 0;
spin_lock_irqsave(&rkt->lock, flags);
- if (!mr->lkey_published)
- goto out;
- if (lkey == 0) {
- RCU_INIT_POINTER(dev->dma_mr, NULL);
+ if (!lkey) {
+ if (mr->lkey_published) {
+ mr->lkey_published = 0;
+ /* ensure published is written before pointer */
+ rcu_assign_pointer(dev->dma_mr, NULL);
+ rvt_put_mr(mr);
+ }
} else {
+ if (!mr->lkey_published)
+ goto out;
r = lkey >> (32 - dev->dparms.lkey_table_size);
- RCU_INIT_POINTER(rkt->table[r], NULL);
+ mr->lkey_published = 0;
+ /* ensure published is written before pointer */
+ rcu_assign_pointer(rkt->table[r], NULL);
}
- mr->lkey_published = 0;
freed++;
out:
spin_unlock_irqrestore(&rkt->lock, flags);
- if (freed) {
- synchronize_rcu();
- rvt_put_mr(mr);
- }
+ if (freed)
+ percpu_ref_kill(&mr->refcount);
}
static struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd)
@@ -269,7 +287,7 @@ static struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd)
if (!mr)
goto bail;
- rval = rvt_init_mregion(&mr->mr, pd, count);
+ rval = rvt_init_mregion(&mr->mr, pd, count, 0);
if (rval)
goto bail;
/*
@@ -294,8 +312,8 @@ bail:
static void __rvt_free_mr(struct rvt_mr *mr)
{
- rvt_deinit_mregion(&mr->mr);
rvt_free_lkey(&mr->mr);
+ rvt_deinit_mregion(&mr->mr);
kfree(mr);
}
@@ -305,8 +323,8 @@ static void __rvt_free_mr(struct rvt_mr *mr)
* @acc: access flags
*
* Return: the memory region on success, otherwise returns an errno.
- * Note that all DMA addresses should be created via the
- * struct ib_dma_mapping_ops functions (see dma.c).
+ * Note that all DMA addresses should be created via the functions in
+ * struct dma_virt_ops.
*/
struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
{
@@ -323,7 +341,7 @@ struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
goto bail;
}
- rval = rvt_init_mregion(&mr->mr, pd, 0);
+ rval = rvt_init_mregion(&mr->mr, pd, 0, 0);
if (rval) {
ret = ERR_PTR(rval);
goto bail;
@@ -390,8 +408,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mr->mr.access_flags = mr_access_flags;
mr->umem = umem;
- if (is_power_of_2(umem->page_size))
- mr->mr.page_shift = ilog2(umem->page_size);
+ mr->mr.page_shift = umem->page_shift;
m = 0;
n = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
@@ -403,8 +420,9 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto bail_inval;
}
mr->mr.map[m]->segs[n].vaddr = vaddr;
- mr->mr.map[m]->segs[n].length = umem->page_size;
- trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr, umem->page_size);
+ mr->mr.map[m]->segs[n].length = BIT(umem->page_shift);
+ trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr,
+ BIT(umem->page_shift));
n++;
if (n == RVT_SEGSZ) {
m++;
@@ -445,8 +463,8 @@ int rvt_dereg_mr(struct ib_mr *ibmr)
timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ);
if (!timeout) {
rvt_pr_err(rdi,
- "rvt_dereg_mr timeout mr %p pd %p refcount %u\n",
- mr, mr->mr.pd, atomic_read(&mr->mr.refcount));
+ "rvt_dereg_mr timeout mr %p pd %p\n",
+ mr, mr->mr.pd);
rvt_get_mr(&mr->mr);
ret = -EBUSY;
goto out;
@@ -623,7 +641,8 @@ struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
if (!fmr)
goto bail;
- rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages);
+ rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages,
+ PERCPU_REF_INIT_ATOMIC);
if (rval)
goto bail;
@@ -674,11 +693,12 @@ int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
struct rvt_fmr *fmr = to_ifmr(ibfmr);
struct rvt_lkey_table *rkt;
unsigned long flags;
- int m, n, i;
+ int m, n;
+ unsigned long i;
u32 ps;
struct rvt_dev_info *rdi = ib_to_rvt(ibfmr->device);
- i = atomic_read(&fmr->mr.refcount);
+ i = atomic_long_read(&fmr->mr.refcount.count);
if (i > 2)
return -EBUSY;
@@ -782,7 +802,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
/*
* We use LKEY == zero for kernel virtual addresses
- * (see rvt_get_dma_mr and dma.c).
+ * (see rvt_get_dma_mr() and dma_virt_ops).
*/
rcu_read_lock();
if (sge->lkey == 0) {
@@ -805,16 +825,21 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
goto ok;
}
mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
- if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
- mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
+ if (!mr)
goto bail;
+ rvt_get_mr(mr);
+ if (!READ_ONCE(mr->lkey_published))
+ goto bail_unref;
+
+ if (unlikely(atomic_read(&mr->lkey_invalid) ||
+ mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
+ goto bail_unref;
off = sge->addr - mr->user_base;
if (unlikely(sge->addr < mr->user_base ||
off + sge->length > mr->length ||
(mr->access_flags & acc) != acc))
- goto bail;
- rvt_get_mr(mr);
+ goto bail_unref;
rcu_read_unlock();
off += mr->offset;
@@ -850,6 +875,8 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
isge->n = n;
ok:
return 1;
+bail_unref:
+ rvt_put_mr(mr);
bail:
rcu_read_unlock();
return 0;
@@ -880,7 +907,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
/*
* We use RKEY == zero for kernel virtual addresses
- * (see rvt_get_dma_mr and dma.c).
+ * (see rvt_get_dma_mr() and dma_virt_ops).
*/
rcu_read_lock();
if (rkey == 0) {
@@ -905,15 +932,20 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
}
mr = rcu_dereference(rkt->table[rkey >> rkt->shift]);
- if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
- mr->lkey != rkey || qp->ibqp.pd != mr->pd))
+ if (!mr)
goto bail;
+ rvt_get_mr(mr);
+ /* ensure mr read is before test */
+ if (!READ_ONCE(mr->lkey_published))
+ goto bail_unref;
+ if (unlikely(atomic_read(&mr->lkey_invalid) ||
+ mr->lkey != rkey || qp->ibqp.pd != mr->pd))
+ goto bail_unref;
off = vaddr - mr->iova;
if (unlikely(vaddr < mr->iova || off + len > mr->length ||
(mr->access_flags & acc) == 0))
- goto bail;
- rvt_get_mr(mr);
+ goto bail_unref;
rcu_read_unlock();
off += mr->offset;
@@ -949,6 +981,8 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
sge->n = n;
ok:
return 1;
+bail_unref:
+ rvt_put_mr(mr);
bail:
rcu_read_unlock();
return 0;
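
The reworked rvt_lkey_ok()/rvt_rkey_ok() paths above now take a reference on the MR before validating it, and drop that reference on the new bail_unref exit. A minimal user-space sketch of the same acquire-then-validate idea, with hypothetical names and C11 atomics standing in for rvt_get_mr()/rvt_put_mr() and the RCU-protected table lookup:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for the MR: a refcount plus a "published" flag. */
struct obj {
        atomic_int refcount;
        atomic_bool published;
        unsigned int key;
};

/* Take a reference first, then validate; undo the reference on failure. */
static bool obj_lookup(struct obj *o, unsigned int key)
{
        atomic_fetch_add(&o->refcount, 1);      /* like rvt_get_mr() */
        if (!atomic_load(&o->published))        /* like READ_ONCE(lkey_published) */
                goto bail_unref;
        if (o->key != key)                      /* like the lkey/pd checks */
                goto bail_unref;
        return true;                            /* caller now owns a reference */

bail_unref:
        atomic_fetch_sub(&o->refcount, 1);      /* like rvt_put_mr() */
        return false;
}

int main(void)
{
        struct obj o;

        atomic_init(&o.refcount, 1);
        atomic_init(&o.published, true);
        o.key = 42;

        printf("lookup 42: %d\n", obj_lookup(&o, 42));  /* 1 */
        printf("lookup  7: %d\n", obj_lookup(&o, 7));   /* 0 */
        return 0;
}

Taking the reference before the checks means the object cannot be torn down between validation and the caller's use of it, which fits the percpu_ref conversion earlier in this patch.
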
diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c
index d1292f324c67..8a89afff3363 100644
--- a/drivers/infiniband/sw/rdmavt/pd.c
+++ b/drivers/infiniband/sw/rdmavt/pd.c
@@ -90,7 +90,7 @@ struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev,
spin_unlock(&dev->n_pds_lock);
/* ib_alloc_pd() will initialize pd->ibpd. */
- pd->user = udata ? 1 : 0;
+ pd->user = !!udata;
ret = &pd->ibpd;
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 2a13ac660f2b..8876ee7bc326 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016, 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -51,10 +51,51 @@
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>
+#include <rdma/ib_hdrs.h>
#include "qp.h"
#include "vt.h"
#include "trace.h"
+static void rvt_rc_timeout(unsigned long arg);
+
+/*
+ * Convert the AETH RNR timeout code into the number of microseconds.
+ */
+static const u32 ib_rvt_rnr_table[32] = {
+ 655360, /* 00: 655.36 */
+ 10, /* 01: .01 */
+ 20, /* 02 .02 */
+ 30, /* 03: .03 */
+ 40, /* 04: .04 */
+ 60, /* 05: .06 */
+ 80, /* 06: .08 */
+ 120, /* 07: .12 */
+ 160, /* 08: .16 */
+ 240, /* 09: .24 */
+ 320, /* 0A: .32 */
+ 480, /* 0B: .48 */
+ 640, /* 0C: .64 */
+ 960, /* 0D: .96 */
+ 1280, /* 0E: 1.28 */
+ 1920, /* 0F: 1.92 */
+ 2560, /* 10: 2.56 */
+ 3840, /* 11: 3.84 */
+ 5120, /* 12: 5.12 */
+ 7680, /* 13: 7.68 */
+ 10240, /* 14: 10.24 */
+ 15360, /* 15: 15.36 */
+ 20480, /* 16: 20.48 */
+ 30720, /* 17: 30.72 */
+ 40960, /* 18: 40.96 */
+ 61440, /* 19: 61.44 */
+ 81920, /* 1A: 81.92 */
+ 122880, /* 1B: 122.88 */
+ 163840, /* 1C: 163.84 */
+ 245760, /* 1D: 245.76 */
+ 327680, /* 1E: 327.68 */
+ 491520 /* 1F: 491.52 */
+};
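
The table above decodes the 5-bit RNR timeout code carried in an AETH into microseconds; rvt_aeth_to_usec() further down simply shifts and masks the code out of the 32-bit AETH. A small standalone sketch of that decode, assuming the usual AETH layout (a 5-bit credit/timeout code above a 24-bit MSN) and using local stand-ins for the IB_AETH_* macros:

#include <stdint.h>
#include <stdio.h>

/* 5-bit RNR timeout code -> microseconds (values from the table above). */
static const uint32_t rnr_table[32] = {
        655360, 10, 20, 30, 40, 60, 80, 120, 160, 240, 320, 480,
        640, 960, 1280, 1920, 2560, 3840, 5120, 7680, 10240, 15360,
        20480, 30720, 40960, 61440, 81920, 122880, 163840, 245760,
        327680, 491520
};

/* Assumed AETH layout: bits 28-24 carry the code, bits 23-0 the MSN. */
#define AETH_CREDIT_SHIFT 24
#define AETH_CREDIT_MASK  0x1F

static uint32_t aeth_to_usec(uint32_t aeth)
{
        return rnr_table[(aeth >> AETH_CREDIT_SHIFT) & AETH_CREDIT_MASK];
}

int main(void)
{
        uint32_t aeth = (0x0Eu << AETH_CREDIT_SHIFT) | 0x1234; /* code 0x0E, MSN 0x1234 */

        printf("RNR wait: %u usec\n", aeth_to_usec(aeth));     /* 1280 */
        return 0;
}

rvt_add_rnr_timer() below then converts this microsecond value to nanoseconds with ns_to_ktime(1000 * to) when arming the hrtimer.
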
+
/*
* Note that it is OK to post send work requests in the SQE and ERR
* states; rvt_do_send() will process them and generate error
@@ -76,28 +117,10 @@ const int ib_rvt_state_ops[IB_QPS_ERR + 1] = {
};
EXPORT_SYMBOL(ib_rvt_state_ops);
-/*
- * Translate ib_wr_opcode into ib_wc_opcode.
- */
-const enum ib_wc_opcode ib_rvt_wc_opcode[] = {
- [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
- [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
- [IB_WR_SEND] = IB_WC_SEND,
- [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
- [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
- [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
- [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
- [IB_WR_SEND_WITH_INV] = IB_WC_SEND,
- [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
- [IB_WR_REG_MR] = IB_WC_REG_MR
-};
-EXPORT_SYMBOL(ib_rvt_wc_opcode);
-
static void get_map_page(struct rvt_qpn_table *qpt,
- struct rvt_qpn_map *map,
- gfp_t gfp)
+ struct rvt_qpn_map *map)
{
- unsigned long page = get_zeroed_page(gfp);
+ unsigned long page = get_zeroed_page(GFP_KERNEL);
/*
* Free the page if someone raced with us installing it.
@@ -149,7 +172,7 @@ static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt)
rdi->dparms.qpn_res_start, rdi->dparms.qpn_res_end);
for (i = rdi->dparms.qpn_res_start; i <= rdi->dparms.qpn_res_end; i++) {
if (!map->page) {
- get_map_page(qpt, map, GFP_KERNEL);
+ get_map_page(qpt, map);
if (!map->page) {
ret = -ENOMEM;
break;
@@ -200,7 +223,8 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi)
if (!rdi->driver_f.free_all_qps ||
!rdi->driver_f.qp_priv_alloc ||
!rdi->driver_f.qp_priv_free ||
- !rdi->driver_f.notify_qp_reset)
+ !rdi->driver_f.notify_qp_reset ||
+ !rdi->driver_f.notify_restart_rc)
return -EINVAL;
/* allocate parent object */
@@ -317,14 +341,14 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
* Return: The queue pair number
*/
static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
- enum ib_qp_type type, u8 port_num, gfp_t gfp)
+ enum ib_qp_type type, u8 port_num)
{
u32 i, offset, max_scan, qpn;
struct rvt_qpn_map *map;
u32 ret;
if (rdi->driver_f.alloc_qpn)
- return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num, gfp);
+ return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num);
if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
unsigned n;
@@ -349,7 +373,7 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
max_scan = qpt->nmaps - !offset;
for (i = 0;;) {
if (unlikely(!map->page)) {
- get_map_page(qpt, map, gfp);
+ get_map_page(qpt, map);
if (unlikely(!map->page))
break;
}
@@ -587,6 +611,7 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
/* Let drivers flush their waitlist */
rdi->driver_f.flush_qp_waiters(qp);
+ rvt_stop_rc_timers(qp);
qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT);
spin_unlock(&qp->s_lock);
spin_unlock(&qp->s_hlock);
@@ -594,7 +619,7 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
/* Stop the send queue and the retry timer */
rdi->driver_f.stop_send_queue(qp);
-
+ rvt_del_timers_sync(qp);
/* Wait for things to stop */
rdi->driver_f.quiesce_qp(qp);
@@ -646,7 +671,6 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
struct ib_qp *ret = ERR_PTR(-ENOMEM);
struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device);
void *priv = NULL;
- gfp_t gfp;
size_t sqsize;
if (!rdi)
@@ -654,18 +678,9 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge ||
init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr ||
- init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO))
+ init_attr->create_flags)
return ERR_PTR(-EINVAL);
- /* GFP_NOIO is applicable to RC QP's only */
-
- if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO &&
- init_attr->qp_type != IB_QPT_RC)
- return ERR_PTR(-EINVAL);
-
- gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ?
- GFP_NOIO : GFP_KERNEL;
-
/* Check receive queue parameters if no SRQ is specified. */
if (!init_attr->srq) {
if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge ||
@@ -693,14 +708,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
sz = sizeof(struct rvt_sge) *
init_attr->cap.max_send_sge +
sizeof(struct rvt_swqe);
- if (gfp == GFP_NOIO)
- swq = __vmalloc(
- sqsize * sz,
- gfp | __GFP_ZERO, PAGE_KERNEL);
- else
- swq = vzalloc_node(
- sqsize * sz,
- rdi->dparms.node);
+ swq = vzalloc_node(sqsize * sz, rdi->dparms.node);
if (!swq)
return ERR_PTR(-ENOMEM);
@@ -715,7 +723,8 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
} else if (init_attr->cap.max_recv_sge > 1)
sg_list_sz = sizeof(*qp->r_sg_list) *
(init_attr->cap.max_recv_sge - 1);
- qp = kzalloc_node(sz + sg_list_sz, gfp, rdi->dparms.node);
+ qp = kzalloc_node(sz + sg_list_sz, GFP_KERNEL,
+ rdi->dparms.node);
if (!qp)
goto bail_swq;
@@ -725,17 +734,22 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
kzalloc_node(
sizeof(*qp->s_ack_queue) *
rvt_max_atomic(rdi),
- gfp,
+ GFP_KERNEL,
rdi->dparms.node);
if (!qp->s_ack_queue)
goto bail_qp;
}
+ /* initialize timers needed for rc qp */
+ setup_timer(&qp->s_timer, rvt_rc_timeout, (unsigned long)qp);
+ hrtimer_init(&qp->s_rnr_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+ qp->s_rnr_timer.function = rvt_rc_rnr_retry;
/*
* Driver needs to set up its private QP structure and do any
* initialization that is needed.
*/
- priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp);
+ priv = rdi->driver_f.qp_priv_alloc(rdi, qp);
if (IS_ERR(priv)) {
ret = priv;
goto bail_qp;
@@ -755,11 +769,6 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
qp->r_rq.wq = vmalloc_user(
sizeof(struct rvt_rwq) +
qp->r_rq.size * sz);
- else if (gfp == GFP_NOIO)
- qp->r_rq.wq = __vmalloc(
- sizeof(struct rvt_rwq) +
- qp->r_rq.size * sz,
- gfp | __GFP_ZERO, PAGE_KERNEL);
else
qp->r_rq.wq = vzalloc_node(
sizeof(struct rvt_rwq) +
@@ -793,7 +802,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table,
init_attr->qp_type,
- init_attr->port_num, gfp);
+ init_attr->port_num);
if (err < 0) {
ret = ERR_PTR(err);
goto bail_rq_wq;
@@ -1073,14 +1082,15 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto inval;
if (attr_mask & IB_QP_AV) {
- if (attr->ah_attr.dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
+ if (rdma_ah_get_dlid(&attr->ah_attr) >=
+ be16_to_cpu(IB_MULTICAST_LID_BASE))
goto inval;
if (rvt_check_ah(qp->ibqp.device, &attr->ah_attr))
goto inval;
}
if (attr_mask & IB_QP_ALT_PATH) {
- if (attr->alt_ah_attr.dlid >=
+ if (rdma_ah_get_dlid(&attr->alt_ah_attr) >=
be16_to_cpu(IB_MULTICAST_LID_BASE))
goto inval;
if (rvt_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
@@ -1209,7 +1219,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (attr_mask & IB_QP_AV) {
qp->remote_ah_attr = attr->ah_attr;
- qp->s_srate = attr->ah_attr.static_rate;
+ qp->s_srate = rdma_ah_get_static_rate(&attr->ah_attr);
qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
}
@@ -1222,7 +1232,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
qp->s_mig_state = attr->path_mig_state;
if (mig) {
qp->remote_ah_attr = qp->alt_ah_attr;
- qp->port_num = qp->alt_ah_attr.port_num;
+ qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
qp->s_pkey_index = qp->s_alt_pkey_index;
}
}
@@ -1248,9 +1258,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (attr_mask & IB_QP_TIMEOUT) {
qp->timeout = attr->timeout;
- qp->timeout_jiffies =
- usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
- 1000UL);
+ qp->timeout_jiffies = rvt_timeout_to_jiffies(qp->timeout);
}
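
The open-coded expression replaced here is the IBTA local ACK timeout formula, 4.096 us * 2^timeout, computed in nanoseconds and divided down to microseconds before the jiffies conversion; rvt_timeout_to_jiffies() is assumed to be a new helper in the rdmavt headers that wraps exactly this. A minimal sketch of the arithmetic:

#include <stdio.h>

/* IBTA local ACK timeout: 4.096 us * 2^timeout (timeout is a 5-bit field).
 * Computed as 4096 ns * 2^timeout, then divided by 1000 to get usec.
 */
static unsigned long timeout_to_usec(unsigned int timeout)
{
        return (4096UL * (1UL << (timeout & 0x1F))) / 1000UL;
}

int main(void)
{
        /* timeout code 14 -> roughly 67 ms, a common default */
        printf("timeout 14 = %lu usec\n", timeout_to_usec(14));
        return 0;
}
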
if (attr_mask & IB_QP_QKEY)
@@ -1393,7 +1401,8 @@ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr->timeout = qp->timeout;
attr->retry_cnt = qp->s_retry_cnt;
attr->rnr_retry = qp->s_rnr_retry_cnt;
- attr->alt_port_num = qp->alt_ah_attr.port_num;
+ attr->alt_port_num =
+ rdma_ah_get_port_num(&qp->alt_ah_attr);
attr->alt_timeout = qp->alt_timeout;
init_attr->event_handler = qp->ibqp.event_handler;
@@ -1741,11 +1750,14 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
0);
qp->s_next_psn = wqe->lpsn + 1;
}
- trace_rvt_post_one_wr(qp, wqe);
- if (unlikely(reserved_op))
+ if (unlikely(reserved_op)) {
+ wqe->wr.send_flags |= RVT_SEND_RESERVE_USED;
rvt_qp_wqe_reserve(qp, wqe);
- else
+ } else {
+ wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED;
qp->s_avail--;
+ }
+ trace_rvt_post_one_wr(qp, wqe);
smp_wmb(); /* see request builders */
qp->s_head = next;
@@ -1868,3 +1880,188 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
}
return 0;
}
+
+/**
+ * rvt_comm_est - handle trap with QP established
+ * @qp: the QP
+ */
+void rvt_comm_est(struct rvt_qp *qp)
+{
+ qp->r_flags |= RVT_R_COMM_EST;
+ if (qp->ibqp.event_handler) {
+ struct ib_event ev;
+
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_COMM_EST;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+}
+EXPORT_SYMBOL(rvt_comm_est);
+
+void rvt_rc_error(struct rvt_qp *qp, enum ib_wc_status err)
+{
+ unsigned long flags;
+ int lastwqe;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ lastwqe = rvt_error_qp(qp, err);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ if (lastwqe) {
+ struct ib_event ev;
+
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+}
+EXPORT_SYMBOL(rvt_rc_error);
+
+/*
+ * rvt_rnr_tbl_to_usec - convert an index into ib_rvt_rnr_table to usec
+ * @index - the index
+ * return usec from an index into ib_rvt_rnr_table
+ */
+unsigned long rvt_rnr_tbl_to_usec(u32 index)
+{
+ return ib_rvt_rnr_table[(index & IB_AETH_CREDIT_MASK)];
+}
+EXPORT_SYMBOL(rvt_rnr_tbl_to_usec);
+
+static inline unsigned long rvt_aeth_to_usec(u32 aeth)
+{
+ return ib_rvt_rnr_table[(aeth >> IB_AETH_CREDIT_SHIFT) &
+ IB_AETH_CREDIT_MASK];
+}
+
+/*
+ * rvt_add_retry_timer - add/start a retry timer
+ * @qp - the QP
+ * add a retry timer on the QP
+ */
+void rvt_add_retry_timer(struct rvt_qp *qp)
+{
+ struct ib_qp *ibqp = &qp->ibqp;
+ struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+
+ lockdep_assert_held(&qp->s_lock);
+ qp->s_flags |= RVT_S_TIMER;
+ /* 4.096 usec. * (1 << qp->timeout) */
+ qp->s_timer.expires = jiffies + qp->timeout_jiffies +
+ rdi->busy_jiffies;
+ add_timer(&qp->s_timer);
+}
+EXPORT_SYMBOL(rvt_add_retry_timer);
+
+/**
+ * rvt_add_rnr_timer - add/start an rnr timer
+ * @qp - the QP
+ * @aeth - aeth of RNR timeout, simulated aeth for loopback
+ * add an rnr timer on the QP
+ */
+void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth)
+{
+ u32 to;
+
+ lockdep_assert_held(&qp->s_lock);
+ qp->s_flags |= RVT_S_WAIT_RNR;
+ to = rvt_aeth_to_usec(aeth);
+ hrtimer_start(&qp->s_rnr_timer,
+ ns_to_ktime(1000 * to), HRTIMER_MODE_REL);
+}
+EXPORT_SYMBOL(rvt_add_rnr_timer);
+
+/**
+ * rvt_stop_rc_timers - stop all timers
+ * @qp - the QP
+ * stop any pending timers
+ */
+void rvt_stop_rc_timers(struct rvt_qp *qp)
+{
+ lockdep_assert_held(&qp->s_lock);
+ /* Remove QP from all timers */
+ if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
+ qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
+ del_timer(&qp->s_timer);
+ hrtimer_try_to_cancel(&qp->s_rnr_timer);
+ }
+}
+EXPORT_SYMBOL(rvt_stop_rc_timers);
+
+/**
+ * rvt_stop_rnr_timer - stop an rnr timer
+ * @qp - the QP
+ *
+ * stop an rnr timer and return whether the timer
+ * had been pending.
+ */
+static int rvt_stop_rnr_timer(struct rvt_qp *qp)
+{
+ int rval = 0;
+
+ lockdep_assert_held(&qp->s_lock);
+ /* Remove QP from rnr timer */
+ if (qp->s_flags & RVT_S_WAIT_RNR) {
+ qp->s_flags &= ~RVT_S_WAIT_RNR;
+ rval = hrtimer_try_to_cancel(&qp->s_rnr_timer);
+ }
+ return rval;
+}
+
+/**
+ * rvt_del_timers_sync - wait for any timeout routines to exit
+ * @qp - the QP
+ */
+void rvt_del_timers_sync(struct rvt_qp *qp)
+{
+ del_timer_sync(&qp->s_timer);
+ hrtimer_cancel(&qp->s_rnr_timer);
+}
+EXPORT_SYMBOL(rvt_del_timers_sync);
+
+/*
+ * This is called from s_timer for missing responses.
+ */
+static void rvt_rc_timeout(unsigned long arg)
+{
+ struct rvt_qp *qp = (struct rvt_qp *)arg;
+ struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->r_lock, flags);
+ spin_lock(&qp->s_lock);
+ if (qp->s_flags & RVT_S_TIMER) {
+ struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
+
+ qp->s_flags &= ~RVT_S_TIMER;
+ rvp->n_rc_timeouts++;
+ del_timer(&qp->s_timer);
+ trace_rvt_rc_timeout(qp, qp->s_last_psn + 1);
+ if (rdi->driver_f.notify_restart_rc)
+ rdi->driver_f.notify_restart_rc(qp,
+ qp->s_last_psn + 1,
+ 1);
+ rdi->driver_f.schedule_send(qp);
+ }
+ spin_unlock(&qp->s_lock);
+ spin_unlock_irqrestore(&qp->r_lock, flags);
+}
+
+/*
+ * This is called from s_rnr_timer for RNR timeouts.
+ */
+enum hrtimer_restart rvt_rc_rnr_retry(struct hrtimer *t)
+{
+ struct rvt_qp *qp = container_of(t, struct rvt_qp, s_rnr_timer);
+ struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ rvt_stop_rnr_timer(qp);
+ rdi->driver_f.schedule_send(qp);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return HRTIMER_NORESTART;
+}
+EXPORT_SYMBOL(rvt_rc_rnr_retry);
diff --git a/drivers/infiniband/sw/rdmavt/rc.c b/drivers/infiniband/sw/rdmavt/rc.c
new file mode 100644
index 000000000000..6131cc558bdb
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/rc.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+#include <rdma/ib_hdrs.h>
+
+/*
+ * Convert the AETH credit code into the number of credits.
+ */
+static const u16 credit_table[31] = {
+ 0, /* 0 */
+ 1, /* 1 */
+ 2, /* 2 */
+ 3, /* 3 */
+ 4, /* 4 */
+ 6, /* 5 */
+ 8, /* 6 */
+ 12, /* 7 */
+ 16, /* 8 */
+ 24, /* 9 */
+ 32, /* A */
+ 48, /* B */
+ 64, /* C */
+ 96, /* D */
+ 128, /* E */
+ 192, /* F */
+ 256, /* 10 */
+ 384, /* 11 */
+ 512, /* 12 */
+ 768, /* 13 */
+ 1024, /* 14 */
+ 1536, /* 15 */
+ 2048, /* 16 */
+ 3072, /* 17 */
+ 4096, /* 18 */
+ 6144, /* 19 */
+ 8192, /* 1A */
+ 12288, /* 1B */
+ 16384, /* 1C */
+ 24576, /* 1D */
+ 32768 /* 1E */
+};
+
+/**
+ * rvt_compute_aeth - compute the AETH (syndrome + MSN)
+ * @qp: the queue pair to compute the AETH for
+ *
+ * Returns the AETH.
+ */
+__be32 rvt_compute_aeth(struct rvt_qp *qp)
+{
+ u32 aeth = qp->r_msn & IB_MSN_MASK;
+
+ if (qp->ibqp.srq) {
+ /*
+ * Shared receive queues don't generate credits.
+ * Set the credit field to the invalid value.
+ */
+ aeth |= IB_AETH_CREDIT_INVAL << IB_AETH_CREDIT_SHIFT;
+ } else {
+ u32 min, max, x;
+ u32 credits;
+ struct rvt_rwq *wq = qp->r_rq.wq;
+ u32 head;
+ u32 tail;
+
+ /* sanity check pointers before trusting them */
+ head = wq->head;
+ if (head >= qp->r_rq.size)
+ head = 0;
+ tail = wq->tail;
+ if (tail >= qp->r_rq.size)
+ tail = 0;
+ /*
+ * Compute the number of credits available (RWQEs).
+ * There is a small chance that the pair of reads are
+ * not atomic, which is OK, since the fuzziness is
+ * resolved as further ACKs go out.
+ */
+ credits = head - tail;
+ if ((int)credits < 0)
+ credits += qp->r_rq.size;
+ /*
+ * Binary search the credit table to find the code to
+ * use.
+ */
+ min = 0;
+ max = 31;
+ for (;;) {
+ x = (min + max) / 2;
+ if (credit_table[x] == credits)
+ break;
+ if (credit_table[x] > credits) {
+ max = x;
+ } else {
+ if (min == x)
+ break;
+ min = x;
+ }
+ }
+ aeth |= x << IB_AETH_CREDIT_SHIFT;
+ }
+ return cpu_to_be32(aeth);
+}
+EXPORT_SYMBOL(rvt_compute_aeth);
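
The AETH built above packs the 24-bit MSN with a 5-bit credit code, and the code is found by binary-searching credit_table for the entry that best matches the number of available RWQEs. A standalone sketch of that search, reusing the table values from this hunk:

#include <stdint.h>
#include <stdio.h>

/* Credit code -> number of RWQEs (same values as credit_table above). */
static const uint16_t credit_table[31] = {
        0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192,
        256, 384, 512, 768, 1024, 1536, 2048, 3072, 4096, 6144,
        8192, 12288, 16384, 24576, 32768
};

/* Mirror of the search above: find the code for the largest table entry
 * that does not exceed the number of available credits.
 */
static uint32_t credits_to_code(uint32_t credits)
{
        uint32_t min = 0, max = 31, x;

        for (;;) {
                x = (min + max) / 2;
                if (credit_table[x] == credits)
                        break;
                if (credit_table[x] > credits) {
                        max = x;
                } else {
                        if (min == x)
                                break;
                        min = x;
                }
        }
        return x;
}

int main(void)
{
        printf("5 RWQEs  -> code %u\n", credits_to_code(5));   /* 4, entry 4   */
        printf("96 RWQEs -> code %u\n", credits_to_code(96));  /* 13, entry 96 */
        return 0;
}

rvt_get_credit() below performs the inverse lookup: it indexes credit_table with the received code to recover the credit count when computing the new LSN.
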
+
+/**
+ * rvt_get_credit - flush the send work queue of a QP
+ * @qp: the qp whose send work queue to flush
+ * @aeth: the Acknowledge Extended Transport Header
+ *
+ * The QP s_lock should be held.
+ */
+void rvt_get_credit(struct rvt_qp *qp, u32 aeth)
+{
+ struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+ u32 credit = (aeth >> IB_AETH_CREDIT_SHIFT) & IB_AETH_CREDIT_MASK;
+
+ lockdep_assert_held(&qp->s_lock);
+ /*
+ * If the credit is invalid, we can send
+ * as many packets as we like. Otherwise, we have to
+ * honor the credit field.
+ */
+ if (credit == IB_AETH_CREDIT_INVAL) {
+ if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
+ qp->s_flags |= RVT_S_UNLIMITED_CREDIT;
+ if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
+ qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
+ rdi->driver_f.schedule_send(qp);
+ }
+ }
+ } else if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
+ /* Compute new LSN (i.e., MSN + credit) */
+ credit = (aeth + credit_table[credit]) & IB_MSN_MASK;
+ if (rvt_cmp_msn(credit, qp->s_lsn) > 0) {
+ qp->s_lsn = credit;
+ if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
+ qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
+ rdi->driver_f.schedule_send(qp);
+ }
+ }
+ }
+}
+EXPORT_SYMBOL(rvt_get_credit);
diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h
index e2d23acb6a7d..bb4b1e710f22 100644
--- a/drivers/infiniband/sw/rdmavt/trace.h
+++ b/drivers/infiniband/sw/rdmavt/trace.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016, 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -52,3 +52,5 @@
#include "trace_qp.h"
#include "trace_tx.h"
#include "trace_mr.h"
+#include "trace_cq.h"
+#include "trace_rc.h"
diff --git a/drivers/infiniband/sw/rdmavt/trace_cq.h b/drivers/infiniband/sw/rdmavt/trace_cq.h
new file mode 100644
index 000000000000..a315850aa9bb
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace_cq.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__RVT_TRACE_CQ_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RVT_TRACE_CQ_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/rdmavt_cq.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt_cq
+
+#define wc_opcode_name(opcode) { IB_WC_##opcode, #opcode }
+#define show_wc_opcode(opcode) \
+__print_symbolic(opcode, \
+ wc_opcode_name(SEND), \
+ wc_opcode_name(RDMA_WRITE), \
+ wc_opcode_name(RDMA_READ), \
+ wc_opcode_name(COMP_SWAP), \
+ wc_opcode_name(FETCH_ADD), \
+ wc_opcode_name(LSO), \
+ wc_opcode_name(LOCAL_INV), \
+ wc_opcode_name(REG_MR), \
+ wc_opcode_name(MASKED_COMP_SWAP), \
+ wc_opcode_name(RECV), \
+ wc_opcode_name(RECV_RDMA_WITH_IMM))
+
+#define CQ_PRN \
+"[%s] idx %u wr_id %llx status %u opcode %u,%s length %u qpn %x"
+
+DECLARE_EVENT_CLASS(
+ rvt_cq_entry_template,
+ TP_PROTO(struct rvt_cq *cq, struct ib_wc *wc, u32 idx),
+ TP_ARGS(cq, wc, idx),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(cq->rdi)
+ __field(u64, wr_id)
+ __field(u32, status)
+ __field(u32, opcode)
+ __field(u32, qpn)
+ __field(u32, length)
+ __field(u32, idx)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(cq->rdi)
+ __entry->wr_id = wc->wr_id;
+ __entry->status = wc->status;
+ __entry->opcode = wc->opcode;
+ __entry->length = wc->byte_len;
+ __entry->qpn = wc->qp->qp_num;
+ __entry->idx = idx;
+ ),
+ TP_printk(
+ CQ_PRN,
+ __get_str(dev),
+ __entry->idx,
+ __entry->wr_id,
+ __entry->status,
+ __entry->opcode, show_wc_opcode(__entry->opcode),
+ __entry->length,
+ __entry->qpn
+ )
+);
+
+DEFINE_EVENT(
+ rvt_cq_entry_template, rvt_cq_enter,
+ TP_PROTO(struct rvt_cq *cq, struct ib_wc *wc, u32 idx),
+ TP_ARGS(cq, wc, idx));
+
+DEFINE_EVENT(
+ rvt_cq_entry_template, rvt_cq_poll,
+ TP_PROTO(struct rvt_cq *cq, struct ib_wc *wc, u32 idx),
+ TP_ARGS(cq, wc, idx));
+
+#endif /* __RVT_TRACE_CQ_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_cq
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/sw/rdmavt/dma.h b/drivers/infiniband/sw/rdmavt/trace_rc.h
index 979f07e09195..995276933a55 100644
--- a/drivers/infiniband/sw/rdmavt/dma.h
+++ b/drivers/infiniband/sw/rdmavt/trace_rc.h
@@ -1,8 +1,5 @@
-#ifndef DEF_RDMAVTDMA_H
-#define DEF_RDMAVTDMA_H
-
/*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -47,7 +44,66 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
+#if !defined(__RVT_TRACE_RC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RVT_TRACE_RC_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_vt.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt_rc
+
+DECLARE_EVENT_CLASS(rvt_rc_template,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u32, s_flags)
+ __field(u32, psn)
+ __field(u32, s_psn)
+ __field(u32, s_next_psn)
+ __field(u32, s_sending_psn)
+ __field(u32, s_sending_hpsn)
+ __field(u32, r_psn)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->s_flags = qp->s_flags;
+ __entry->psn = psn;
+ __entry->s_psn = qp->s_psn;
+ __entry->s_next_psn = qp->s_next_psn;
+ __entry->s_sending_psn = qp->s_sending_psn;
+ __entry->s_sending_hpsn = qp->s_sending_hpsn;
+ __entry->r_psn = qp->r_psn;
+ ),
+ TP_printk(
+ "[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->s_flags,
+ __entry->psn,
+ __entry->s_psn,
+ __entry->s_next_psn,
+ __entry->s_sending_psn,
+ __entry->s_sending_hpsn,
+ __entry->r_psn
+ )
+);
+
+DEFINE_EVENT(rvt_rc_template, rvt_rc_timeout,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn)
+);
-extern struct ib_dma_mapping_ops rvt_default_dma_mapping_ops;
+#endif /* __RVT_TRACE_RC_H */
-#endif /* DEF_RDMAVTDMA_H */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_rc
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h
index 0e03173662d8..a613a2223751 100644
--- a/drivers/infiniband/sw/rdmavt/trace_tx.h
+++ b/drivers/infiniband/sw/rdmavt/trace_tx.h
@@ -71,10 +71,20 @@ __print_symbolic(opcode, \
wr_opcode_name(RDMA_READ_WITH_INV), \
wr_opcode_name(LOCAL_INV), \
wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \
- wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD))
+ wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD), \
+ wr_opcode_name(RESERVED1), \
+ wr_opcode_name(RESERVED2), \
+ wr_opcode_name(RESERVED3), \
+ wr_opcode_name(RESERVED4), \
+ wr_opcode_name(RESERVED5), \
+ wr_opcode_name(RESERVED6), \
+ wr_opcode_name(RESERVED7), \
+ wr_opcode_name(RESERVED8), \
+ wr_opcode_name(RESERVED9), \
+ wr_opcode_name(RESERVED10))
#define POS_PRN \
-"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u"
+"[%s] wqe %p wr_id %llx send_flags %x qpn %x qpt %u psn %x lpsn %x ssn %x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u pid %u num_sge %u"
TRACE_EVENT(
rvt_post_one_wr,
@@ -83,7 +93,9 @@ TRACE_EVENT(
TP_STRUCT__entry(
RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
__field(u64, wr_id)
+ __field(struct rvt_swqe *, wqe)
__field(u32, qpn)
+ __field(u32, qpt)
__field(u32, psn)
__field(u32, lpsn)
__field(u32, length)
@@ -92,11 +104,17 @@ TRACE_EVENT(
__field(u32, avail)
__field(u32, head)
__field(u32, last)
+ __field(u32, ssn)
+ __field(int, send_flags)
+ __field(pid_t, pid)
+ __field(int, num_sge)
),
TP_fast_assign(
RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
+ __entry->wqe = wqe;
__entry->wr_id = wqe->wr.wr_id;
__entry->qpn = qp->ibqp.qp_num;
+ __entry->qpt = qp->ibqp.qp_type;
__entry->psn = wqe->psn;
__entry->lpsn = wqe->lpsn;
__entry->length = wqe->length;
@@ -105,20 +123,30 @@ TRACE_EVENT(
__entry->avail = qp->s_avail;
__entry->head = qp->s_head;
__entry->last = qp->s_last;
+ __entry->pid = qp->pid;
+ __entry->ssn = wqe->ssn;
+ __entry->send_flags = wqe->wr.send_flags;
+ __entry->num_sge = wqe->wr.num_sge;
),
TP_printk(
POS_PRN,
__get_str(dev),
+ __entry->wqe,
__entry->wr_id,
+ __entry->send_flags,
__entry->qpn,
+ __entry->qpt,
__entry->psn,
__entry->lpsn,
+ __entry->ssn,
__entry->length,
__entry->opcode, show_wr_opcode(__entry->opcode),
__entry->size,
__entry->avail,
__entry->head,
- __entry->last
+ __entry->last,
+ __entry->pid,
+ __entry->num_sge
)
);
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index d430c2f7cec4..0d7c6bb551d9 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -47,6 +47,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
#include "vt.h"
#include "trace.h"
@@ -165,7 +166,7 @@ static int rvt_query_port(struct ib_device *ibdev, u8 port_num,
return -EINVAL;
rvp = rdi->ports[port_index];
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->sm_lid = rvp->sm_lid;
props->sm_sl = rvp->sm_sl;
props->port_cap_flags = rvp->port_cap_flags;
@@ -326,13 +327,14 @@ static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num,
if (port_index < 0)
return -EINVAL;
- err = rvt_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = rdi->dparms.core_cap_flags;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = rdi->dparms.core_cap_flags;
immutable->max_mad_size = rdi->dparms.max_mad_size;
return 0;
@@ -777,8 +779,7 @@ int rvt_register_device(struct rvt_dev_info *rdi)
}
/* DMA Operations */
- rdi->ibdev.dma_ops =
- rdi->ibdev.dma_ops ? : &rvt_default_dma_mapping_ops;
+ rdi->ibdev.dev.dma_ops = rdi->ibdev.dev.dma_ops ? : &dma_virt_ops;
/* Protection Domain */
spin_lock_init(&rdi->n_pds_lock);
diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h
index 6b01eaa4461b..f363505312be 100644
--- a/drivers/infiniband/sw/rdmavt/vt.h
+++ b/drivers/infiniband/sw/rdmavt/vt.h
@@ -50,7 +50,6 @@
#include <rdma/rdma_vt.h>
#include <linux/pci.h>
-#include "dma.h"
#include "pd.h"
#include "qp.h"
#include "ah.h"
diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig
index 1e4e628fe7b0..320bffc980d8 100644
--- a/drivers/infiniband/sw/rxe/Kconfig
+++ b/drivers/infiniband/sw/rxe/Kconfig
@@ -2,6 +2,8 @@ config RDMA_RXE
tristate "Software RDMA over Ethernet (RoCE) driver"
depends on INET && PCI && INFINIBAND
depends on NET_UDP_TUNNEL
+ depends on CRYPTO_CRC32
+ select DMA_VIRT_OPS
---help---
This driver implements the InfiniBand RDMA transport over
the Linux network stack. It enables a system with a
@@ -21,4 +23,4 @@ config RDMA_RXE
To configure and work with soft-RoCE driver please use the
following wiki page under "configure Soft-RoCE (RXE)" section:
- https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
+ https://github.com/linux-rdma/rdma-core/blob/master/Documentation/rxe.md
diff --git a/drivers/infiniband/sw/rxe/Makefile b/drivers/infiniband/sw/rxe/Makefile
index 3b3fb9d1c470..3f12beb7076f 100644
--- a/drivers/infiniband/sw/rxe/Makefile
+++ b/drivers/infiniband/sw/rxe/Makefile
@@ -14,11 +14,11 @@ rdma_rxe-y := \
rxe_qp.o \
rxe_cq.o \
rxe_mr.o \
- rxe_dma.o \
rxe_opcode.o \
rxe_mmap.o \
rxe_icrc.o \
rxe_mcast.o \
rxe_task.o \
rxe_net.o \
- rxe_sysfs.o
+ rxe_sysfs.o \
+ rxe_hw_counters.o
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index ab6c3c25d7ff..c21c913f911a 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -31,6 +31,7 @@
* SOFTWARE.
*/
+#include <net/addrconf.h>
#include "rxe.h"
#include "rxe_loc.h"
@@ -64,6 +65,8 @@ static void rxe_cleanup(struct rxe_dev *rxe)
rxe_pool_cleanup(&rxe->mc_elem_pool);
rxe_cleanup_ports(rxe);
+
+ crypto_free_shash(rxe->tfm);
}
/* called when all references have been dropped */
@@ -178,7 +181,8 @@ static int rxe_init_ports(struct rxe_dev *rxe)
return -ENOMEM;
port->pkey_tbl[0] = 0xffff;
- port->port_guid = rxe->ifc_ops->port_guid(rxe);
+ addrconf_addr_eui48((unsigned char *)&port->port_guid,
+ rxe->ndev->dev_addr);
spin_lock_init(&port->port_lock);
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index a696af81e4a5..1ac5b8551a4d 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -50,6 +50,7 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>
+#include <crypto/hash.h>
#include "rxe_net.h"
#include "rxe_opcode.h"
@@ -64,6 +65,28 @@
#define RXE_ROCE_V2_SPORT (0xc000)
+static inline u32 rxe_crc32(struct rxe_dev *rxe,
+ u32 crc, void *next, size_t len)
+{
+ u32 retval;
+ int err;
+
+ SHASH_DESC_ON_STACK(shash, rxe->tfm);
+
+ shash->tfm = rxe->tfm;
+ shash->flags = 0;
+ *(u32 *)shash_desc_ctx(shash) = crc;
+ err = crypto_shash_update(shash, next, len);
+ if (unlikely(err)) {
+ pr_warn_ratelimited("failed crc calculation, err: %d\n", err);
+ return crc32_le(crc, next, len);
+ }
+
+ retval = *(u32 *)shash_desc_ctx(shash);
+ barrier_data(shash_desc_ctx(shash));
+ return retval;
+}
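
rxe_crc32() runs the data through a crypto_shash "crc32" transform and falls back to crc32_le() if the update fails; both compute the standard reflected CRC-32 used to accumulate the RoCE ICRC over the headers and payload. A self-contained bitwise equivalent of that accumulation (slow, for illustration only; the seed below is arbitrary, the real ICRC seeding is defined in rxe_icrc.c):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Bitwise reflected CRC-32 (polynomial 0xEDB88320), matching the semantics
 * of the kernel's crc32_le() that rxe_crc32() falls back to.
 */
static uint32_t crc32_le_bitwise(uint32_t crc, const void *buf, size_t len)
{
        const uint8_t *p = buf;

        while (len--) {
                crc ^= *p++;
                for (int i = 0; i < 8; i++)
                        crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320u : 0);
        }
        return crc;
}

int main(void)
{
        const char hdr[] = "example header bytes";
        uint32_t crc = 0xffffffffu;     /* arbitrary seed for the demo */

        /* Accumulate in two chunks, as rxe_icrc_hdr() does for the
         * pseudo-header/BTH and then the remaining headers.
         */
        crc = crc32_le_bitwise(crc, hdr, 8);
        crc = crc32_le_bitwise(crc, hdr + 8, sizeof(hdr) - 1 - 8);
        printf("crc = 0x%08" PRIx32 "\n", crc);
        return 0;
}
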
+
int rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);
int rxe_add(struct rxe_dev *rxe, unsigned int mtu);
diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c
index 604f6fee96bd..5bddf469361b 100644
--- a/drivers/infiniband/sw/rxe/rxe_av.c
+++ b/drivers/infiniband/sw/rxe/rxe_av.c
@@ -34,21 +34,22 @@
#include "rxe.h"
#include "rxe_loc.h"
-int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr)
+int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr)
{
struct rxe_port *port;
- if (attr->port_num != 1) {
- pr_info("invalid port_num = %d\n", attr->port_num);
+ if (rdma_ah_get_port_num(attr) != 1) {
+ pr_info("invalid port_num = %d\n", rdma_ah_get_port_num(attr));
return -EINVAL;
}
port = &rxe->port;
- if (attr->ah_flags & IB_AH_GRH) {
- if (attr->grh.sgid_index > port->attr.gid_tbl_len) {
- pr_info("invalid sgid index = %d\n",
- attr->grh.sgid_index);
+ if (rdma_ah_get_ah_flags(attr) & IB_AH_GRH) {
+ u8 sgid_index = rdma_ah_read_grh(attr)->sgid_index;
+
+ if (sgid_index > port->attr.gid_tbl_len) {
+ pr_info("invalid sgid index = %d\n", sgid_index);
return -EINVAL;
}
}
@@ -57,30 +58,33 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr)
}
int rxe_av_from_attr(struct rxe_dev *rxe, u8 port_num,
- struct rxe_av *av, struct ib_ah_attr *attr)
+ struct rxe_av *av, struct rdma_ah_attr *attr)
{
memset(av, 0, sizeof(*av));
- memcpy(&av->grh, &attr->grh, sizeof(attr->grh));
+ memcpy(&av->grh, rdma_ah_read_grh(attr),
+ sizeof(*rdma_ah_read_grh(attr)));
av->port_num = port_num;
return 0;
}
int rxe_av_to_attr(struct rxe_dev *rxe, struct rxe_av *av,
- struct ib_ah_attr *attr)
+ struct rdma_ah_attr *attr)
{
- memcpy(&attr->grh, &av->grh, sizeof(av->grh));
- attr->port_num = av->port_num;
+ attr->type = RDMA_AH_ATTR_TYPE_ROCE;
+ memcpy(rdma_ah_retrieve_grh(attr), &av->grh, sizeof(av->grh));
+ rdma_ah_set_ah_flags(attr, IB_AH_GRH);
+ rdma_ah_set_port_num(attr, av->port_num);
return 0;
}
int rxe_av_fill_ip_info(struct rxe_dev *rxe,
struct rxe_av *av,
- struct ib_ah_attr *attr,
+ struct rdma_ah_attr *attr,
struct ib_gid_attr *sgid_attr,
union ib_gid *sgid)
{
rdma_gid2ip(&av->sgid_addr._sockaddr, sgid);
- rdma_gid2ip(&av->dgid_addr._sockaddr, &attr->grh.dgid);
+ rdma_gid2ip(&av->dgid_addr._sockaddr, &rdma_ah_read_grh(attr)->dgid);
av->network_type = ib_gid_to_network_type(sgid_attr->gid_type, sgid);
return 0;
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index d369f24425f9..9eb12c2e3c74 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -154,6 +154,8 @@ void rxe_comp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp,
skb_queue_tail(&qp->resp_pkts, skb);
must_sched = skb_queue_len(&qp->resp_pkts) > 1;
+ if (must_sched != 0)
+ rxe_counter_inc(rxe, RXE_CNT_COMPLETER_SCHED);
rxe_run_task(&qp->comp.task, must_sched);
}
@@ -236,6 +238,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
{
unsigned int mask = pkt->mask;
u8 syn;
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
/* Check the sequence only */
switch (qp->comp.opcode) {
@@ -254,7 +257,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
}
break;
default:
- WARN_ON(1);
+ WARN_ON_ONCE(1);
}
/* Check operation validity. */
@@ -298,6 +301,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
return COMPST_WRITE_SEND;
case AETH_RNR_NAK:
+ rxe_counter_inc(rxe, RXE_CNT_RCV_RNR);
return COMPST_RNR_RETRY;
case AETH_NAK:
@@ -307,6 +311,8 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
* before
*/
if (psn_compare(pkt->psn, qp->comp.psn) > 0) {
+ rxe_counter_inc(rxe,
+ RXE_CNT_RCV_SEQ_ERR);
qp->comp.psn = pkt->psn;
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
@@ -412,13 +418,21 @@ static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
}
}
+/*
+ * IBA Spec. Section 10.7.3.1 SIGNALED COMPLETIONS
+ * ---------8<---------8<-------------
+ * ...Note that if a completion error occurs, a Work Completion
+ * will always be generated, even if the signaling
+ * indicator requests an Unsignaled Completion.
+ * ---------8<---------8<-------------
+ */
static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
struct rxe_cqe cqe;
if ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED) ||
- (qp->req.state == QP_STATE_ERROR)) {
+ wqe->status != IB_WC_SUCCESS) {
make_send_cqe(qp, wqe, &cqe);
advance_consumer(qp->sq.queue);
rxe_cq_post(qp->scq, &cqe, 0);
@@ -503,57 +517,41 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp,
return COMPST_GET_WQE;
}
-int rxe_completer(void *arg)
+static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify)
{
- struct rxe_qp *qp = (struct rxe_qp *)arg;
- struct rxe_send_wqe *wqe = wqe;
- struct sk_buff *skb = NULL;
- struct rxe_pkt_info *pkt = NULL;
- enum comp_state state;
-
- rxe_add_ref(qp);
-
- if (!qp->valid) {
- while ((skb = skb_dequeue(&qp->resp_pkts))) {
- rxe_drop_ref(qp);
- kfree_skb(skb);
- }
- skb = NULL;
- pkt = NULL;
-
- while (queue_head(qp->sq.queue))
- advance_consumer(qp->sq.queue);
+ struct sk_buff *skb;
+ struct rxe_send_wqe *wqe;
- goto exit;
+ while ((skb = skb_dequeue(&qp->resp_pkts))) {
+ rxe_drop_ref(qp);
+ kfree_skb(skb);
}
- if (qp->req.state == QP_STATE_ERROR) {
- while ((skb = skb_dequeue(&qp->resp_pkts))) {
- rxe_drop_ref(qp);
- kfree_skb(skb);
- }
- skb = NULL;
- pkt = NULL;
-
- while ((wqe = queue_head(qp->sq.queue))) {
+ while ((wqe = queue_head(qp->sq.queue))) {
+ if (notify) {
wqe->status = IB_WC_WR_FLUSH_ERR;
do_complete(qp, wqe);
+ } else {
+ advance_consumer(qp->sq.queue);
}
-
- goto exit;
}
+}
- if (qp->req.state == QP_STATE_RESET) {
- while ((skb = skb_dequeue(&qp->resp_pkts))) {
- rxe_drop_ref(qp);
- kfree_skb(skb);
- }
- skb = NULL;
- pkt = NULL;
+int rxe_completer(void *arg)
+{
+ struct rxe_qp *qp = (struct rxe_qp *)arg;
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ struct rxe_send_wqe *wqe = wqe;
+ struct sk_buff *skb = NULL;
+ struct rxe_pkt_info *pkt = NULL;
+ enum comp_state state;
- while (queue_head(qp->sq.queue))
- advance_consumer(qp->sq.queue);
+ rxe_add_ref(qp);
+ if (!qp->valid || qp->req.state == QP_STATE_ERROR ||
+ qp->req.state == QP_STATE_RESET) {
+ rxe_drain_resp_pkts(qp, qp->valid &&
+ qp->req.state == QP_STATE_ERROR);
goto exit;
}
@@ -639,6 +637,7 @@ int rxe_completer(void *arg)
if (pkt) {
rxe_drop_ref(pkt->qp);
kfree_skb(skb);
+ skb = NULL;
}
goto done;
@@ -662,6 +661,7 @@ int rxe_completer(void *arg)
qp->qp_timeout_jiffies)
mod_timer(&qp->retrans_timer,
jiffies + qp->qp_timeout_jiffies);
+ WARN_ON_ONCE(skb);
goto exit;
case COMPST_ERROR_RETRY:
@@ -674,8 +674,10 @@ int rxe_completer(void *arg)
*/
/* there is nothing to retry in this case */
- if (!wqe || (wqe->state == wqe_state_posted))
+ if (!wqe || (wqe->state == wqe_state_posted)) {
+ WARN_ON_ONCE(skb);
goto exit;
+ }
if (qp->comp.retry_cnt > 0) {
if (qp->comp.retry_cnt != 7)
@@ -688,8 +690,10 @@ int rxe_completer(void *arg)
if (psn_compare(qp->req.psn,
qp->comp.psn) > 0) {
/* tell the requester to retry the
- * send send queue next time around
+ * send queue next time around
*/
+ rxe_counter_inc(rxe,
+ RXE_CNT_COMP_RETRY);
qp->req.need_retry = 1;
rxe_run_task(&qp->req.task, 1);
}
@@ -697,11 +701,14 @@ int rxe_completer(void *arg)
if (pkt) {
rxe_drop_ref(pkt->qp);
kfree_skb(skb);
+ skb = NULL;
}
+ WARN_ON_ONCE(skb);
goto exit;
} else {
+ rxe_counter_inc(rxe, RXE_CNT_RETRY_EXCEEDED);
wqe->status = IB_WC_RETRY_EXC_ERR;
state = COMPST_ERROR;
}
@@ -718,22 +725,30 @@ int rxe_completer(void *arg)
mod_timer(&qp->rnr_nak_timer,
jiffies + rnrnak_jiffies(aeth_syn(pkt)
& ~AETH_TYPE_MASK));
+ rxe_drop_ref(pkt->qp);
+ kfree_skb(skb);
+ skb = NULL;
goto exit;
} else {
+ rxe_counter_inc(rxe,
+ RXE_CNT_RNR_RETRY_EXCEEDED);
wqe->status = IB_WC_RNR_RETRY_EXC_ERR;
state = COMPST_ERROR;
}
break;
case COMPST_ERROR:
+ WARN_ON_ONCE(wqe->status == IB_WC_SUCCESS);
do_complete(qp, wqe);
rxe_qp_error(qp);
if (pkt) {
rxe_drop_ref(pkt->qp);
kfree_skb(skb);
+ skb = NULL;
}
+ WARN_ON_ONCE(skb);
goto exit;
}
}
@@ -742,6 +757,7 @@ exit:
/* we come here if we are done with processing and want the task to
* exit from the loop calling us
*/
+ WARN_ON_ONCE(skb);
rxe_drop_ref(qp);
return -EAGAIN;
@@ -749,6 +765,7 @@ done:
/* we come here if we have processed a packet we want the task to call
* us again to see if there is anything else to do
*/
+ WARN_ON_ONCE(skb);
rxe_drop_ref(qp);
return 0;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index e5e6a5e7dee9..49fe42c23f4d 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -156,9 +156,9 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
return 0;
}
-void rxe_cq_cleanup(void *arg)
+void rxe_cq_cleanup(struct rxe_pool_entry *arg)
{
- struct rxe_cq *cq = arg;
+ struct rxe_cq *cq = container_of(arg, typeof(*cq), pelem);
if (cq->queue)
rxe_queue_cleanup(cq->queue);
diff --git a/drivers/infiniband/sw/rxe/rxe_dma.c b/drivers/infiniband/sw/rxe/rxe_dma.c
deleted file mode 100644
index a0f8af5851ae..000000000000
--- a/drivers/infiniband/sw/rxe/rxe_dma.c
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
- * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "rxe.h"
-#include "rxe_loc.h"
-
-#define DMA_BAD_ADDER ((u64)0)
-
-static int rxe_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
- return dma_addr == DMA_BAD_ADDER;
-}
-
-static u64 rxe_dma_map_single(struct ib_device *dev,
- void *cpu_addr, size_t size,
- enum dma_data_direction direction)
-{
- WARN_ON(!valid_dma_direction(direction));
- return (uintptr_t)cpu_addr;
-}
-
-static void rxe_dma_unmap_single(struct ib_device *dev,
- u64 addr, size_t size,
- enum dma_data_direction direction)
-{
- WARN_ON(!valid_dma_direction(direction));
-}
-
-static u64 rxe_dma_map_page(struct ib_device *dev,
- struct page *page,
- unsigned long offset,
- size_t size, enum dma_data_direction direction)
-{
- u64 addr;
-
- WARN_ON(!valid_dma_direction(direction));
-
- if (offset + size > PAGE_SIZE) {
- addr = DMA_BAD_ADDER;
- goto done;
- }
-
- addr = (uintptr_t)page_address(page);
- if (addr)
- addr += offset;
-
-done:
- return addr;
-}
-
-static void rxe_dma_unmap_page(struct ib_device *dev,
- u64 addr, size_t size,
- enum dma_data_direction direction)
-{
- WARN_ON(!valid_dma_direction(direction));
-}
-
-static int rxe_map_sg(struct ib_device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction direction)
-{
- struct scatterlist *sg;
- u64 addr;
- int i;
- int ret = nents;
-
- WARN_ON(!valid_dma_direction(direction));
-
- for_each_sg(sgl, sg, nents, i) {
- addr = (uintptr_t)page_address(sg_page(sg));
- if (!addr) {
- ret = 0;
- break;
- }
- sg->dma_address = addr + sg->offset;
-#ifdef CONFIG_NEED_SG_DMA_LENGTH
- sg->dma_length = sg->length;
-#endif
- }
-
- return ret;
-}
-
-static void rxe_unmap_sg(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction)
-{
- WARN_ON(!valid_dma_direction(direction));
-}
-
-static int rxe_map_sg_attrs(struct ib_device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction direction,
- unsigned long attrs)
-{
- return rxe_map_sg(dev, sgl, nents, direction);
-}
-
-static void rxe_unmap_sg_attrs(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction,
- unsigned long attrs)
-{
- rxe_unmap_sg(dev, sg, nents, direction);
-}
-
-static void rxe_sync_single_for_cpu(struct ib_device *dev,
- u64 addr,
- size_t size, enum dma_data_direction dir)
-{
-}
-
-static void rxe_sync_single_for_device(struct ib_device *dev,
- u64 addr,
- size_t size, enum dma_data_direction dir)
-{
-}
-
-static void *rxe_dma_alloc_coherent(struct ib_device *dev, size_t size,
- u64 *dma_handle, gfp_t flag)
-{
- struct page *p;
- void *addr = NULL;
-
- p = alloc_pages(flag, get_order(size));
- if (p)
- addr = page_address(p);
-
- if (dma_handle)
- *dma_handle = (uintptr_t)addr;
-
- return addr;
-}
-
-static void rxe_dma_free_coherent(struct ib_device *dev, size_t size,
- void *cpu_addr, u64 dma_handle)
-{
- free_pages((unsigned long)cpu_addr, get_order(size));
-}
-
-struct ib_dma_mapping_ops rxe_dma_mapping_ops = {
- .mapping_error = rxe_mapping_error,
- .map_single = rxe_dma_map_single,
- .unmap_single = rxe_dma_unmap_single,
- .map_page = rxe_dma_map_page,
- .unmap_page = rxe_dma_unmap_page,
- .map_sg = rxe_map_sg,
- .unmap_sg = rxe_unmap_sg,
- .map_sg_attrs = rxe_map_sg_attrs,
- .unmap_sg_attrs = rxe_unmap_sg_attrs,
- .sync_single_for_cpu = rxe_sync_single_for_cpu,
- .sync_single_for_device = rxe_sync_single_for_device,
- .alloc_coherent = rxe_dma_alloc_coherent,
- .free_coherent = rxe_dma_free_coherent
-};
diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h
index d57b5e956ceb..6cb18406f5b8 100644
--- a/drivers/infiniband/sw/rxe/rxe_hdr.h
+++ b/drivers/infiniband/sw/rxe/rxe_hdr.h
@@ -53,8 +53,16 @@ struct rxe_pkt_info {
};
/* Macros should be used only for received skb */
-#define SKB_TO_PKT(skb) ((struct rxe_pkt_info *)(skb)->cb)
-#define PKT_TO_SKB(pkt) container_of((void *)(pkt), struct sk_buff, cb)
+static inline struct rxe_pkt_info *SKB_TO_PKT(struct sk_buff *skb)
+{
+ BUILD_BUG_ON(sizeof(struct rxe_pkt_info) > sizeof(skb->cb));
+ return (void *)skb->cb;
+}
+
+static inline struct sk_buff *PKT_TO_SKB(struct rxe_pkt_info *pkt)
+{
+ return container_of((void *)pkt, struct sk_buff, cb);
+}
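
Turning SKB_TO_PKT()/PKT_TO_SKB() into static inlines adds type checking plus a BUILD_BUG_ON() that struct rxe_pkt_info fits in skb->cb[]. A user-space sketch of the same embed-in-the-control-block pattern, with hypothetical names and static_assert/offsetof standing in for BUILD_BUG_ON()/container_of():

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-ins: a buffer with a small control block, and the
 * per-packet info stashed inside that control block.
 */
struct buf {
        char data[256];
        char cb[48];            /* like skb->cb[] */
};

struct pkt_info {
        unsigned int opcode;
        unsigned int psn;
};

/* Compile-time check that the info actually fits in the control block. */
static_assert(sizeof(struct pkt_info) <= sizeof(((struct buf *)0)->cb),
              "pkt_info too large for cb");

static struct pkt_info *BUF_TO_PKT(struct buf *b)
{
        return (struct pkt_info *)b->cb;
}

static struct buf *PKT_TO_BUF(struct pkt_info *pkt)
{
        /* container_of() by hand: cb sits at a known offset inside struct buf */
        return (struct buf *)((char *)pkt - offsetof(struct buf, cb));
}

int main(void)
{
        struct buf b;
        struct pkt_info *pkt = BUF_TO_PKT(&b);

        pkt->opcode = 1;
        pkt->psn = 100;
        printf("round trip ok: %d\n", PKT_TO_BUF(pkt) == &b);  /* 1 */
        return 0;
}
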
/*
* IBA header types and methods
diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
new file mode 100644
index 000000000000..7ef90aad7dfd
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_hw_counters.h"
+
+const char * const rxe_counter_name[] = {
+ [RXE_CNT_SENT_PKTS] = "sent_pkts",
+ [RXE_CNT_RCVD_PKTS] = "rcvd_pkts",
+ [RXE_CNT_DUP_REQ] = "duplicate_request",
+ [RXE_CNT_OUT_OF_SEQ_REQ] = "out_of_sequence",
+ [RXE_CNT_RCV_RNR] = "rcvd_rnr_err",
+ [RXE_CNT_SND_RNR] = "send_rnr_err",
+ [RXE_CNT_RCV_SEQ_ERR] = "rcvd_seq_err",
+ [RXE_CNT_COMPLETER_SCHED] = "ack_deffered",
+ [RXE_CNT_RETRY_EXCEEDED] = "retry_exceeded_err",
+ [RXE_CNT_RNR_RETRY_EXCEEDED] = "retry_rnr_exceeded_err",
+ [RXE_CNT_COMP_RETRY] = "completer_retry_err",
+ [RXE_CNT_SEND_ERR] = "send_err",
+};
+
+int rxe_ib_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u8 port, int index)
+{
+ struct rxe_dev *dev = to_rdev(ibdev);
+ unsigned int cnt;
+
+ if (!port || !stats)
+ return -EINVAL;
+
+ for (cnt = 0; cnt < ARRAY_SIZE(rxe_counter_name); cnt++)
+ stats->value[cnt] = dev->stats_counters[cnt];
+
+ return ARRAY_SIZE(rxe_counter_name);
+}
+
+struct rdma_hw_stats *rxe_ib_alloc_hw_stats(struct ib_device *ibdev,
+ u8 port_num)
+{
+ BUILD_BUG_ON(ARRAY_SIZE(rxe_counter_name) != RXE_NUM_OF_COUNTERS);
+ /* We support only per port stats */
+ if (!port_num)
+ return NULL;
+
+ return rdma_alloc_hw_stats_struct(rxe_counter_name,
+ ARRAY_SIZE(rxe_counter_name),
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
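
The counter enum in rxe_hw_counters.h and the rxe_counter_name[] strings must stay in lockstep, which rxe_ib_alloc_hw_stats() enforces with a BUILD_BUG_ON(); rxe_ib_get_hw_stats() then copies every per-port counter out by index. A small sketch of that pattern, with hypothetical counter names and a compile-time size check standing in for BUILD_BUG_ON():

#include <assert.h>
#include <stdio.h>

/* Enum indices paired with name strings, kept in sync at compile time. */
enum demo_counters {
        CNT_SENT_PKTS,
        CNT_RCVD_PKTS,
        CNT_DUP_REQ,
        NUM_COUNTERS
};

static const char * const counter_name[] = {
        [CNT_SENT_PKTS] = "sent_pkts",
        [CNT_RCVD_PKTS] = "rcvd_pkts",
        [CNT_DUP_REQ]   = "duplicate_request",
};

static_assert(sizeof(counter_name) / sizeof(counter_name[0]) == NUM_COUNTERS,
              "counter names out of sync with enum");

int main(void)
{
        unsigned long long stats[NUM_COUNTERS] = { 0 };

        stats[CNT_SENT_PKTS] += 3;
        stats[CNT_DUP_REQ] += 1;

        /* like rxe_ib_get_hw_stats(): report every counter, by index */
        for (unsigned int i = 0; i < NUM_COUNTERS; i++)
                printf("%-20s %llu\n", counter_name[i], stats[i]);
        return 0;
}
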
diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.h b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
new file mode 100644
index 000000000000..f44df1b76742
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RXE_HW_COUNTERS_H
+#define RXE_HW_COUNTERS_H
+
+/*
+ * When adding counters to the enum, also add
+ * them to the rxe_counter_name[] array.
+ */
+enum rxe_counters {
+ RXE_CNT_SENT_PKTS,
+ RXE_CNT_RCVD_PKTS,
+ RXE_CNT_DUP_REQ,
+ RXE_CNT_OUT_OF_SEQ_REQ,
+ RXE_CNT_RCV_RNR,
+ RXE_CNT_SND_RNR,
+ RXE_CNT_RCV_SEQ_ERR,
+ RXE_CNT_COMPLETER_SCHED,
+ RXE_CNT_RETRY_EXCEEDED,
+ RXE_CNT_RNR_RETRY_EXCEEDED,
+ RXE_CNT_COMP_RETRY,
+ RXE_CNT_SEND_ERR,
+ RXE_NUM_OF_COUNTERS
+};
+
+struct rdma_hw_stats *rxe_ib_alloc_hw_stats(struct ib_device *ibdev,
+ u8 port_num);
+int rxe_ib_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u8 port, int index);
+#endif /* RXE_HW_COUNTERS_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c
index 413b56b23a06..39e0be31aab1 100644
--- a/drivers/infiniband/sw/rxe/rxe_icrc.c
+++ b/drivers/infiniband/sw/rxe/rxe_icrc.c
@@ -87,10 +87,10 @@ u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb)
bth->qpn |= cpu_to_be32(~BTH_QPN_MASK);
length = hdr_size + RXE_BTH_BYTES;
- crc = crc32_le(crc, pshdr, length);
+ crc = rxe_crc32(pkt->rxe, crc, pshdr, length);
/* And finish to compute the CRC on the remainder of the headers. */
- crc = crc32_le(crc, pkt->hdr + RXE_BTH_BYTES,
- rxe_opcode[pkt->opcode].length - RXE_BTH_BYTES);
+ crc = rxe_crc32(pkt->rxe, crc, pkt->hdr + RXE_BTH_BYTES,
+ rxe_opcode[pkt->opcode].length - RXE_BTH_BYTES);
return crc;
}
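
The crc32_le() calls here move behind rxe_crc32(), a per-device helper that the same series adds to rxe.h and that runs the CRC through the crypto "crc32" shash allocated in rxe_register_device() (see the rxe_verbs.c hunk further down). Roughly, and hedged as a sketch rather than a verbatim copy of the header, the helper looks like this, falling back to crc32_le() if the shash update fails:

static inline u32 rxe_crc32(struct rxe_dev *rxe, u32 crc, void *next, size_t len)
{
	int err;
	u32 retval;
	SHASH_DESC_ON_STACK(shash, rxe->tfm);

	shash->tfm = rxe->tfm;
	shash->flags = 0;
	*(u32 *)shash_desc_ctx(shash) = crc;	/* seed with the running crc */
	err = crypto_shash_update(shash, next, len);
	if (unlikely(err)) {
		pr_warn_ratelimited("failed crc calculation, err: %d\n", err);
		return crc32_le(crc, next, len);	/* software fallback */
	}
	retval = *(u32 *)shash_desc_ctx(shash);	/* read back the new crc */
	barrier_data(shash_desc_ctx(shash));
	return retval;
}
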
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index efe4c6a35442..d6299edf9a5b 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -36,17 +36,17 @@
/* rxe_av.c */
-int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr);
+int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr);
int rxe_av_from_attr(struct rxe_dev *rxe, u8 port_num,
- struct rxe_av *av, struct ib_ah_attr *attr);
+ struct rxe_av *av, struct rdma_ah_attr *attr);
int rxe_av_to_attr(struct rxe_dev *rxe, struct rxe_av *av,
- struct ib_ah_attr *attr);
+ struct rdma_ah_attr *attr);
int rxe_av_fill_ip_info(struct rxe_dev *rxe,
struct rxe_av *av,
- struct ib_ah_attr *attr,
+ struct rdma_ah_attr *attr,
struct ib_gid_attr *sgid_attr,
union ib_gid *sgid);
@@ -64,7 +64,7 @@ int rxe_cq_resize_queue(struct rxe_cq *cq, int new_cqe, struct ib_udata *udata);
int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited);
-void rxe_cq_cleanup(void *arg);
+void rxe_cq_cleanup(struct rxe_pool_entry *arg);
/* rxe_mcast.c */
int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid,
@@ -78,7 +78,7 @@ int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
void rxe_drop_all_mcast_groups(struct rxe_qp *qp);
-void rxe_mc_cleanup(void *arg);
+void rxe_mc_cleanup(struct rxe_pool_entry *arg);
/* rxe_mmap.c */
struct rxe_mmap_info {
@@ -137,10 +137,24 @@ int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length);
int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem,
u64 *page, int num_pages, u64 iova);
-void rxe_mem_cleanup(void *arg);
+void rxe_mem_cleanup(struct rxe_pool_entry *arg);
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
+/* rxe_net.c */
+int rxe_loopback(struct sk_buff *skb);
+int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb);
+struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
+ int paylen, struct rxe_pkt_info *pkt);
+int rxe_prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb, u32 *crc);
+enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num);
+const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num);
+struct device *rxe_dma_device(struct rxe_dev *rxe);
+int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid);
+int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid);
+
/* rxe_qp.c */
int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init);
@@ -162,7 +176,7 @@ void rxe_qp_error(struct rxe_qp *qp);
void rxe_qp_destroy(struct rxe_qp *qp);
-void rxe_qp_cleanup(void *arg);
+void rxe_qp_cleanup(struct rxe_pool_entry *arg);
static inline int qp_num(struct rxe_qp *qp)
{
@@ -221,10 +235,9 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
struct ib_udata *udata);
-extern struct ib_dma_mapping_ops rxe_dma_mapping_ops;
-
void rxe_release(struct kref *kref);
+void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify);
int rxe_completer(void *arg);
int rxe_requester(void *arg);
int rxe_responder(void *arg);
@@ -256,13 +269,14 @@ static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp,
if (pkt->mask & RXE_LOOPBACK_MASK) {
memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt));
- err = rxe->ifc_ops->loopback(skb);
+ err = rxe_loopback(skb);
} else {
- err = rxe->ifc_ops->send(rxe, pkt, skb);
+ err = rxe_send(rxe, pkt, skb);
}
if (err) {
rxe->xmit_errors++;
+ rxe_counter_inc(rxe, RXE_CNT_SEND_ERR);
return err;
}
@@ -272,6 +286,7 @@ static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp,
rxe_run_task(&qp->comp.task, 1);
}
+ rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS);
goto done;
drop:
diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
index fa95544ca7e0..522a7942c56c 100644
--- a/drivers/infiniband/sw/rxe/rxe_mcast.c
+++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
@@ -61,7 +61,7 @@ int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid,
rxe_add_key(grp, mgid);
- err = rxe->ifc_ops->mcast_add(rxe, mgid);
+ err = rxe_mcast_add(rxe, mgid);
if (err)
goto err2;
@@ -180,11 +180,11 @@ void rxe_drop_all_mcast_groups(struct rxe_qp *qp)
}
}
-void rxe_mc_cleanup(void *arg)
+void rxe_mc_cleanup(struct rxe_pool_entry *arg)
{
- struct rxe_mc_grp *grp = arg;
+ struct rxe_mc_grp *grp = container_of(arg, typeof(*grp), pelem);
struct rxe_dev *rxe = grp->rxe;
rxe_drop_key(grp);
- rxe->ifc_ops->mcast_delete(rxe, &grp->mgid);
+ rxe_mcast_delete(rxe, &grp->mgid);
}
diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c
index c572a4c09359..bd812e00988e 100644
--- a/drivers/infiniband/sw/rxe/rxe_mmap.c
+++ b/drivers/infiniband/sw/rxe/rxe_mmap.c
@@ -156,10 +156,10 @@ struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *rxe,
spin_lock_bh(&rxe->mmap_offset_lock);
if (rxe->mmap_offset == 0)
- rxe->mmap_offset = PAGE_SIZE;
+ rxe->mmap_offset = ALIGN(PAGE_SIZE, SHMLBA);
ip->info.offset = rxe->mmap_offset;
- rxe->mmap_offset += size;
+ rxe->mmap_offset += ALIGN(size, SHMLBA);
spin_unlock_bh(&rxe->mmap_offset_lock);
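
Background on the two ALIGN() changes above (not stated in the patch itself, so take it as the usual rationale for SHMLBA alignment): the offsets handed out here become mmap() offsets for userspace queues, and rounding both the starting offset and each object's size up to SHMLBA keeps every mapping on a shared-memory-alignment boundary, avoiding cache aliasing on VIPT architectures where SHMLBA is larger than PAGE_SIZE. With the common ARM value SHMLBA == 16384:

	ALIGN(PAGE_SIZE, SHMLBA)	/* 16384 rather than 4096                  */
	ALIGN(20000, SHMLBA)		/* 32768: the next object starts on a fresh
					   16 KB boundary                           */

On x86, where SHMLBA == PAGE_SIZE, both calls are effectively no-ops.
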
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index d0faca294006..e37cc89987e1 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -59,9 +59,11 @@ int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length)
case RXE_MEM_TYPE_MR:
case RXE_MEM_TYPE_FMR:
- return ((iova < mem->iova) ||
- ((iova + length) > (mem->iova + mem->length))) ?
- -EFAULT : 0;
+ if (iova < mem->iova ||
+ length > mem->length ||
+ iova > mem->iova + mem->length - length)
+ return -EFAULT;
+ return 0;
default:
return -EFAULT;
@@ -89,9 +91,9 @@ static void rxe_mem_init(int access, struct rxe_mem *mem)
mem->map_shift = ilog2(RXE_BUF_PER_MAP);
}
-void rxe_mem_cleanup(void *arg)
+void rxe_mem_cleanup(struct rxe_pool_entry *arg)
{
- struct rxe_mem *mem = arg;
+ struct rxe_mem *mem = container_of(arg, typeof(*mem), pelem);
int i;
if (mem->umem)
@@ -123,7 +125,7 @@ static int rxe_mem_alloc(struct rxe_dev *rxe, struct rxe_mem *mem, int num_buf)
goto err2;
}
- WARN_ON(!is_power_of_2(RXE_BUF_PER_MAP));
+ BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));
mem->map_shift = ilog2(RXE_BUF_PER_MAP);
mem->map_mask = RXE_BUF_PER_MAP - 1;
@@ -189,10 +191,8 @@ int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start,
goto err1;
}
- WARN_ON(!is_power_of_2(umem->page_size));
-
- mem->page_shift = ilog2(umem->page_size);
- mem->page_mask = umem->page_size - 1;
+ mem->page_shift = umem->page_shift;
+ mem->page_mask = BIT(umem->page_shift) - 1;
num_buf = 0;
map = mem->map;
@@ -208,7 +208,7 @@ int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start,
}
buf->addr = (uintptr_t)vaddr;
- buf->size = umem->page_size;
+ buf->size = BIT(umem->page_shift);
num_buf++;
buf++;
@@ -368,14 +368,15 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
((void *)(uintptr_t)iova) : addr;
if (crcp)
- *crcp = crc32_le(*crcp, src, length);
+ *crcp = rxe_crc32(to_rdev(mem->pd->ibpd.device),
+ *crcp, src, length);
memcpy(dest, src, length);
return 0;
}
- WARN_ON(!mem->map);
+ WARN_ON_ONCE(!mem->map);
err = mem_check_range(mem, iova, length);
if (err) {
@@ -401,7 +402,8 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
bytes = length;
if (crcp)
- crc = crc32_le(crc, src, bytes);
+ crc = rxe_crc32(to_rdev(mem->pd->ibpd.device),
+ crc, src, bytes);
memcpy(dest, src, bytes);
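
The mem_check_range() rewrite at the top of this file is an overflow fix, and a worked example (all numbers invented) shows why the extra comparisons matter with u64 arithmetic:

	/* region:  mem->iova = 0x1000, mem->length = 0x10000 (ends at 0x11000) */
	/* request: iova = 0xfffffffffffff000, length = 0x2000                  */

	/* old test: iova + length wraps to 0x1000, which is not greater than
	 * 0x11000, and iova is not below mem->iova, so the bogus request
	 * slipped through. */

	/* new test: length <= mem->length holds, but
	 * iova > mem->iova + mem->length - length   (0xfff...f000 > 0xf000)
	 * so the request is rejected with -EFAULT; the only sum left involves
	 * the registered region's own bounds, which do not wrap. */
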
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 4abdeb359fb4..08f3f90d2912 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -84,47 +84,19 @@ struct rxe_dev *get_rxe_by_name(const char *name)
struct rxe_recv_sockets recv_sockets;
-static __be64 rxe_mac_to_eui64(struct net_device *ndev)
-{
- unsigned char *mac_addr = ndev->dev_addr;
- __be64 eui64;
- unsigned char *dst = (unsigned char *)&eui64;
-
- dst[0] = mac_addr[0] ^ 2;
- dst[1] = mac_addr[1];
- dst[2] = mac_addr[2];
- dst[3] = 0xff;
- dst[4] = 0xfe;
- dst[5] = mac_addr[3];
- dst[6] = mac_addr[4];
- dst[7] = mac_addr[5];
-
- return eui64;
-}
-
-static __be64 node_guid(struct rxe_dev *rxe)
-{
- return rxe_mac_to_eui64(rxe->ndev);
-}
-
-static __be64 port_guid(struct rxe_dev *rxe)
-{
- return rxe_mac_to_eui64(rxe->ndev);
-}
-
-static struct device *dma_device(struct rxe_dev *rxe)
+struct device *rxe_dma_device(struct rxe_dev *rxe)
{
struct net_device *ndev;
ndev = rxe->ndev;
- if (ndev->priv_flags & IFF_802_1Q_VLAN)
+ if (is_vlan_dev(ndev))
ndev = vlan_dev_real_dev(ndev);
return ndev->dev.parent;
}
-static int mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
+int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
{
int err;
unsigned char ll_addr[ETH_ALEN];
@@ -135,7 +107,7 @@ static int mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
return err;
}
-static int mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid)
+int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid)
{
int err;
unsigned char ll_addr[ETH_ALEN];
@@ -210,6 +182,39 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev,
#endif
+static struct dst_entry *rxe_find_route(struct rxe_dev *rxe,
+ struct rxe_qp *qp,
+ struct rxe_av *av)
+{
+ struct dst_entry *dst = NULL;
+
+ if (qp_type(qp) == IB_QPT_RC)
+ dst = sk_dst_get(qp->sk->sk);
+
+ if (!dst || !(dst->obsolete && dst->ops->check(dst, 0))) {
+ if (dst)
+ dst_release(dst);
+
+ if (av->network_type == RDMA_NETWORK_IPV4) {
+ struct in_addr *saddr;
+ struct in_addr *daddr;
+
+ saddr = &av->sgid_addr._sockaddr_in.sin_addr;
+ daddr = &av->dgid_addr._sockaddr_in.sin_addr;
+ dst = rxe_find_route4(rxe->ndev, saddr, daddr);
+ } else if (av->network_type == RDMA_NETWORK_IPV6) {
+ struct in6_addr *saddr6;
+ struct in6_addr *daddr6;
+
+ saddr6 = &av->sgid_addr._sockaddr_in6.sin6_addr;
+ daddr6 = &av->dgid_addr._sockaddr_in6.sin6_addr;
+ dst = rxe_find_route6(rxe->ndev, saddr6, daddr6);
+ }
+ }
+
+ return dst;
+}
+
static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
struct udphdr *udph;
@@ -243,8 +248,8 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
{
int err;
struct socket *sock;
- struct udp_port_cfg udp_cfg = {0};
- struct udp_tunnel_sock_cfg tnl_cfg = {0};
+ struct udp_port_cfg udp_cfg = { };
+ struct udp_tunnel_sock_cfg tnl_cfg = { };
if (ipv6) {
udp_cfg.family = AF_INET6;
@@ -301,7 +306,7 @@ static void prepare_ipv4_hdr(struct dst_entry *dst, struct sk_buff *skb,
skb_scrub_packet(skb, xnet);
skb_clear_hash(skb);
- skb_dst_set(skb, dst);
+ skb_dst_set(skb, dst_clone(dst));
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
skb_push(skb, sizeof(struct iphdr));
@@ -349,13 +354,14 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct sk_buff *skb, struct rxe_av *av)
{
+ struct rxe_qp *qp = pkt->qp;
struct dst_entry *dst;
bool xnet = false;
__be16 df = htons(IP_DF);
struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr;
struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr;
- dst = rxe_find_route4(rxe->ndev, saddr, daddr);
+ dst = rxe_find_route(rxe, qp, av);
if (!dst) {
pr_err("Host not reachable\n");
return -EHOSTUNREACH;
@@ -369,17 +375,24 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP,
av->grh.traffic_class, av->grh.hop_limit, df, xnet);
+
+ if (qp_type(qp) == IB_QPT_RC)
+ sk_dst_set(qp->sk->sk, dst);
+ else
+ dst_release(dst);
+
return 0;
}
static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct sk_buff *skb, struct rxe_av *av)
{
- struct dst_entry *dst;
+ struct rxe_qp *qp = pkt->qp;
+ struct dst_entry *dst = NULL;
struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr;
struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr;
- dst = rxe_find_route6(rxe->ndev, saddr, daddr);
+ dst = rxe_find_route(rxe, qp, av);
if (!dst) {
pr_err("Host not reachable\n");
return -EHOSTUNREACH;
@@ -394,11 +407,17 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
prepare_ipv6_hdr(dst, skb, saddr, daddr, IPPROTO_UDP,
av->grh.traffic_class,
av->grh.hop_limit);
+
+ if (qp_type(qp) == IB_QPT_RC)
+ sk_dst_set(qp->sk->sk, dst);
+ else
+ dst_release(dst);
+
return 0;
}
-static int prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
- struct sk_buff *skb, u32 *crc)
+int rxe_prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb, u32 *crc)
{
int err = 0;
struct rxe_av *av = rxe_get_av(pkt);
@@ -422,10 +441,11 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb)
if (unlikely(qp->need_req_skb &&
skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW))
rxe_run_task(&qp->req.task, 1);
+
+ rxe_drop_ref(qp);
}
-static int send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
- struct sk_buff *skb)
+int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct sk_buff *skb)
{
struct sk_buff *nskb;
struct rxe_av *av;
@@ -455,13 +475,14 @@ static int send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
return -EAGAIN;
}
+ rxe_add_ref(pkt->qp);
atomic_inc(&pkt->qp->skb_out);
kfree_skb(skb);
return 0;
}
-static int loopback(struct sk_buff *skb)
+int rxe_loopback(struct sk_buff *skb)
{
return rxe_rcv(skb);
}
@@ -471,8 +492,8 @@ static inline int addr_same(struct rxe_dev *rxe, struct rxe_av *av)
return rxe->port.port_guid == av->grh.dgid.global.interface_id;
}
-static struct sk_buff *init_packet(struct rxe_dev *rxe, struct rxe_av *av,
- int paylen, struct rxe_pkt_info *pkt)
+struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
+ int paylen, struct rxe_pkt_info *pkt)
{
unsigned int hdr_len;
struct sk_buff *skb;
@@ -511,31 +532,16 @@ static struct sk_buff *init_packet(struct rxe_dev *rxe, struct rxe_av *av,
* this is required by rxe_cfg to match rxe devices in
* /sys/class/infiniband up with their underlying ethernet devices
*/
-static char *parent_name(struct rxe_dev *rxe, unsigned int port_num)
+const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num)
{
return rxe->ndev->name;
}
-static enum rdma_link_layer link_layer(struct rxe_dev *rxe,
- unsigned int port_num)
+enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num)
{
return IB_LINK_LAYER_ETHERNET;
}
-static struct rxe_ifc_ops ifc_ops = {
- .node_guid = node_guid,
- .port_guid = port_guid,
- .dma_device = dma_device,
- .mcast_add = mcast_add,
- .mcast_delete = mcast_delete,
- .prepare = prepare,
- .send = send,
- .loopback = loopback,
- .init_packet = init_packet,
- .parent_name = parent_name,
- .link_layer = link_layer,
-};
-
struct rxe_dev *rxe_net_add(struct net_device *ndev)
{
int err;
@@ -545,7 +551,6 @@ struct rxe_dev *rxe_net_add(struct net_device *ndev)
if (!rxe)
return NULL;
- rxe->ifc_ops = &ifc_ops;
rxe->ndev = ndev;
err = rxe_add(rxe, ndev->mtu);
@@ -658,7 +663,7 @@ struct notifier_block rxe_net_notifier = {
.notifier_call = rxe_notify,
};
-int rxe_net_ipv4_init(void)
+static int rxe_net_ipv4_init(void)
{
recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net,
htons(ROCE_V2_UDP_DPORT), false);
@@ -671,7 +676,7 @@ int rxe_net_ipv4_init(void)
return 0;
}
-int rxe_net_ipv6_init(void)
+static int rxe_net_ipv6_init(void)
{
#if IS_ENABLED(CONFIG_IPV6)
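
A hedged note on the dst reference counting that the prepare4()/prepare6() changes above depend on: skb_dst_set() and sk_dst_set() each consume one reference, so the route obtained from rxe_find_route() is cloned for the skb while the original reference is either parked in the RC QP's socket cache (letting the next packet skip the route lookup) or dropped straight away for UD/UC. In outline:

	dst = rxe_find_route(rxe, qp, av);	/* one reference held here         */
	skb_dst_set(skb, dst_clone(dst));	/* skb keeps its own reference     */
	if (qp_type(qp) == IB_QPT_RC)
		sk_dst_set(qp->sk->sk, dst);	/* cache it; consumes the original */
	else
		dst_release(dst);		/* UD/UC: nothing to cache         */

The matching release for the cached entry is in the rxe_qp.c hunk further down, where rxe_qp_cleanup() drops the socket's dst for RC QPs.
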
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 13ed2cc6eaa2..1b596fbbe251 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -114,7 +114,6 @@ enum rxe_device_param {
RXE_MAX_UCONTEXT = 512,
RXE_NUM_PORT = 1,
- RXE_NUM_COMP_VECTORS = 1,
RXE_MIN_QP_INDEX = 16,
RXE_MAX_QP_INDEX = 0x00020000,
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index d723947a8542..75d11ee635ec 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -102,7 +102,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
},
};
-static inline char *pool_name(struct rxe_pool *pool)
+static inline const char *pool_name(struct rxe_pool *pool)
{
return rxe_type_info[pool->type].name;
}
@@ -112,13 +112,6 @@ static inline struct kmem_cache *pool_cache(struct rxe_pool *pool)
return rxe_type_info[pool->type].cache;
}
-static inline enum rxe_elem_type rxe_type(void *arg)
-{
- struct rxe_pool_entry *elem = arg;
-
- return elem->pool->type;
-}
-
int rxe_cache_init(void)
{
int err;
@@ -273,6 +266,7 @@ static u32 alloc_index(struct rxe_pool *pool)
if (index >= range)
index = find_first_zero_bit(pool->table, range);
+ WARN_ON_ONCE(index >= range);
set_bit(index, pool->table);
pool->last = index;
return index + pool->min_index;
@@ -461,7 +455,7 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
out:
spin_unlock_irqrestore(&pool->pool_lock, flags);
- return node ? (void *)elem : NULL;
+ return node ? elem : NULL;
}
void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
@@ -497,5 +491,5 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
out:
spin_unlock_irqrestore(&pool->pool_lock, flags);
- return node ? ((void *)elem) : NULL;
+ return node ? elem : NULL;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index 4d04830adcae..47df28e43acf 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -57,10 +57,12 @@ enum rxe_elem_type {
RXE_NUM_TYPES, /* keep me last */
};
+struct rxe_pool_entry;
+
struct rxe_type_info {
- char *name;
+ const char *name;
size_t size;
- void (*cleanup)(void *obj);
+ void (*cleanup)(struct rxe_pool_entry *obj);
enum rxe_pool_flags flags;
u32 max_index;
u32 min_index;
@@ -91,7 +93,7 @@ struct rxe_pool {
spinlock_t pool_lock; /* pool spinlock */
size_t elem_size;
struct kref ref_cnt;
- void (*cleanup)(void *obj);
+ void (*cleanup)(struct rxe_pool_entry *obj);
enum rxe_pool_state state;
enum rxe_pool_flags flags;
enum rxe_elem_type type;
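
The typed cleanup pointer above is what the container_of() conversions in rxe_mcast.c, rxe_mr.c and rxe_qp.c plug into: every pool-managed object embeds a struct rxe_pool_entry named pelem, and the callback derives the outer object from that member rather than casting a bare void pointer, so a callback with the wrong signature now fails to compile. The skeleton each cleanup follows (mirroring the rxe_qp_cleanup() conversion below):

	void rxe_qp_cleanup(struct rxe_pool_entry *arg)
	{
		struct rxe_qp *qp = container_of(arg, typeof(*qp), pelem);

		/* release whatever the object owns */
	}
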
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 44b2108253bd..80ccc7c7c341 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -273,15 +273,11 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
rxe_init_task(rxe, &qp->comp.task, qp,
rxe_completer, "comp");
- init_timer(&qp->rnr_nak_timer);
- qp->rnr_nak_timer.function = rnr_nak_timer;
- qp->rnr_nak_timer.data = (unsigned long)qp;
-
- init_timer(&qp->retrans_timer);
- qp->retrans_timer.function = retransmit_timer;
- qp->retrans_timer.data = (unsigned long)qp;
qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */
-
+ if (init->qp_type == IB_QPT_RC) {
+ setup_timer(&qp->rnr_nak_timer, rnr_nak_timer, (unsigned long)qp);
+ setup_timer(&qp->retrans_timer, retransmit_timer, (unsigned long)qp);
+ }
return 0;
}
@@ -635,8 +631,8 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
if (mask & IB_QP_AV) {
ib_get_cached_gid(&rxe->ib_dev, 1,
- attr->ah_attr.grh.sgid_index, &sgid,
- &sgid_attr);
+ rdma_ah_read_grh(&attr->ah_attr)->sgid_index,
+ &sgid, &sgid_attr);
rxe_av_from_attr(rxe, attr->port_num, &qp->pri_av,
&attr->ah_attr);
rxe_av_fill_ip_info(rxe, &qp->pri_av, &attr->ah_attr,
@@ -646,9 +642,11 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
}
if (mask & IB_QP_ALT_PATH) {
- ib_get_cached_gid(&rxe->ib_dev, 1,
- attr->alt_ah_attr.grh.sgid_index, &sgid,
- &sgid_attr);
+ u8 sgid_index =
+ rdma_ah_read_grh(&attr->alt_ah_attr)->sgid_index;
+
+ ib_get_cached_gid(&rxe->ib_dev, 1, sgid_index,
+ &sgid, &sgid_attr);
rxe_av_from_attr(rxe, attr->alt_port_num, &qp->alt_av,
&attr->alt_ah_attr);
@@ -809,8 +807,10 @@ void rxe_qp_destroy(struct rxe_qp *qp)
qp->qp_timeout_jiffies = 0;
rxe_cleanup_task(&qp->resp.task);
- del_timer_sync(&qp->retrans_timer);
- del_timer_sync(&qp->rnr_nak_timer);
+ if (qp_type(qp) == IB_QPT_RC) {
+ del_timer_sync(&qp->retrans_timer);
+ del_timer_sync(&qp->rnr_nak_timer);
+ }
rxe_cleanup_task(&qp->req.task);
rxe_cleanup_task(&qp->comp.task);
@@ -824,9 +824,9 @@ void rxe_qp_destroy(struct rxe_qp *qp)
}
/* called when the last reference to the qp is dropped */
-void rxe_qp_cleanup(void *arg)
+void rxe_qp_cleanup(struct rxe_pool_entry *arg)
{
- struct rxe_qp *qp = arg;
+ struct rxe_qp *qp = container_of(arg, typeof(*qp), pelem);
rxe_drop_all_mcast_groups(qp);
@@ -851,6 +851,14 @@ void rxe_qp_cleanup(void *arg)
qp->resp.mr = NULL;
}
+ if (qp_type(qp) == IB_QPT_RC) {
+ struct dst_entry *dst = NULL;
+
+ dst = sk_dst_get(qp->sk->sk);
+ if (dst)
+ dst_release(dst);
+ }
+
free_rd_atomic_resources(qp);
kernel_sock_shutdown(qp->sk, SHUT_RDWR);
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 252b4d637d45..fb8c83e055e1 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -387,9 +387,9 @@ int rxe_rcv(struct sk_buff *skb)
pack_icrc = be32_to_cpu(*icrcp);
calc_icrc = rxe_icrc_hdr(pkt, skb);
- calc_icrc = crc32_le(calc_icrc, (u8 *)payload_addr(pkt),
- payload_size(pkt));
- calc_icrc = cpu_to_be32(~calc_icrc);
+ calc_icrc = rxe_crc32(rxe, calc_icrc, (u8 *)payload_addr(pkt),
+ payload_size(pkt));
+ calc_icrc = (__force u32)cpu_to_be32(~calc_icrc);
if (unlikely(calc_icrc != pack_icrc)) {
if (skb->protocol == htons(ETH_P_IPV6))
pr_warn_ratelimited("bad ICRC from %pI6c\n",
@@ -403,6 +403,8 @@ int rxe_rcv(struct sk_buff *skb)
goto drop;
}
+ rxe_counter_inc(rxe, RXE_CNT_RCVD_PKTS);
+
if (unlikely(bth_qpn(pkt) == IB_MULTICAST_QPN))
rxe_rcv_mcast_pkt(rxe, skb);
else
@@ -417,4 +419,3 @@ drop:
kfree_skb(skb);
return 0;
}
-EXPORT_SYMBOL(rxe_rcv);
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 73d4a97603a1..7ee465d1a1e1 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -32,6 +32,7 @@
*/
#include <linux/skbuff.h>
+#include <crypto/hash.h>
#include "rxe.h"
#include "rxe_loc.h"
@@ -361,19 +362,14 @@ static inline int check_init_depth(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
return -EAGAIN;
}
-static inline int get_mtu(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
+static inline int get_mtu(struct rxe_qp *qp)
{
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
- struct rxe_port *port;
- struct rxe_av *av;
if ((qp_type(qp) == IB_QPT_RC) || (qp_type(qp) == IB_QPT_UC))
return qp->mtu;
- av = &wqe->av;
- port = &rxe->port;
-
- return port->mtu_cap;
+ return rxe->port.mtu_cap;
}
static struct sk_buff *init_req_packet(struct rxe_qp *qp,
@@ -409,7 +405,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
/* init skb */
av = rxe_get_av(pkt);
- skb = rxe->ifc_ops->init_packet(rxe, av, paylen, pkt);
+ skb = rxe_init_packet(rxe, av, paylen, pkt);
if (unlikely(!skb))
return NULL;
@@ -480,7 +476,7 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
u32 *p;
int err;
- err = rxe->ifc_ops->prepare(rxe, pkt, skb, &crc);
+ err = rxe_prepare(rxe, pkt, skb, &crc);
if (err)
return err;
@@ -488,8 +484,7 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
if (wqe->wr.send_flags & IB_SEND_INLINE) {
u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset];
- crc = crc32_le(crc, tmp, paylen);
-
+ crc = rxe_crc32(rxe, crc, tmp, paylen);
memcpy(payload_addr(pkt), tmp, paylen);
wqe->dma.resid -= paylen;
@@ -599,9 +594,14 @@ int rxe_requester(void *arg)
rxe_add_ref(qp);
next_wqe:
- if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
+ if (unlikely(!qp->valid))
goto exit;
+ if (unlikely(qp->req.state == QP_STATE_ERROR)) {
+ rxe_drain_req_pkts(qp, true);
+ goto exit;
+ }
+
if (unlikely(qp->req.state == QP_STATE_RESET)) {
qp->req.wqe_index = consumer_index(qp->sq.queue);
qp->req.opcode = -1;
@@ -635,6 +635,7 @@ next_wqe:
goto exit;
}
rmr->state = RXE_MEM_STATE_FREE;
+ rxe_drop_ref(rmr);
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
} else if (wqe->wr.opcode == IB_WR_REG_MR) {
@@ -679,7 +680,7 @@ next_wqe:
goto exit;
}
- mtu = get_mtu(qp, wqe);
+ mtu = get_mtu(qp);
payload = (mask & RXE_WRITE_OR_SEND) ? wqe->dma.resid : 0;
if (payload > mtu) {
if (qp_type(qp) == IB_QPT_UD) {
@@ -728,11 +729,11 @@ next_wqe:
ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb);
if (ret) {
qp->need_req_skb = 1;
- kfree_skb(skb);
rollback_state(wqe, qp, &rollback_wqe, rollback_psn);
if (ret == -EAGAIN) {
+ kfree_skb(skb);
rxe_run_task(&qp->req.task, 1);
goto exit;
}
@@ -748,17 +749,8 @@ err:
kfree_skb(skb);
wqe->status = IB_WC_LOC_PROT_ERR;
wqe->state = wqe_state_error;
-
- /*
- * IBA Spec. Section 10.7.3.1 SIGNALED COMPLETIONS
- * ---------8<---------8<-------------
- * ...Note that if a completion error occurs, a Work Completion
- * will always be generated, even if the signaling
- * indicator requests an Unsignaled Completion.
- * ---------8<---------8<-------------
- */
- wqe->wr.send_flags |= IB_SEND_SIGNALED;
__rxe_do_task(&qp->comp.task);
+
exit:
rxe_drop_ref(qp);
return -EAGAIN;
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 3435efff8799..a958ee918a49 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -149,6 +149,7 @@ static enum resp_states check_psn(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
int diff = psn_compare(pkt->psn, qp->resp.psn);
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
switch (qp_type(qp)) {
case IB_QPT_RC:
@@ -157,9 +158,11 @@ static enum resp_states check_psn(struct rxe_qp *qp,
return RESPST_CLEANUP;
qp->resp.sent_psn_nak = 1;
+ rxe_counter_inc(rxe, RXE_CNT_OUT_OF_SEQ_REQ);
return RESPST_ERR_PSN_OUT_OF_SEQ;
} else if (diff < 0) {
+ rxe_counter_inc(rxe, RXE_CNT_DUP_REQ);
return RESPST_DUPLICATE_REQUEST;
}
@@ -307,7 +310,7 @@ static enum resp_states check_op_valid(struct rxe_qp *qp,
break;
default:
- WARN_ON(1);
+ WARN_ON_ONCE(1);
break;
}
@@ -418,7 +421,7 @@ static enum resp_states check_length(struct rxe_qp *qp,
static enum resp_states check_rkey(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
- struct rxe_mem *mem;
+ struct rxe_mem *mem = NULL;
u64 va;
u32 rkey;
u32 resid;
@@ -459,50 +462,48 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
mem = lookup_mem(qp->pd, access, rkey, lookup_remote);
if (!mem) {
state = RESPST_ERR_RKEY_VIOLATION;
- goto err1;
+ goto err;
}
if (unlikely(mem->state == RXE_MEM_STATE_FREE)) {
state = RESPST_ERR_RKEY_VIOLATION;
- goto err1;
+ goto err;
}
if (mem_check_range(mem, va, resid)) {
state = RESPST_ERR_RKEY_VIOLATION;
- goto err2;
+ goto err;
}
if (pkt->mask & RXE_WRITE_MASK) {
if (resid > mtu) {
if (pktlen != mtu || bth_pad(pkt)) {
state = RESPST_ERR_LENGTH;
- goto err2;
+ goto err;
}
-
- resid = mtu;
} else {
if (pktlen != resid) {
state = RESPST_ERR_LENGTH;
- goto err2;
+ goto err;
}
if ((bth_pad(pkt) != (0x3 & (-resid)))) {
/* This case may not be exactly that
* but nothing else fits.
*/
state = RESPST_ERR_LENGTH;
- goto err2;
+ goto err;
}
}
}
- WARN_ON(qp->resp.mr);
+ WARN_ON_ONCE(qp->resp.mr);
qp->resp.mr = mem;
return RESPST_EXECUTE;
-err2:
- rxe_drop_ref(mem);
-err1:
+err:
+ if (mem)
+ rxe_drop_ref(mem);
return state;
}
@@ -608,7 +609,7 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
pad = (-payload) & 0x3;
paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
- skb = rxe->ifc_ops->init_packet(rxe, &qp->pri_av, paylen, ack);
+ skb = rxe_init_packet(rxe, &qp->pri_av, paylen, ack);
if (!skb)
return NULL;
@@ -637,7 +638,7 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
if (ack->mask & RXE_ATMACK_MASK)
atmack_set_orig(ack, qp->resp.atomic_orig);
- err = rxe->ifc_ops->prepare(rxe, ack, skb, &crc);
+ err = rxe_prepare(rxe, ack, skb, &crc);
if (err) {
kfree_skb(skb);
return NULL;
@@ -808,12 +809,10 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
err = process_atomic(qp, pkt);
if (err)
return err;
- } else
+ } else {
/* Unreachable */
- WARN_ON(1);
-
- /* We successfully processed this new request. */
- qp->resp.msn++;
+ WARN_ON_ONCE(1);
+ }
/* next expected psn, read handles this separately */
qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
@@ -821,9 +820,11 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
qp->resp.opcode = pkt->opcode;
qp->resp.status = IB_WC_SUCCESS;
- if (pkt->mask & RXE_COMP_MASK)
+ if (pkt->mask & RXE_COMP_MASK) {
+ /* We successfully processed this new request. */
+ qp->resp.msn++;
return RESPST_COMPLETE;
- else if (qp_type(qp) == IB_QPT_RC)
+ } else if (qp_type(qp) == IB_QPT_RC)
return RESPST_ACKNOWLEDGE;
else
return RESPST_CLEANUP;
@@ -906,6 +907,7 @@ static enum resp_states do_complete(struct rxe_qp *qp,
return RESPST_ERROR;
}
rmr->state = RXE_MEM_STATE_FREE;
+ rxe_drop_ref(rmr);
}
wc->qp = &qp->ibqp;
@@ -993,7 +995,9 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
free_rd_atomic_resource(qp, res);
rxe_advance_resp_resource(qp);
- memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(skb->cb));
+ memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(ack_pkt));
+ memset((unsigned char *)SKB_TO_PKT(skb) + sizeof(ack_pkt), 0,
+ sizeof(skb->cb) - sizeof(ack_pkt));
res->type = RXE_ATOMIC_MASK;
res->atomic.skb = skb;
@@ -1206,9 +1210,26 @@ static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
}
}
+void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
+{
+ struct sk_buff *skb;
+
+ while ((skb = skb_dequeue(&qp->req_pkts))) {
+ rxe_drop_ref(qp);
+ kfree_skb(skb);
+ }
+
+ if (notify)
+ return;
+
+ while (!qp->srq && qp->rq.queue && queue_head(qp->rq.queue))
+ advance_consumer(qp->rq.queue);
+}
+
int rxe_responder(void *arg)
{
struct rxe_qp *qp = (struct rxe_qp *)arg;
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
enum resp_states state;
struct rxe_pkt_info *pkt = NULL;
int ret = 0;
@@ -1297,6 +1318,7 @@ int rxe_responder(void *arg)
break;
case RESPST_ERR_RNR:
if (qp_type(qp) == IB_QPT_RC) {
+ rxe_counter_inc(rxe, RXE_CNT_SND_RNR);
/* RC - class B */
send_ack(qp, pkt, AETH_RNR_NAK |
(~AETH_TYPE_MASK &
@@ -1373,21 +1395,10 @@ int rxe_responder(void *arg)
goto exit;
- case RESPST_RESET: {
- struct sk_buff *skb;
-
- while ((skb = skb_dequeue(&qp->req_pkts))) {
- rxe_drop_ref(qp);
- kfree_skb(skb);
- }
-
- while (!qp->srq && qp->rq.queue &&
- queue_head(qp->rq.queue))
- advance_consumer(qp->rq.queue);
-
+ case RESPST_RESET:
+ rxe_drain_req_pkts(qp, false);
qp->resp.wqe = NULL;
goto exit;
- }
case RESPST_ERROR:
qp->resp.goto_error = 0;
@@ -1396,7 +1407,7 @@ int rxe_responder(void *arg)
goto exit;
default:
- WARN_ON(1);
+ WARN_ON_ONCE(1);
}
}
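
One change in this file that is easy to misread is the memcpy in send_atomic_ack(): ack_pkt is an on-stack struct rxe_pkt_info, and the old code copied sizeof(skb->cb) (48 bytes) out of it. If the struct is smaller than cb[], that read past the end of ack_pkt and filled the tail of the control block with stack garbage; the replacement copies only sizeof(ack_pkt) and zeroes the rest of cb[] explicitly. A compile-time guard along these lines (hypothetical, not part of the patch) would document the size assumption:

	BUILD_BUG_ON(sizeof(struct rxe_pkt_info) >
		     sizeof(((struct sk_buff *)0)->cb));
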
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index beb7021ff18a..af90a7d42b96 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -31,9 +31,12 @@
* SOFTWARE.
*/
+#include <linux/dma-mapping.h>
+#include <net/addrconf.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
+#include "rxe_hw_counters.h"
static int rxe_query_device(struct ib_device *dev,
struct ib_device_attr *attr,
@@ -86,6 +89,7 @@ static int rxe_query_port(struct ib_device *dev,
port = &rxe->port;
+ /* *attr is zeroed by the caller, avoid zeroing it here */
*attr = port->attr;
mutex_lock(&rxe->usdev_lock);
@@ -168,7 +172,7 @@ static int rxe_query_pkey(struct ib_device *device,
struct rxe_port *port;
if (unlikely(port_num != 1)) {
- dev_warn(device->dma_device, "invalid port_num = %d\n",
+ dev_warn(device->dev.parent, "invalid port_num = %d\n",
port_num);
goto err1;
}
@@ -176,7 +180,7 @@ static int rxe_query_pkey(struct ib_device *device,
port = &rxe->port;
if (unlikely(index >= port->attr.pkey_tbl_len)) {
- dev_warn(device->dma_device, "invalid index = %d\n",
+ dev_warn(device->dev.parent, "invalid index = %d\n",
index);
goto err1;
}
@@ -234,7 +238,7 @@ static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
{
struct rxe_dev *rxe = to_rdev(dev);
- return rxe->ifc_ops->link_layer(rxe, port_num);
+ return rxe_link_layer(rxe, port_num);
}
static struct ib_ucontext *rxe_alloc_ucontext(struct ib_device *dev,
@@ -261,13 +265,14 @@ static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
int err;
struct ib_port_attr attr;
- err = rxe_query_port(dev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+ err = ib_query_port(dev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
@@ -292,22 +297,22 @@ static int rxe_dealloc_pd(struct ib_pd *ibpd)
return 0;
}
-static int rxe_init_av(struct rxe_dev *rxe, struct ib_ah_attr *attr,
+static int rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr,
struct rxe_av *av)
{
int err;
union ib_gid sgid;
struct ib_gid_attr sgid_attr;
- err = ib_get_cached_gid(&rxe->ib_dev, attr->port_num,
- attr->grh.sgid_index, &sgid,
+ err = ib_get_cached_gid(&rxe->ib_dev, rdma_ah_get_port_num(attr),
+ rdma_ah_read_grh(attr)->sgid_index, &sgid,
&sgid_attr);
if (err) {
pr_err("Failed to query sgid. err = %d\n", err);
return err;
}
- err = rxe_av_from_attr(rxe, attr->port_num, av, attr);
+ err = rxe_av_from_attr(rxe, rdma_ah_get_port_num(attr), av, attr);
if (!err)
err = rxe_av_fill_ip_info(rxe, av, attr, &sgid_attr, &sgid);
@@ -316,7 +321,8 @@ static int rxe_init_av(struct rxe_dev *rxe, struct ib_ah_attr *attr,
return err;
}
-static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
+static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
+ struct rdma_ah_attr *attr,
struct ib_udata *udata)
{
@@ -351,7 +357,7 @@ err1:
return ERR_PTR(err);
}
-static int rxe_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
+static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
int err;
struct rxe_dev *rxe = to_rdev(ibah->device);
@@ -368,11 +374,13 @@ static int rxe_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
return 0;
}
-static int rxe_query_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
+static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
struct rxe_dev *rxe = to_rdev(ibah->device);
struct rxe_ah *ah = to_rah(ibah);
+ memset(attr, 0, sizeof(*attr));
+ attr->type = ibah->type;
rxe_av_to_attr(rxe, &ah->av, attr);
return 0;
}
@@ -732,13 +740,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr,
sge = ibwr->sg_list;
for (i = 0; i < num_sge; i++, sge++) {
- if (qp->is_user && copy_from_user(p, (__user void *)
- (uintptr_t)sge->addr, sge->length))
- return -EFAULT;
-
- else if (!qp->is_user)
- memcpy(p, (void *)(uintptr_t)sge->addr,
- sge->length);
+ memcpy(p, (void *)(uintptr_t)sge->addr,
+ sge->length);
p += sge->length;
}
@@ -911,6 +914,9 @@ static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
spin_unlock_irqrestore(&rq->producer_lock, flags);
+ if (qp->resp.state == QP_STATE_ERROR)
+ rxe_run_task(&qp->resp.task, 1);
+
err1:
return err;
}
@@ -1209,10 +1215,8 @@ static ssize_t rxe_show_parent(struct device *device,
{
struct rxe_dev *rxe = container_of(device, struct rxe_dev,
ib_dev.dev);
- char *name;
- name = rxe->ifc_ops->parent_name(rxe, 1);
- return snprintf(buf, 16, "%s\n", name);
+ return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1));
}
static DEVICE_ATTR(parent, S_IRUGO, rxe_show_parent, NULL);
@@ -1233,11 +1237,14 @@ int rxe_register_device(struct rxe_dev *rxe)
dev->owner = THIS_MODULE;
dev->node_type = RDMA_NODE_IB_CA;
dev->phys_port_cnt = 1;
- dev->num_comp_vectors = RXE_NUM_COMP_VECTORS;
- dev->dma_device = rxe->ifc_ops->dma_device(rxe);
+ dev->num_comp_vectors = num_possible_cpus();
+ dev->dev.parent = rxe_dma_device(rxe);
dev->local_dma_lkey = 0;
- dev->node_guid = rxe->ifc_ops->node_guid(rxe);
- dev->dma_ops = &rxe_dma_mapping_ops;
+ addrconf_addr_eui48((unsigned char *)&dev->node_guid,
+ rxe->ndev->dev_addr);
+ dev->dev.dma_ops = &dma_virt_ops;
+ dma_coerce_mask_and_coherent(&dev->dev,
+ dma_get_required_mask(dev->dev.parent));
dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION;
dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
@@ -1317,6 +1324,15 @@ int rxe_register_device(struct rxe_dev *rxe)
dev->map_mr_sg = rxe_map_mr_sg;
dev->attach_mcast = rxe_attach_mcast;
dev->detach_mcast = rxe_detach_mcast;
+ dev->get_hw_stats = rxe_ib_get_hw_stats;
+ dev->alloc_hw_stats = rxe_ib_alloc_hw_stats;
+
+ rxe->tfm = crypto_alloc_shash("crc32", 0, 0);
+ if (IS_ERR(rxe->tfm)) {
+ pr_err("failed to allocate crc algorithm err:%ld\n",
+ PTR_ERR(rxe->tfm));
+ return PTR_ERR(rxe->tfm);
+ }
err = ib_register_device(dev, NULL);
if (err) {
@@ -1338,6 +1354,8 @@ int rxe_register_device(struct rxe_dev *rxe)
err2:
ib_unregister_device(dev);
err1:
+ crypto_free_shash(rxe->tfm);
+
return err;
}
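
Two of the rxe_register_device() changes above deserve a word of context (hedged, since the rationale is not spelled out in the diff): dma_virt_ops simply hands kernel virtual addresses back as DMA addresses, which is all a software RoCE device needs, so the driver-private rxe_dma_mapping_ops can be dropped; and addrconf_addr_eui48() from <net/addrconf.h> computes exactly the modified EUI-64 that the removed rxe_mac_to_eui64() in rxe_net.c used to build, for example:

	/* dev_addr aa:bb:cc:dd:ee:ff  ->  node_guid a8:bb:cc:ff:fe:dd:ee:ff */
	/* (first octet with the local/universal bit flipped, ff:fe spliced */
	/*  into the middle of the MAC)                                     */
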
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index cac1d52a08f0..5a180fbe40d9 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -38,6 +38,7 @@
#include <rdma/rdma_user_rxe.h>
#include "rxe_pool.h"
#include "rxe_task.h"
+#include "rxe_hw_counters.h"
static inline int pkey_match(u16 key1, u16 key2)
{
@@ -372,26 +373,6 @@ struct rxe_port {
u32 qp_gsi_index;
};
-/* callbacks from rdma_rxe to network interface layer */
-struct rxe_ifc_ops {
- void (*release)(struct rxe_dev *rxe);
- __be64 (*node_guid)(struct rxe_dev *rxe);
- __be64 (*port_guid)(struct rxe_dev *rxe);
- struct device *(*dma_device)(struct rxe_dev *rxe);
- int (*mcast_add)(struct rxe_dev *rxe, union ib_gid *mgid);
- int (*mcast_delete)(struct rxe_dev *rxe, union ib_gid *mgid);
- int (*prepare)(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
- struct sk_buff *skb, u32 *crc);
- int (*send)(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
- struct sk_buff *skb);
- int (*loopback)(struct sk_buff *skb);
- struct sk_buff *(*init_packet)(struct rxe_dev *rxe, struct rxe_av *av,
- int paylen, struct rxe_pkt_info *pkt);
- char *(*parent_name)(struct rxe_dev *rxe, unsigned int port_num);
- enum rdma_link_layer (*link_layer)(struct rxe_dev *rxe,
- unsigned int port_num);
-};
-
struct rxe_dev {
struct ib_device ib_dev;
struct ib_device_attr attr;
@@ -400,8 +381,6 @@ struct rxe_dev {
struct kref ref_cnt;
struct mutex usdev_lock;
- struct rxe_ifc_ops *ifc_ops;
-
struct net_device *ndev;
int xmit_errors;
@@ -423,10 +402,18 @@ struct rxe_dev {
spinlock_t mmap_offset_lock; /* guard mmap_offset */
int mmap_offset;
+ u64 stats_counters[RXE_NUM_OF_COUNTERS];
+
struct rxe_port port;
struct list_head list;
+ struct crypto_shash *tfm;
};
+static inline void rxe_counter_inc(struct rxe_dev *rxe, enum rxe_counters cnt)
+{
+ rxe->stats_counters[cnt]++;
+}
+
static inline struct rxe_dev *to_rdev(struct ib_device *dev)
{
return dev ? container_of(dev, struct rxe_dev, ib_dev) : NULL;
@@ -475,6 +462,6 @@ static inline struct rxe_mem *to_rmw(struct ib_mw *mw)
int rxe_register_device(struct rxe_dev *rxe);
int rxe_unregister_device(struct rxe_dev *rxe);
-void rxe_mc_cleanup(void *arg);
+void rxe_mc_cleanup(struct rxe_pool_entry *arg);
#endif /* RXE_VERBS_H */
diff --git a/drivers/infiniband/ulp/Makefile b/drivers/infiniband/ulp/Makefile
index f3c7dcf03098..c28af1823a2d 100644
--- a/drivers/infiniband/ulp/Makefile
+++ b/drivers/infiniband/ulp/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_INFINIBAND_SRP) += srp/
obj-$(CONFIG_INFINIBAND_SRPT) += srpt/
obj-$(CONFIG_INFINIBAND_ISER) += iser/
obj-$(CONFIG_INFINIBAND_ISERT) += isert/
+obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic/
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index da12717a3eb7..ff50a7bd66d8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -52,7 +52,6 @@
#include <rdma/ib_pack.h>
#include <rdma/ib_sa.h>
#include <linux/sched.h>
-
/* constants */
enum ipoib_flush_level {
@@ -153,6 +152,13 @@ static inline void skb_add_pseudo_hdr(struct sk_buff *skb)
skb_pull(skb, IPOIB_HARD_LEN);
}
+static inline struct ipoib_dev_priv *ipoib_priv(const struct net_device *dev)
+{
+ struct rdma_netdev *rn = netdev_priv(dev);
+
+ return rn->clnt_priv;
+}
+
/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
struct ipoib_mcast {
struct ib_sa_mcmember_rec mcmember;
@@ -404,6 +410,7 @@ struct ipoib_dev_priv {
struct timer_list poll_timer;
unsigned max_send_sge;
bool sm_fullmember_sendonly_support;
+ const struct net_device_ops *rn_ops;
};
struct ipoib_ah {
@@ -416,7 +423,7 @@ struct ipoib_ah {
struct ipoib_path {
struct net_device *dev;
- struct ib_sa_path_rec pathrec;
+ struct sa_path_rec pathrec;
struct ipoib_ah *ah;
struct sk_buff_head queue;
@@ -472,7 +479,7 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr);
struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
- struct ib_pd *pd, struct ib_ah_attr *attr);
+ struct ib_pd *pd, struct rdma_ah_attr *attr);
void ipoib_free_ah(struct kref *kref);
static inline void ipoib_put_ah(struct ipoib_ah *ah)
{
@@ -482,27 +489,28 @@ int ipoib_open(struct net_device *dev);
int ipoib_add_pkey_attr(struct net_device *dev);
int ipoib_add_umcast_attr(struct net_device *dev);
-void ipoib_send(struct net_device *dev, struct sk_buff *skb,
- struct ipoib_ah *address, u32 qpn);
+int ipoib_send(struct net_device *dev, struct sk_buff *skb,
+ struct ib_ah *address, u32 dqpn);
void ipoib_reap_ah(struct work_struct *work);
struct ipoib_path *__path_find(struct net_device *dev, void *gid);
void ipoib_mark_paths_invalid(struct net_device *dev);
void ipoib_flush_paths(struct net_device *dev);
-int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv);
-struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
-
-int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
+struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
+ const char *format);
+void ipoib_ib_tx_timer_func(unsigned long ctx);
void ipoib_ib_dev_flush_light(struct work_struct *work);
void ipoib_ib_dev_flush_normal(struct work_struct *work);
void ipoib_ib_dev_flush_heavy(struct work_struct *work);
void ipoib_pkey_event(struct work_struct *work);
void ipoib_ib_dev_cleanup(struct net_device *dev);
+int ipoib_ib_dev_open_default(struct net_device *dev);
int ipoib_ib_dev_open(struct net_device *dev);
-int ipoib_ib_dev_up(struct net_device *dev);
-int ipoib_ib_dev_down(struct net_device *dev);
int ipoib_ib_dev_stop(struct net_device *dev);
+void ipoib_ib_dev_up(struct net_device *dev);
+void ipoib_ib_dev_down(struct net_device *dev);
+int ipoib_ib_dev_stop_default(struct net_device *dev);
void ipoib_pkey_dev_check_presence(struct net_device *dev);
int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
@@ -513,7 +521,7 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work);
void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
void ipoib_mcast_restart_task(struct work_struct *work);
-int ipoib_mcast_start_thread(struct net_device *dev);
+void ipoib_mcast_start_thread(struct net_device *dev);
int ipoib_mcast_stop_thread(struct net_device *dev);
void ipoib_mcast_dev_down(struct net_device *dev);
@@ -562,8 +570,10 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
struct ipoib_path *path);
#endif
-int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
- union ib_gid *mgid, int set_qkey);
+int ipoib_mcast_attach(struct net_device *dev, struct ib_device *hca,
+ union ib_gid *mgid, u16 mlid, int set_qkey, u32 qkey);
+int ipoib_mcast_detach(struct net_device *dev, struct ib_device *hca,
+ union ib_gid *mgid, u16 mlid);
void ipoib_mcast_remove_list(struct list_head *remove_list);
void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid,
struct list_head *remove_list);
@@ -587,13 +597,13 @@ void __exit ipoib_netlink_fini(void);
void ipoib_set_umcast(struct net_device *ndev, int umcast_val);
int ipoib_set_mode(struct net_device *dev, const char *buf);
-void ipoib_setup(struct net_device *dev);
+void ipoib_setup_common(struct net_device *dev);
void ipoib_pkey_open(struct ipoib_dev_priv *priv);
void ipoib_drain_cq(struct net_device *dev);
void ipoib_set_ethtool_ops(struct net_device *dev);
-int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca);
+void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca);
#define IPOIB_FLAGS_RC 0x80
#define IPOIB_FLAGS_UC 0x40
@@ -607,14 +617,14 @@ extern int ipoib_max_conn_qp;
static inline int ipoib_cm_admin_enabled(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return IPOIB_CM_SUPPORTED(dev->dev_addr) &&
test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
}
static inline int ipoib_cm_enabled(struct net_device *dev, u8 *hwaddr)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return IPOIB_CM_SUPPORTED(hwaddr) &&
test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
}
@@ -637,13 +647,13 @@ static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *t
static inline int ipoib_cm_has_srq(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return !!priv->cm.srq;
}
static inline unsigned int ipoib_cm_max_mtu(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return priv->cm.max_cm_mtu;
}
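
Most of the mechanical churn in the ipoib files from here on is the netdev_priv() to ipoib_priv() conversion enabled by the helper added above. The hedged picture of why the direct cast stops working: once the interface is created through the rdma_netdev path, the netdev private area holds a struct rdma_netdev, and the IPoIB state is reached through its clnt_priv pointer rather than living in the private area itself:

	struct rdma_netdev *rn = netdev_priv(dev);	/* embedded in net_device   */
	struct ipoib_dev_priv *priv = rn->clnt_priv;	/* IPoIB state hangs off it */
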
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 096c4f6fbd65..f87d104837dc 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -38,6 +38,8 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/moduleparam.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
#include "ipoib.h"
@@ -91,7 +93,7 @@ static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_recv_wr *bad_wr;
int i, ret;
@@ -117,7 +119,7 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
struct ib_recv_wr *wr,
struct ib_sge *sge, int id)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_recv_wr *bad_wr;
int i, ret;
@@ -144,7 +146,7 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
u64 mapping[IPOIB_CM_RX_SG],
gfp_t gfp)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct sk_buff *skb;
int i;
@@ -195,7 +197,7 @@ partial_error:
static void ipoib_cm_free_rx_ring(struct net_device *dev,
struct ipoib_cm_rx_buf *rx_ring)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int i;
for (i = 0; i < ipoib_recvq_size; ++i)
@@ -234,7 +236,7 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
{
struct ipoib_cm_rx *p = ctx;
- struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
unsigned long flags;
if (event->event != IB_EVENT_QP_LAST_WQE_REACHED)
@@ -250,7 +252,7 @@ static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
struct ipoib_cm_rx *p)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_init_attr attr = {
.event_handler = ipoib_cm_rx_event_handler,
.send_cq = priv->recv_cq, /* For drain WR */
@@ -275,7 +277,7 @@ static int ipoib_cm_modify_rx_qp(struct net_device *dev,
struct ib_cm_id *cm_id, struct ib_qp *qp,
unsigned psn)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
@@ -330,7 +332,7 @@ static void ipoib_cm_init_rx_wr(struct net_device *dev,
struct ib_recv_wr *wr,
struct ib_sge *sge)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int i;
for (i = 0; i < priv->cm.num_frags; ++i)
@@ -348,7 +350,7 @@ static void ipoib_cm_init_rx_wr(struct net_device *dev,
static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id,
struct ipoib_cm_rx *rx)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct {
struct ib_recv_wr wr;
struct ib_sge sge[IPOIB_CM_RX_SG];
@@ -363,7 +365,7 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
t = kmalloc(sizeof *t, GFP_KERNEL);
if (!t) {
ret = -ENOMEM;
- goto err_free;
+ goto err_free_1;
}
ipoib_cm_init_rx_wr(dev, &t->wr, t->sge);
@@ -410,6 +412,8 @@ err_count:
err_free:
kfree(t);
+
+err_free_1:
ipoib_cm_free_rx_ring(dev, rx->rx_ring);
return ret;
@@ -419,7 +423,7 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
struct ib_qp *qp, struct ib_cm_req_event_param *req,
unsigned psn)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_data data = {};
struct ib_cm_rep_param rep = {};
@@ -439,7 +443,7 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
struct net_device *dev = cm_id->context;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_rx *p;
unsigned psn;
int ret;
@@ -512,7 +516,7 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
/* Fall through */
case IB_CM_REJ_RECEIVED:
p = cm_id->context;
- priv = netdev_priv(p->dev);
+ priv = ipoib_priv(p->dev);
if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
ipoib_warn(priv, "unable to move qp to error state\n");
/* Fall through */
@@ -556,7 +560,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_rx_buf *rx_ring;
unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
struct sk_buff *skb, *newskb;
@@ -705,7 +709,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_tx_buf *tx_req;
int rc;
unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb);
@@ -783,7 +787,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_tx *tx = wc->qp->qp_context;
unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
struct ipoib_tx_buf *tx_req;
@@ -820,9 +824,12 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
wc->status != IB_WC_WR_FLUSH_ERR) {
struct ipoib_neigh *neigh;
- ipoib_dbg(priv, "failed cm send event "
- "(status=%d, wrid=%d vend_err %x)\n",
- wc->status, wr_id, wc->vendor_err);
+ if (wc->status != IB_WC_RNR_RETRY_EXC_ERR)
+ ipoib_warn(priv, "failed cm send event (status=%d, wrid=%d vend_err %x)\n",
+ wc->status, wr_id, wc->vendor_err);
+ else
+ ipoib_dbg(priv, "failed cm send event (status=%d, wrid=%d vend_err %x)\n",
+ wc->status, wr_id, wc->vendor_err);
spin_lock_irqsave(&priv->lock, flags);
neigh = tx->neigh;
@@ -849,7 +856,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
int ipoib_cm_dev_open(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int ret;
if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
@@ -881,7 +888,7 @@ err_cm:
static void ipoib_cm_free_rx_reap_list(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_rx *rx, *n;
LIST_HEAD(list);
@@ -904,7 +911,7 @@ static void ipoib_cm_free_rx_reap_list(struct net_device *dev)
void ipoib_cm_dev_stop(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_rx *p;
unsigned long begin;
int ret;
@@ -948,7 +955,7 @@ void ipoib_cm_dev_stop(struct net_device *dev)
break;
}
spin_unlock_irq(&priv->lock);
- msleep(1);
+ usleep_range(1000, 2000);
ipoib_drain_cq(dev);
spin_lock_irq(&priv->lock);
}
@@ -963,7 +970,7 @@ void ipoib_cm_dev_stop(struct net_device *dev)
static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
struct ipoib_cm_tx *p = cm_id->context;
- struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
struct ipoib_cm_data *data = event->private_data;
struct sk_buff_head skqueue;
struct ib_qp_attr qp_attr;
@@ -1015,9 +1022,10 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
while ((skb = __skb_dequeue(&skqueue))) {
skb->dev = p->dev;
- if (dev_queue_xmit(skb))
- ipoib_warn(priv, "dev_queue_xmit failed "
- "to requeue packet\n");
+ ret = dev_queue_xmit(skb);
+ if (ret)
+ ipoib_warn(priv, "%s:dev_queue_xmit failed to re-queue packet, ret:%d\n",
+ __func__, ret);
}
ret = ib_send_cm_rtu(cm_id, NULL, 0);
@@ -1030,7 +1038,7 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_cm_tx *tx)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_init_attr attr = {
.send_cq = priv->recv_cq,
.recv_cq = priv->recv_cq,
@@ -1040,9 +1048,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
.sq_sig_type = IB_SIGNAL_ALL_WR,
.qp_type = IB_QPT_RC,
.qp_context = tx,
- .create_flags = IB_QP_CREATE_USE_GFP_NOIO
+ .create_flags = 0
};
-
struct ib_qp *tx_qp;
if (dev->features & NETIF_F_SG)
@@ -1050,10 +1057,6 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);
tx_qp = ib_create_qp(priv->pd, &attr);
- if (PTR_ERR(tx_qp) == -EINVAL) {
- attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO;
- tx_qp = ib_create_qp(priv->pd, &attr);
- }
tx->max_send_sge = attr.cap.max_send_sge;
return tx_qp;
}
@@ -1061,9 +1064,9 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
static int ipoib_cm_send_req(struct net_device *dev,
struct ib_cm_id *id, struct ib_qp *qp,
u32 qpn,
- struct ib_sa_path_rec *pathrec)
+ struct sa_path_rec *pathrec)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_data data = {};
struct ib_cm_req_param req = {};
@@ -1098,7 +1101,7 @@ static int ipoib_cm_send_req(struct net_device *dev,
static int ipoib_cm_modify_tx_init(struct net_device *dev,
struct ib_cm_id *cm_id, struct ib_qp *qp)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
ret = ib_find_pkey(priv->ca, priv->port, priv->pkey, &qp_attr.pkey_index);
@@ -1121,13 +1124,14 @@ static int ipoib_cm_modify_tx_init(struct net_device *dev,
}
static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
- struct ib_sa_path_rec *pathrec)
+ struct sa_path_rec *pathrec)
{
- struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
+ unsigned int noio_flag;
int ret;
- p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring,
- GFP_NOIO, PAGE_KERNEL);
+ noio_flag = memalloc_noio_save();
+ p->tx_ring = vzalloc(ipoib_sendq_size * sizeof(*p->tx_ring));
if (!p->tx_ring) {
ret = -ENOMEM;
goto err_tx;
@@ -1135,9 +1139,10 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring);
p->qp = ipoib_cm_create_tx_qp(p->dev, p);
+ memalloc_noio_restore(noio_flag);
if (IS_ERR(p->qp)) {
ret = PTR_ERR(p->qp);
- ipoib_warn(priv, "failed to allocate tx qp: %d\n", ret);
+ ipoib_warn(priv, "failed to create tx qp: %d\n", ret);
goto err_qp;
}
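
The GFP_NOIO handling in the TX setup changes shape here: instead of passing GFP_NOIO to one __vmalloc() call (and retrying QP creation without IB_QP_CREATE_USE_GFP_NOIO when the provider rejected it), the whole region is now wrapped in memalloc_noio_save()/memalloc_noio_restore(), so every allocation underneath, including those made inside ib_create_qp(), implicitly avoids I/O. A minimal sketch of the scoped-NOIO pattern, assuming the helpers from <linux/sched/mm.h> as in current kernels; the function name is illustrative:

	#include <linux/sched/mm.h>
	#include <linux/vmalloc.h>

	/* Everything allocated between save and restore behaves as if
	 * GFP_NOIO had been passed explicitly, without threading the flag
	 * through every callee.
	 */
	static void *alloc_ring_noio(size_t entries, size_t entry_size)
	{
		unsigned int noio_flag;
		void *ring;

		noio_flag = memalloc_noio_save();
		ring = vzalloc(entries * entry_size);
		/* ... further allocations (e.g. QP creation) also avoid I/O ... */
		memalloc_noio_restore(noio_flag);

		return ring;
	}
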
@@ -1151,13 +1156,13 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
ret = ipoib_cm_modify_tx_init(p->dev, p->id, p->qp);
if (ret) {
ipoib_warn(priv, "failed to modify tx qp to rtr: %d\n", ret);
- goto err_modify;
+ goto err_modify_send;
}
ret = ipoib_cm_send_req(p->dev, p->id, p->qp, qpn, pathrec);
if (ret) {
ipoib_warn(priv, "failed to send cm req: %d\n", ret);
- goto err_send_cm;
+ goto err_modify_send;
}
ipoib_dbg(priv, "Request connection 0x%x for gid %pI6 qpn 0x%x\n",
@@ -1165,8 +1170,7 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
return 0;
-err_send_cm:
-err_modify:
+err_modify_send:
ib_destroy_cm_id(p->id);
err_id:
p->id = NULL;
@@ -1180,7 +1184,7 @@ err_tx:
static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
{
- struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
struct ipoib_tx_buf *tx_req;
unsigned long begin;
@@ -1200,7 +1204,7 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
goto timeout;
}
- msleep(1);
+ usleep_range(1000, 2000);
}
}
@@ -1230,7 +1234,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
struct ib_cm_event *event)
{
struct ipoib_cm_tx *tx = cm_id->context;
- struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
struct net_device *dev = priv->dev;
struct ipoib_neigh *neigh;
unsigned long flags;
@@ -1281,7 +1285,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
struct ipoib_neigh *neigh)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_tx *tx;
tx = kzalloc(sizeof *tx, GFP_ATOMIC);
@@ -1300,7 +1304,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
{
- struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
unsigned long flags;
if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
spin_lock_irqsave(&priv->lock, flags);
@@ -1326,7 +1330,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
struct ipoib_path *path;
int ret;
- struct ib_sa_path_rec pathrec;
+ struct sa_path_rec pathrec;
u32 qpn;
netif_tx_lock_bh(dev);
@@ -1388,7 +1392,7 @@ static void ipoib_cm_tx_reap(struct work_struct *work)
while (!list_empty(&priv->cm.reap_list)) {
p = list_entry(priv->cm.reap_list.next, typeof(*p), list);
- list_del(&p->list);
+ list_del_init(&p->list);
spin_unlock_irqrestore(&priv->lock, flags);
netif_tx_unlock_bh(dev);
ipoib_cm_tx_destroy(p);
@@ -1435,7 +1439,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
unsigned int mtu)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int e = skb_queue_empty(&priv->cm.skb_queue);
if (skb_dst(skb))
@@ -1484,7 +1488,8 @@ static void ipoib_cm_stale_task(struct work_struct *work)
static ssize_t show_mode(struct device *d, struct device_attribute *attr,
char *buf)
{
- struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(d));
+ struct net_device *dev = to_net_dev(d);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
return sprintf(buf, "connected\n");
@@ -1497,7 +1502,7 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
{
struct net_device *dev = to_net_dev(d);
int ret;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (test_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags))
return -EPERM;
@@ -1507,12 +1512,14 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
ret = ipoib_set_mode(dev, buf);
- rtnl_unlock();
-
- if (!ret)
- return count;
+ /* ipoib_set_mode() returns with the rtnl lock held unless it
+ * returned -EBUSY, in which case there is nothing to unlock.
+ */
+ if (ret != -EBUSY)
+ rtnl_unlock();
- return ret;
+ return (!ret || ret == -EBUSY) ? count : ret;
}
static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode);
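
The mode-change path now carries a locking contract between the sysfs store handler and ipoib_set_mode() (see the ipoib_main.c hunks below): the store handler enters with the rtnl lock held, ipoib_set_mode() drops it around ipoib_flush_paths() and re-acquires it with rtnl_trylock(), and a failed trylock is reported as -EBUSY so the caller knows not to unlock. A condensed sketch of that contract with the bodies trimmed (the caller's lock acquisition is not shown in this hunk and is illustrative):

	/* Callee: enters with rtnl held, may drop it to flush paths; returns
	 * -EBUSY when the lock could not be re-taken, so it is NOT held then.
	 */
	int ipoib_set_mode(struct net_device *dev, const char *buf)
	{
		/* ... validate and apply the requested mode ... */
		rtnl_unlock();
		ipoib_flush_paths(dev);		/* sleeps; must not hold rtnl */
		return rtnl_trylock() ? 0 : -EBUSY;
	}

	/* Caller: only unlock when the callee still holds the lock. */
	static ssize_t set_mode(struct device *d, struct device_attribute *attr,
				const char *buf, size_t count)
	{
		int ret;

		rtnl_lock();			/* illustrative acquisition */
		ret = ipoib_set_mode(to_net_dev(d), buf);
		if (ret != -EBUSY)
			rtnl_unlock();

		return (!ret || ret == -EBUSY) ? count : ret;
	}
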
@@ -1524,7 +1531,7 @@ int ipoib_cm_add_mode_attr(struct net_device *dev)
static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_srq_init_attr srq_init_attr = {
.srq_type = IB_SRQT_BASIC,
.attr = {
@@ -1553,7 +1560,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
int ipoib_cm_dev_init(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int max_srq_sge, i;
INIT_LIST_HEAD(&priv->cm.passive_ids);
@@ -1614,7 +1621,7 @@ int ipoib_cm_dev_init(struct net_device *dev)
void ipoib_cm_dev_cleanup(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int ret;
if (!priv->cm.srq)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 7b6d40ff1acf..7871379342f4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -60,12 +60,12 @@ static const struct ipoib_stats ipoib_gstrings_stats[] = {
static void ipoib_get_drvinfo(struct net_device *netdev,
struct ethtool_drvinfo *drvinfo)
{
- struct ipoib_dev_priv *priv = netdev_priv(netdev);
+ struct ipoib_dev_priv *priv = ipoib_priv(netdev);
ib_get_device_fw_str(priv->ca, drvinfo->fw_version,
sizeof(drvinfo->fw_version));
- strlcpy(drvinfo->bus_info, dev_name(priv->ca->dma_device),
+ strlcpy(drvinfo->bus_info, dev_name(priv->ca->dev.parent),
sizeof(drvinfo->bus_info));
strlcpy(drvinfo->version, ipoib_driver_version,
@@ -77,7 +77,7 @@ static void ipoib_get_drvinfo(struct net_device *netdev,
static int ipoib_get_coalesce(struct net_device *dev,
struct ethtool_coalesce *coal)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
coal->rx_coalesce_usecs = priv->ethtool.coalesce_usecs;
coal->rx_max_coalesced_frames = priv->ethtool.max_coalesced_frames;
@@ -88,7 +88,7 @@ static int ipoib_get_coalesce(struct net_device *dev,
static int ipoib_set_coalesce(struct net_device *dev,
struct ethtool_coalesce *coal)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int ret;
/*
@@ -155,7 +155,66 @@ static int ipoib_get_sset_count(struct net_device __always_unused *dev,
return -EOPNOTSUPP;
}
+/* Return lane speed in units of 1e6 bit/sec (Mb/s) */
+static inline int ib_speed_enum_to_int(int speed)
+{
+ switch (speed) {
+ case IB_SPEED_SDR:
+ return SPEED_2500;
+ case IB_SPEED_DDR:
+ return SPEED_5000;
+ case IB_SPEED_QDR:
+ case IB_SPEED_FDR10:
+ return SPEED_10000;
+ case IB_SPEED_FDR:
+ return SPEED_14000;
+ case IB_SPEED_EDR:
+ return SPEED_25000;
+ }
+
+ return SPEED_UNKNOWN;
+}
+
+static int ipoib_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *cmd)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(netdev);
+ struct ib_port_attr attr;
+ int ret, speed, width;
+
+ if (!netif_carrier_ok(netdev)) {
+ cmd->base.speed = SPEED_UNKNOWN;
+ cmd->base.duplex = DUPLEX_UNKNOWN;
+ return 0;
+ }
+
+ ret = ib_query_port(priv->ca, priv->port, &attr);
+ if (ret < 0)
+ return -EINVAL;
+
+ speed = ib_speed_enum_to_int(attr.active_speed);
+ width = ib_width_enum_to_int(attr.active_width);
+
+ if (speed < 0 || width < 0)
+ return -EINVAL;
+
+ /* Apart from the fields set below, the other members of
+ * struct ethtool_link_settings are zero-initialized by
+ * __ethtool_get_link_ksettings().
+ */
+ cmd->base.speed = speed * width;
+ cmd->base.duplex = DUPLEX_FULL;
+
+ cmd->base.phy_address = 0xFF;
+
+ cmd->base.autoneg = AUTONEG_ENABLE;
+ cmd->base.port = PORT_OTHER;
+
+ return 0;
+}
+
static const struct ethtool_ops ipoib_ethtool_ops = {
+ .get_link_ksettings = ipoib_get_link_ksettings,
.get_drvinfo = ipoib_get_drvinfo,
.get_coalesce = ipoib_get_coalesce,
.set_coalesce = ipoib_set_coalesce,
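
ethtool wants a single scalar link speed, so the new callback multiplies the per-lane rate by the lane count: ib_speed_enum_to_int() (added above) maps the IB speed enum to Mb/s per lane, and the core's ib_width_enum_to_int() maps IB_WIDTH_1X/4X/8X/12X to 1/4/8/12. A worked example for a 4X EDR port (numbers only, not driver code):

	/* Illustrative only: effective ethtool speed of a 4X EDR link. */
	static int example_link_speed_mbps(void)
	{
		int per_lane = 25000;	/* ib_speed_enum_to_int(IB_SPEED_EDR) */
		int lanes = 4;		/* ib_width_enum_to_int(IB_WIDTH_4X)  */

		return per_lane * lanes;	/* 100000 Mb/s, i.e. 100 Gb/s */
	}
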
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 6bd5740e2691..11f74cbe6660 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -210,16 +210,16 @@ static int ipoib_path_seq_show(struct seq_file *file, void *iter_ptr)
seq_printf(file,
"GID: %s\n"
" complete: %6s\n",
- gid_buf, path.pathrec.dlid ? "yes" : "no");
+ gid_buf, sa_path_get_dlid(&path.pathrec) ? "yes" : "no");
- if (path.pathrec.dlid) {
+ if (sa_path_get_dlid(&path.pathrec)) {
rate = ib_rate_to_mbps(path.pathrec.rate);
seq_printf(file,
" DLID: 0x%04x\n"
" SL: %12d\n"
" rate: %8d.%d Gb/sec\n",
- be16_to_cpu(path.pathrec.dlid),
+ be32_to_cpu(sa_path_get_dlid(&path.pathrec)),
path.pathrec.sl,
rate / 1000, rate % 1000);
}
@@ -261,7 +261,7 @@ static const struct file_operations ipoib_path_fops = {
void ipoib_create_debug_files(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
char name[IFNAMSIZ + sizeof "_path"];
snprintf(name, sizeof name, "%s_mcg", dev->name);
@@ -279,10 +279,13 @@ void ipoib_create_debug_files(struct net_device *dev)
void ipoib_delete_debug_files(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ WARN_ONCE(!priv->mcg_dentry, "null mcg debug file\n");
+ WARN_ONCE(!priv->path_dentry, "null path debug file\n");
debugfs_remove(priv->mcg_dentry);
debugfs_remove(priv->path_dentry);
+ priv->mcg_dentry = priv->path_dentry = NULL;
}
int ipoib_register_debugfs(void)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 5038f9d2d753..57a9655e844d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -52,7 +52,7 @@ MODULE_PARM_DESC(data_debug_level,
#endif
struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
- struct ib_pd *pd, struct ib_ah_attr *attr)
+ struct ib_pd *pd, struct rdma_ah_attr *attr)
{
struct ipoib_ah *ah;
struct ib_ah *vah;
@@ -65,13 +65,13 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
ah->last_send = 0;
kref_init(&ah->ref);
- vah = ib_create_ah(pd, attr);
+ vah = rdma_create_ah(pd, attr);
if (IS_ERR(vah)) {
kfree(ah);
ah = (struct ipoib_ah *)vah;
} else {
ah->ah = vah;
- ipoib_dbg(netdev_priv(dev), "Created ah %p\n", ah->ah);
+ ipoib_dbg(ipoib_priv(dev), "Created ah %p\n", ah->ah);
}
return ah;
@@ -80,7 +80,7 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
void ipoib_free_ah(struct kref *kref)
{
struct ipoib_ah *ah = container_of(kref, struct ipoib_ah, ref);
- struct ipoib_dev_priv *priv = netdev_priv(ah->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(ah->dev);
unsigned long flags;
@@ -99,7 +99,7 @@ static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv,
static int ipoib_ib_post_receive(struct net_device *dev, int id)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_recv_wr *bad_wr;
int ret;
@@ -121,7 +121,7 @@ static int ipoib_ib_post_receive(struct net_device *dev, int id)
static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct sk_buff *skb;
int buf_size;
u64 *mapping;
@@ -153,7 +153,7 @@ error:
static int ipoib_ib_post_receives(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int i;
for (i = 0; i < ipoib_recvq_size; ++i) {
@@ -172,7 +172,7 @@ static int ipoib_ib_post_receives(struct net_device *dev)
static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV;
struct sk_buff *skb;
u64 mapping[IPOIB_UD_RX_SG];
@@ -381,7 +381,7 @@ free_res:
static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
unsigned int wr_id = wc->wr_id;
struct ipoib_tx_buf *tx_req;
@@ -485,14 +485,14 @@ poll_more:
void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
{
struct net_device *dev = dev_ptr;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
napi_schedule(&priv->napi);
}
static void drain_tx_cq(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
netif_tx_lock(dev);
while (poll_tx(priv))
@@ -506,14 +506,14 @@ static void drain_tx_cq(struct net_device *dev)
void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev_ptr);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev_ptr);
mod_timer(&priv->poll_timer, jiffies);
}
static inline int post_send(struct ipoib_dev_priv *priv,
unsigned int wr_id,
- struct ib_ah *address, u32 qpn,
+ struct ib_ah *address, u32 dqpn,
struct ipoib_tx_buf *tx_req,
void *head, int hlen)
{
@@ -523,7 +523,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
ipoib_build_sge(priv, tx_req);
priv->tx_wr.wr.wr_id = wr_id;
- priv->tx_wr.remote_qpn = qpn;
+ priv->tx_wr.remote_qpn = dqpn;
priv->tx_wr.ah = address;
if (head) {
@@ -537,10 +537,10 @@ static inline int post_send(struct ipoib_dev_priv *priv,
return ib_post_send(priv->qp, &priv->tx_wr.wr, &bad_wr);
}
-void ipoib_send(struct net_device *dev, struct sk_buff *skb,
- struct ipoib_ah *address, u32 qpn)
+int ipoib_send(struct net_device *dev, struct sk_buff *skb,
+ struct ib_ah *address, u32 dqpn)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_tx_buf *tx_req;
int hlen, rc;
void *phead;
@@ -554,7 +554,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
- return;
+ return -1;
}
} else {
if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
@@ -563,7 +563,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu);
- return;
+ return -1;
}
phead = NULL;
hlen = 0;
@@ -574,7 +574,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
- return;
+ return -1;
}
/* Does skb_linearize return ok without reducing nr_frags? */
if (skb_shinfo(skb)->nr_frags > usable_sge) {
@@ -582,12 +582,13 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
- return;
+ return -1;
}
}
- ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n",
- skb->len, address, qpn);
+ ipoib_dbg_data(priv,
+ "sending packet, length=%d address=%p dqpn=0x%06x\n",
+ skb->len, address, dqpn);
/*
* We put the skb into the tx_ring _before_ we call post_send()
@@ -601,7 +602,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
- return;
+ return -1;
}
if (skb->ip_summed == CHECKSUM_PARTIAL)
@@ -620,7 +621,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
skb_dst_drop(skb);
rc = post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
- address->ah, qpn, tx_req, phead, hlen);
+ address, dqpn, tx_req, phead, hlen);
if (unlikely(rc)) {
ipoib_warn(priv, "post_send failed, error %d\n", rc);
++dev->stats.tx_errors;
@@ -629,21 +630,24 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
dev_kfree_skb_any(skb);
if (netif_queue_stopped(dev))
netif_wake_queue(dev);
+ rc = 0;
} else {
netif_trans_update(dev);
- address->last_send = priv->tx_head;
+ rc = priv->tx_head;
++priv->tx_head;
}
if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
while (poll_tx(priv))
; /* nothing */
+
+ return rc;
}
static void __ipoib_reap_ah(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_ah *ah, *tah;
LIST_HEAD(remove_list);
unsigned long flags;
@@ -654,7 +658,7 @@ static void __ipoib_reap_ah(struct net_device *dev)
list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
if ((int) priv->tx_tail - (int) ah->last_send >= 0) {
list_del(&ah->list);
- ib_destroy_ah(ah->ah);
+ rdma_destroy_ah(ah->ah);
kfree(ah);
}
@@ -677,7 +681,7 @@ void ipoib_reap_ah(struct work_struct *work)
static void ipoib_flush_ah(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
cancel_delayed_work(&priv->ah_reap_task);
flush_workqueue(priv->wq);
@@ -686,30 +690,124 @@ static void ipoib_flush_ah(struct net_device *dev)
static void ipoib_stop_ah(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
set_bit(IPOIB_STOP_REAPER, &priv->flags);
ipoib_flush_ah(dev);
}
-static void ipoib_ib_tx_timer_func(unsigned long ctx)
+static int recvs_pending(struct net_device *dev)
{
- drain_tx_cq((struct net_device *)ctx);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ int pending = 0;
+ int i;
+
+ for (i = 0; i < ipoib_recvq_size; ++i)
+ if (priv->rx_ring[i].skb)
+ ++pending;
+
+ return pending;
}
-int ipoib_ib_dev_open(struct net_device *dev)
+int ipoib_ib_dev_stop_default(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
- int ret;
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct ib_qp_attr qp_attr;
+ unsigned long begin;
+ struct ipoib_tx_buf *tx_req;
+ int i;
- ipoib_pkey_dev_check_presence(dev);
+ if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
+ napi_disable(&priv->napi);
- if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
- ipoib_warn(priv, "P_Key 0x%04x is %s\n", priv->pkey,
- (!(priv->pkey & 0x7fff) ? "Invalid" : "not found"));
- return -1;
+ ipoib_cm_dev_stop(dev);
+
+ /*
+ * Move our QP to the error state and then reinitialize it once
+ * all work requests have completed or have been flushed.
+ */
+ qp_attr.qp_state = IB_QPS_ERR;
+ if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
+ ipoib_warn(priv, "Failed to modify QP to ERROR state\n");
+
+ /* Wait for all sends and receives to complete */
+ begin = jiffies;
+
+ while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) {
+ if (time_after(jiffies, begin + 5 * HZ)) {
+ ipoib_warn(priv,
+ "timing out; %d sends %d receives not completed\n",
+ priv->tx_head - priv->tx_tail,
+ recvs_pending(dev));
+
+ /*
+ * assume the HW is wedged and just free up
+ * all our pending work requests.
+ */
+ while ((int)priv->tx_tail - (int)priv->tx_head < 0) {
+ tx_req = &priv->tx_ring[priv->tx_tail &
+ (ipoib_sendq_size - 1)];
+ ipoib_dma_unmap_tx(priv, tx_req);
+ dev_kfree_skb_any(tx_req->skb);
+ ++priv->tx_tail;
+ --priv->tx_outstanding;
+ }
+
+ for (i = 0; i < ipoib_recvq_size; ++i) {
+ struct ipoib_rx_buf *rx_req;
+
+ rx_req = &priv->rx_ring[i];
+ if (!rx_req->skb)
+ continue;
+ ipoib_ud_dma_unmap_rx(priv,
+ priv->rx_ring[i].mapping);
+ dev_kfree_skb_any(rx_req->skb);
+ rx_req->skb = NULL;
+ }
+
+ goto timeout;
+ }
+
+ ipoib_drain_cq(dev);
+
+ usleep_range(1000, 2000);
}
+ ipoib_dbg(priv, "All sends and receives done.\n");
+
+timeout:
+ del_timer_sync(&priv->poll_timer);
+ qp_attr.qp_state = IB_QPS_RESET;
+ if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
+ ipoib_warn(priv, "Failed to modify QP to RESET state\n");
+
+ ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
+
+ return 0;
+}
+
+int ipoib_ib_dev_stop(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ priv->rn_ops->ndo_stop(dev);
+
+ clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
+ ipoib_flush_ah(dev);
+
+ return 0;
+}
+
+void ipoib_ib_tx_timer_func(unsigned long ctx)
+{
+ drain_tx_cq((struct net_device *)ctx);
+}
+
+int ipoib_ib_dev_open_default(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ int ret;
+
ret = ipoib_init_qp(dev);
if (ret) {
ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
@@ -719,33 +817,59 @@ int ipoib_ib_dev_open(struct net_device *dev)
ret = ipoib_ib_post_receives(dev);
if (ret) {
ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
- goto dev_stop;
+ goto out;
}
ret = ipoib_cm_dev_open(dev);
if (ret) {
ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret);
- goto dev_stop;
+ goto out;
+ }
+
+ if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
+ napi_enable(&priv->napi);
+
+ return 0;
+out:
+ return -1;
+}
+
+int ipoib_ib_dev_open(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ ipoib_pkey_dev_check_presence(dev);
+
+ if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
+ ipoib_warn(priv, "P_Key 0x%04x is %s\n", priv->pkey,
+ (!(priv->pkey & 0x7fff) ? "Invalid" : "not found"));
+ return -1;
}
clear_bit(IPOIB_STOP_REAPER, &priv->flags);
queue_delayed_work(priv->wq, &priv->ah_reap_task,
round_jiffies_relative(HZ));
- if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
- napi_enable(&priv->napi);
+ if (priv->rn_ops->ndo_open(dev)) {
+ pr_warn("%s: Failed to open dev\n", dev->name);
+ goto dev_stop;
+ }
+
+ set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
return 0;
+
dev_stop:
- if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
- napi_enable(&priv->napi);
+ set_bit(IPOIB_STOP_REAPER, &priv->flags);
+ cancel_delayed_work(&priv->ah_reap_task);
+ set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
ipoib_ib_dev_stop(dev);
return -1;
}
void ipoib_pkey_dev_check_presence(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (!(priv->pkey & 0x7fff) ||
ib_find_pkey(priv->ca, priv->port, priv->pkey,
@@ -755,25 +879,25 @@ void ipoib_pkey_dev_check_presence(struct net_device *dev)
set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
}
-int ipoib_ib_dev_up(struct net_device *dev)
+void ipoib_ib_dev_up(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_pkey_dev_check_presence(dev);
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
ipoib_dbg(priv, "PKEY is not assigned.\n");
- return 0;
+ return;
}
set_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
- return ipoib_mcast_start_thread(dev);
+ ipoib_mcast_start_thread(dev);
}
-int ipoib_ib_dev_down(struct net_device *dev)
+void ipoib_ib_dev_down(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg(priv, "downing ib_dev\n");
@@ -784,26 +908,11 @@ int ipoib_ib_dev_down(struct net_device *dev)
ipoib_mcast_dev_flush(dev);
ipoib_flush_paths(dev);
-
- return 0;
-}
-
-static int recvs_pending(struct net_device *dev)
-{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
- int pending = 0;
- int i;
-
- for (i = 0; i < ipoib_recvq_size; ++i)
- if (priv->rx_ring[i].skb)
- ++pending;
-
- return pending;
}
void ipoib_drain_cq(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int i, n;
/*
@@ -840,109 +949,6 @@ void ipoib_drain_cq(struct net_device *dev)
local_bh_enable();
}
-int ipoib_ib_dev_stop(struct net_device *dev)
-{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
- struct ib_qp_attr qp_attr;
- unsigned long begin;
- struct ipoib_tx_buf *tx_req;
- int i;
-
- if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
- napi_disable(&priv->napi);
-
- ipoib_cm_dev_stop(dev);
-
- /*
- * Move our QP to the error state and then reinitialize in
- * when all work requests have completed or have been flushed.
- */
- qp_attr.qp_state = IB_QPS_ERR;
- if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
- ipoib_warn(priv, "Failed to modify QP to ERROR state\n");
-
- /* Wait for all sends and receives to complete */
- begin = jiffies;
-
- while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) {
- if (time_after(jiffies, begin + 5 * HZ)) {
- ipoib_warn(priv, "timing out; %d sends %d receives not completed\n",
- priv->tx_head - priv->tx_tail, recvs_pending(dev));
-
- /*
- * assume the HW is wedged and just free up
- * all our pending work requests.
- */
- while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
- tx_req = &priv->tx_ring[priv->tx_tail &
- (ipoib_sendq_size - 1)];
- ipoib_dma_unmap_tx(priv, tx_req);
- dev_kfree_skb_any(tx_req->skb);
- ++priv->tx_tail;
- --priv->tx_outstanding;
- }
-
- for (i = 0; i < ipoib_recvq_size; ++i) {
- struct ipoib_rx_buf *rx_req;
-
- rx_req = &priv->rx_ring[i];
- if (!rx_req->skb)
- continue;
- ipoib_ud_dma_unmap_rx(priv,
- priv->rx_ring[i].mapping);
- dev_kfree_skb_any(rx_req->skb);
- rx_req->skb = NULL;
- }
-
- goto timeout;
- }
-
- ipoib_drain_cq(dev);
-
- msleep(1);
- }
-
- ipoib_dbg(priv, "All sends and receives done.\n");
-
-timeout:
- del_timer_sync(&priv->poll_timer);
- qp_attr.qp_state = IB_QPS_RESET;
- if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
- ipoib_warn(priv, "Failed to modify QP to RESET state\n");
-
- ipoib_flush_ah(dev);
-
- ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
-
- return 0;
-}
-
-int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
-{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
-
- priv->ca = ca;
- priv->port = port;
- priv->qp = NULL;
-
- if (ipoib_transport_dev_init(dev, ca)) {
- printk(KERN_WARNING "%s: ipoib_transport_dev_init failed\n", ca->name);
- return -ENODEV;
- }
-
- setup_timer(&priv->poll_timer, ipoib_ib_tx_timer_func,
- (unsigned long) dev);
-
- if (dev->flags & IFF_UP) {
- if (ipoib_ib_dev_open(dev)) {
- ipoib_transport_dev_cleanup(dev);
- return -ENODEV;
- }
- }
-
- return 0;
-}
-
/*
* Takes whatever value which is in pkey index 0 and updates priv->pkey
* returns 0 if the pkey value was changed.
@@ -971,6 +977,19 @@ static inline int update_parent_pkey(struct ipoib_dev_priv *priv)
*/
priv->dev->broadcast[8] = priv->pkey >> 8;
priv->dev->broadcast[9] = priv->pkey & 0xff;
+
+ /*
+ * Update the broadcast address in the priv->broadcast object,
+ * if it already exists; nothing else will update it otherwise.
+ */
+ if (priv->broadcast) {
+ spin_lock_irq(&priv->lock);
+ memcpy(priv->broadcast->mcmember.mgid.raw,
+ priv->dev->broadcast + 4,
+ sizeof(union ib_gid));
+ spin_unlock_irq(&priv->lock);
+ }
+
return 0;
}
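
The byte-8/9 update and the memcpy from dev->broadcast + 4 both fall out of the 20-byte IPoIB link-layer address layout: one flags byte, a 24-bit QPN, then the 16-byte GID. For dev->broadcast the GID is the broadcast MGID (ff12:401b:<pkey>:...), which carries the P_Key in GID bytes 4-5, i.e. address bytes 8 and 9. A sketch of the layout (offsets assumed from the standard IPoIB address format rather than from this hunk):

	#include <linux/if_infiniband.h>	/* INFINIBAND_ALEN == 20 */
	#include <linux/types.h>

	/* Illustrative view of an IPoIB hardware address:
	 *   byte  0    - flags (e.g. the connected-mode capability bit)
	 *   bytes 1-3  - 24-bit destination QPN
	 *   bytes 4-19 - 16-byte GID (for dev->broadcast: the broadcast MGID,
	 *                whose bytes 4-5 hold the P_Key, i.e. address bytes
	 *                8 and 9 rewritten by update_parent_pkey())
	 */
	struct ipoib_hw_addr_view {
		u8 flags_qpn[4];
		u8 gid[16];		/* starts at dev->broadcast + 4 */
	};
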
@@ -1220,7 +1239,7 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work)
void ipoib_ib_dev_cleanup(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg(priv, "cleaning up ib_dev\n");
/*
@@ -1240,7 +1259,13 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
*/
ipoib_stop_ah(dev);
- ipoib_transport_dev_cleanup(dev);
-}
+ clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+ priv->rn_ops->ndo_uninit(dev);
+
+ if (priv->pd) {
+ ib_dealloc_pd(priv->pd);
+ priv->pd = NULL;
+ }
+}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 3ce0765a05ab..4ce315c92b48 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -108,9 +108,36 @@ static struct ib_client ipoib_client = {
.get_net_dev_by_params = ipoib_get_net_dev_by_params,
};
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+static int ipoib_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct netdev_notifier_info *ni = ptr;
+ struct net_device *dev = ni->dev;
+
+ if (dev->netdev_ops->ndo_open != ipoib_open)
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ ipoib_create_debug_files(dev);
+ break;
+ case NETDEV_CHANGENAME:
+ ipoib_delete_debug_files(dev);
+ ipoib_create_debug_files(dev);
+ break;
+ case NETDEV_UNREGISTER:
+ ipoib_delete_debug_files(dev);
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+#endif
+
int ipoib_open(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg(priv, "bringing up interface\n");
@@ -126,8 +153,7 @@ int ipoib_open(struct net_device *dev)
goto err_disable;
}
- if (ipoib_ib_dev_up(dev))
- goto err_stop;
+ ipoib_ib_dev_up(dev);
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
struct ipoib_dev_priv *cpriv;
@@ -150,9 +176,6 @@ int ipoib_open(struct net_device *dev)
return 0;
-err_stop:
- ipoib_ib_dev_stop(dev);
-
err_disable:
clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
@@ -161,7 +184,7 @@ err_disable:
static int ipoib_stop(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg(priv, "stopping interface\n");
@@ -199,7 +222,7 @@ static void ipoib_uninit(struct net_device *dev)
static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_features_t features)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
@@ -209,7 +232,8 @@ static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_featu
static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ int ret = 0;
/* dev->mtu > 2K ==> connected mode */
if (ipoib_cm_admin_enabled(dev)) {
@@ -229,9 +253,38 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
priv->admin_mtu = new_mtu;
- dev->mtu = min(priv->mcast_mtu, priv->admin_mtu);
+ if (priv->mcast_mtu < priv->admin_mtu)
+ ipoib_dbg(priv, "MTU must be smaller than the underlying "
+ "link layer MTU - 4 (%u)\n", priv->mcast_mtu);
- return 0;
+ new_mtu = min(priv->mcast_mtu, priv->admin_mtu);
+
+ if (priv->rn_ops->ndo_change_mtu) {
+ bool carrier_status = netif_carrier_ok(dev);
+
+ netif_carrier_off(dev);
+
+ /* notify the lower level of the real MTU */
+ ret = priv->rn_ops->ndo_change_mtu(dev, new_mtu);
+
+ if (carrier_status)
+ netif_carrier_on(dev);
+ } else {
+ dev->mtu = new_mtu;
+ }
+
+ return ret;
+}
+
+static void ipoib_get_stats(struct net_device *dev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ if (priv->rn_ops->ndo_get_stats64)
+ priv->rn_ops->ndo_get_stats64(dev, stats);
+ else
+ netdev_stats_to_stats64(stats, &dev->stats);
}
/* Called with an RCU read lock taken */
@@ -468,7 +521,14 @@ static struct net_device *ipoib_get_net_dev_by_params(
int ipoib_set_mode(struct net_device *dev, const char *buf)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ if ((test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags) &&
+ !strcmp(buf, "connected\n")) ||
+ (!test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags) &&
+ !strcmp(buf, "datagram\n"))) {
+ return 0;
+ }
/* flush paths if we switch modes so that connections are restarted */
if (IPOIB_CM_SUPPORTED(dev->dev_addr) && !strcmp(buf, "connected\n")) {
@@ -481,8 +541,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM;
ipoib_flush_paths(dev);
- rtnl_lock();
- return 0;
+ return (!rtnl_trylock()) ? -EBUSY : 0;
}
if (!strcmp(buf, "datagram\n")) {
@@ -491,8 +550,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu));
rtnl_unlock();
ipoib_flush_paths(dev);
- rtnl_lock();
- return 0;
+ return (!rtnl_trylock()) ? -EBUSY : 0;
}
return -EINVAL;
@@ -500,7 +558,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
struct ipoib_path *__path_find(struct net_device *dev, void *gid)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct rb_node *n = priv->path_tree.rb_node;
struct ipoib_path *path;
int ret;
@@ -524,7 +582,7 @@ struct ipoib_path *__path_find(struct net_device *dev, void *gid)
static int __path_add(struct net_device *dev, struct ipoib_path *path)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct rb_node **n = &priv->path_tree.rb_node;
struct rb_node *pn = NULL;
struct ipoib_path *tpath;
@@ -559,7 +617,7 @@ static void path_free(struct net_device *dev, struct ipoib_path *path)
while ((skb = __skb_dequeue(&path->queue)))
dev_kfree_skb_irq(skb);
- ipoib_dbg(netdev_priv(dev), "path_free\n");
+ ipoib_dbg(ipoib_priv(dev), "path_free\n");
/* remove all neigh connected to this path */
ipoib_del_neighs_by_gid(dev, path->pathrec.dgid.raw);
@@ -593,7 +651,7 @@ struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev)
int ipoib_path_iter_next(struct ipoib_path_iter *iter)
{
- struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(iter->dev);
struct rb_node *n;
struct ipoib_path *path;
int ret = 1;
@@ -630,95 +688,32 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
void ipoib_mark_paths_invalid(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_path *path, *tp;
spin_lock_irq(&priv->lock);
list_for_each_entry_safe(path, tp, &priv->path_list, list) {
- ipoib_dbg(priv, "mark path LID 0x%04x GID %pI6 invalid\n",
- be16_to_cpu(path->pathrec.dlid),
- path->pathrec.dgid.raw);
+ ipoib_dbg(priv, "mark path LID 0x%08x GID %pI6 invalid\n",
+ be32_to_cpu(sa_path_get_dlid(&path->pathrec)),
+ path->pathrec.dgid.raw);
path->valid = 0;
}
spin_unlock_irq(&priv->lock);
}
-struct classport_info_context {
- struct ipoib_dev_priv *priv;
- struct completion done;
- struct ib_sa_query *sa_query;
-};
-
-static void classport_info_query_cb(int status, struct ib_class_port_info *rec,
- void *context)
+static void push_pseudo_header(struct sk_buff *skb, const char *daddr)
{
- struct classport_info_context *cb_ctx = context;
- struct ipoib_dev_priv *priv;
-
- WARN_ON(!context);
-
- priv = cb_ctx->priv;
-
- if (status || !rec) {
- pr_debug("device: %s failed query classport_info status: %d\n",
- priv->dev->name, status);
- /* keeps the default, will try next mcast_restart */
- priv->sm_fullmember_sendonly_support = false;
- goto out;
- }
-
- if (ib_get_cpi_capmask2(rec) &
- IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT) {
- pr_debug("device: %s enabled fullmember-sendonly for sendonly MCG\n",
- priv->dev->name);
- priv->sm_fullmember_sendonly_support = true;
- } else {
- pr_debug("device: %s disabled fullmember-sendonly for sendonly MCG\n",
- priv->dev->name);
- priv->sm_fullmember_sendonly_support = false;
- }
-
-out:
- complete(&cb_ctx->done);
-}
-
-int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv)
-{
- struct classport_info_context *callback_context;
- int ret;
-
- callback_context = kmalloc(sizeof(*callback_context), GFP_KERNEL);
- if (!callback_context)
- return -ENOMEM;
-
- callback_context->priv = priv;
- init_completion(&callback_context->done);
-
- ret = ib_sa_classport_info_rec_query(&ipoib_sa_client,
- priv->ca, priv->port, 3000,
- GFP_KERNEL,
- classport_info_query_cb,
- callback_context,
- &callback_context->sa_query);
- if (ret < 0) {
- pr_info("%s failed to send ib_sa_classport_info query, ret: %d\n",
- priv->dev->name, ret);
- kfree(callback_context);
- return ret;
- }
-
- /* waiting for the callback to finish before returnning */
- wait_for_completion(&callback_context->done);
- kfree(callback_context);
+ struct ipoib_pseudo_header *phdr;
- return ret;
+ phdr = skb_push(skb, sizeof(*phdr));
+ memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
}
void ipoib_flush_paths(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_path *path, *tp;
LIST_HEAD(remove_list);
unsigned long flags;
@@ -747,12 +742,12 @@ void ipoib_flush_paths(struct net_device *dev)
}
static void path_rec_completion(int status,
- struct ib_sa_path_rec *pathrec,
+ struct sa_path_rec *pathrec,
void *path_ptr)
{
struct ipoib_path *path = path_ptr;
struct net_device *dev = path->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_ah *ah = NULL;
struct ipoib_ah *old_ah = NULL;
struct ipoib_neigh *neigh, *tn;
@@ -762,7 +757,8 @@ static void path_rec_completion(int status,
if (!status)
ipoib_dbg(priv, "PathRec LID 0x%04x for GID %pI6\n",
- be16_to_cpu(pathrec->dlid), pathrec->dgid.raw);
+ be32_to_cpu(sa_path_get_dlid(pathrec)),
+ pathrec->dgid.raw);
else
ipoib_dbg(priv, "PathRec status %d for GID %pI6\n",
status, path->pathrec.dgid.raw);
@@ -770,7 +766,7 @@ static void path_rec_completion(int status,
skb_queue_head_init(&skqueue);
if (!status) {
- struct ib_ah_attr av;
+ struct rdma_ah_attr av;
if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av))
ah = ipoib_create_ah(dev, priv->pd, &av);
@@ -785,7 +781,8 @@ static void path_rec_completion(int status,
path->ah = ah;
ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n",
- ah, be16_to_cpu(pathrec->dlid), pathrec->sl);
+ ah, be32_to_cpu(sa_path_get_dlid(pathrec)),
+ pathrec->sl);
while ((skb = __skb_dequeue(&path->queue)))
__skb_queue_tail(&skqueue, skb);
@@ -834,16 +831,18 @@ static void path_rec_completion(int status,
ipoib_put_ah(old_ah);
while ((skb = __skb_dequeue(&skqueue))) {
+ int ret;
skb->dev = dev;
- if (dev_queue_xmit(skb))
- ipoib_warn(priv, "dev_queue_xmit failed "
- "to requeue packet\n");
+ ret = dev_queue_xmit(skb);
+ if (ret)
+ ipoib_warn(priv, "%s: dev_queue_xmit failed to re-queue packet, ret:%d\n",
+ __func__, ret);
}
}
static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_path *path;
if (!priv->broadcast)
@@ -859,6 +858,10 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
INIT_LIST_HEAD(&path->neigh_list);
+ if (rdma_cap_opa_ah(priv->ca, priv->port))
+ path->pathrec.rec_type = SA_PATH_REC_TYPE_OPA;
+ else
+ path->pathrec.rec_type = SA_PATH_REC_TYPE_IB;
memcpy(path->pathrec.dgid.raw, gid, sizeof (union ib_gid));
path->pathrec.sgid = priv->local_gid;
path->pathrec.pkey = cpu_to_be16(priv->pkey);
@@ -871,7 +874,7 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
static int path_rec_start(struct net_device *dev,
struct ipoib_path *path)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg(priv, "Start path record lookup for %pI6\n",
path->pathrec.dgid.raw);
@@ -902,7 +905,8 @@ static int path_rec_start(struct net_device *dev,
static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_path *path;
struct ipoib_neigh *neigh;
unsigned long flags;
@@ -940,8 +944,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
}
if (skb_queue_len(&neigh->queue) <
IPOIB_MAX_PATH_REC_QUEUE) {
- /* put pseudoheader back on for next time */
- skb_push(skb, IPOIB_PSEUDO_LEN);
+ push_pseudo_header(skb, neigh->daddr);
__skb_queue_tail(&neigh->queue, skb);
} else {
ipoib_warn(priv, "queue length limit %d. Packet drop.\n",
@@ -950,7 +953,8 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
}
} else {
spin_unlock_irqrestore(&priv->lock, flags);
- ipoib_send(dev, skb, path->ah, IPOIB_QPN(daddr));
+ path->ah->last_send = rn->send(dev, skb, path->ah->ah,
+ IPOIB_QPN(daddr));
ipoib_neigh_put(neigh);
return;
}
@@ -959,10 +963,12 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
if (!path->query && path_rec_start(dev, path))
goto err_path;
- if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
+ if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+ push_pseudo_header(skb, neigh->daddr);
__skb_queue_tail(&neigh->queue, skb);
- else
+ } else {
goto err_drop;
+ }
}
spin_unlock_irqrestore(&priv->lock, flags);
@@ -982,7 +988,8 @@ err_drop:
static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
struct ipoib_pseudo_header *phdr)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_path *path;
unsigned long flags;
@@ -998,8 +1005,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
}
if (path) {
if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
- /* put pseudoheader back on for next time */
- skb_push(skb, IPOIB_PSEUDO_LEN);
+ push_pseudo_header(skb, phdr->hwaddr);
__skb_queue_tail(&path->queue, skb);
} else {
++dev->stats.tx_dropped;
@@ -1023,16 +1029,16 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
}
if (path->ah) {
- ipoib_dbg(priv, "Send unicast ARP to %04x\n",
- be16_to_cpu(path->pathrec.dlid));
+ ipoib_dbg(priv, "Send unicast ARP to %08x\n",
+ be32_to_cpu(sa_path_get_dlid(&path->pathrec)));
spin_unlock_irqrestore(&priv->lock, flags);
- ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
+ path->ah->last_send = rn->send(dev, skb, path->ah->ah,
+ IPOIB_QPN(phdr->hwaddr));
return;
} else if ((path->query || !path_rec_start(dev, path)) &&
skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
- /* put pseudoheader back on for next time */
- skb_push(skb, IPOIB_PSEUDO_LEN);
+ push_pseudo_header(skb, phdr->hwaddr);
__skb_queue_tail(&path->queue, skb);
} else {
++dev->stats.tx_dropped;
@@ -1044,7 +1050,8 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_neigh *neigh;
struct ipoib_pseudo_header *phdr;
struct ipoib_header *header;
@@ -1108,13 +1115,13 @@ send_using_neigh:
goto unref;
}
} else if (neigh->ah) {
- ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(phdr->hwaddr));
+ neigh->ah->last_send = rn->send(dev, skb, neigh->ah->ah,
+ IPOIB_QPN(phdr->hwaddr));
goto unref;
}
if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
- /* put pseudoheader back on for next time */
- skb_push(skb, sizeof(*phdr));
+ push_pseudo_header(skb, phdr->hwaddr);
spin_lock_irqsave(&priv->lock, flags);
__skb_queue_tail(&neigh->queue, skb);
spin_unlock_irqrestore(&priv->lock, flags);
@@ -1131,7 +1138,7 @@ unref:
static void ipoib_timeout(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
jiffies_to_msecs(jiffies - dev_trans_start(dev)));
@@ -1146,10 +1153,9 @@ static int ipoib_hard_header(struct sk_buff *skb,
unsigned short type,
const void *daddr, const void *saddr, unsigned len)
{
- struct ipoib_pseudo_header *phdr;
struct ipoib_header *header;
- header = (struct ipoib_header *) skb_push(skb, sizeof *header);
+ header = skb_push(skb, sizeof *header);
header->proto = htons(type);
header->reserved = 0;
@@ -1159,15 +1165,14 @@ static int ipoib_hard_header(struct sk_buff *skb,
* destination address into skb hard header so we can figure out where
* to send the packet later.
*/
- phdr = (struct ipoib_pseudo_header *) skb_push(skb, sizeof(*phdr));
- memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
+ push_pseudo_header(skb, daddr);
return IPOIB_HARD_LEN;
}
static void ipoib_set_mcast_list(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
ipoib_dbg(priv, "IPOIB_FLAG_OPER_UP not set");
@@ -1179,7 +1184,7 @@ static void ipoib_set_mcast_list(struct net_device *dev)
static int ipoib_get_iflink(const struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
/* parent interface */
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
@@ -1207,7 +1212,7 @@ static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr)
struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_neigh_table *ntbl = &priv->ntbl;
struct ipoib_neigh_hash *htbl;
struct ipoib_neigh *neigh = NULL;
@@ -1286,7 +1291,7 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
rcu_dereference_protected(neigh->hnext,
lockdep_is_held(&priv->lock)));
/* remove from path/mc list */
- list_del(&neigh->list);
+ list_del_init(&neigh->list);
call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
} else {
np = &neigh->hnext;
@@ -1336,7 +1341,7 @@ static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr,
struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr,
struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_neigh_table *ntbl = &priv->ntbl;
struct ipoib_neigh_hash *htbl;
struct ipoib_neigh *neigh;
@@ -1393,7 +1398,7 @@ void ipoib_neigh_dtor(struct ipoib_neigh *neigh)
{
/* neigh reference count was dropped to zero */
struct net_device *dev = neigh->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct sk_buff *skb;
if (neigh->ah)
ipoib_put_ah(neigh->ah);
@@ -1403,7 +1408,7 @@ void ipoib_neigh_dtor(struct ipoib_neigh *neigh)
}
if (ipoib_cm_get(neigh))
ipoib_cm_destroy_tx(ipoib_cm_get(neigh));
- ipoib_dbg(netdev_priv(dev),
+ ipoib_dbg(ipoib_priv(dev),
"neigh free for %06x %pI6\n",
IPOIB_QPN(neigh->daddr),
neigh->daddr + 4);
@@ -1425,7 +1430,7 @@ static void ipoib_neigh_reclaim(struct rcu_head *rp)
void ipoib_neigh_free(struct ipoib_neigh *neigh)
{
struct net_device *dev = neigh->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_neigh_table *ntbl = &priv->ntbl;
struct ipoib_neigh_hash *htbl;
struct ipoib_neigh __rcu **np;
@@ -1450,7 +1455,7 @@ void ipoib_neigh_free(struct ipoib_neigh *neigh)
rcu_dereference_protected(neigh->hnext,
lockdep_is_held(&priv->lock)));
/* remove from parent list */
- list_del(&neigh->list);
+ list_del_init(&neigh->list);
call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
return;
} else {
@@ -1508,7 +1513,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_neigh_table *ntbl = &priv->ntbl;
struct ipoib_neigh_hash *htbl;
unsigned long flags;
@@ -1535,7 +1540,7 @@ void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid)
rcu_dereference_protected(neigh->hnext,
lockdep_is_held(&priv->lock)));
/* remove from parent list */
- list_del(&neigh->list);
+ list_del_init(&neigh->list);
call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
} else {
np = &neigh->hnext;
@@ -1577,7 +1582,7 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv)
rcu_dereference_protected(neigh->hnext,
lockdep_is_held(&priv->lock)));
/* remove from path/mc list */
- list_del(&neigh->list);
+ list_del_init(&neigh->list);
call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
}
}
@@ -1594,7 +1599,7 @@ out_unlock:
static void ipoib_neigh_hash_uninit(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int stopped;
ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n");
@@ -1611,10 +1616,28 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev)
wait_for_completion(&priv->ntbl.deleted);
}
+static void ipoib_dev_uninit_default(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ ipoib_transport_dev_cleanup(dev);
-int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
+ netif_napi_del(&priv->napi);
+
+ ipoib_cm_dev_cleanup(dev);
+
+ kfree(priv->rx_ring);
+ vfree(priv->tx_ring);
+
+ priv->rx_ring = NULL;
+ priv->tx_ring = NULL;
+}
+
+static int ipoib_dev_init_default(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT);
/* Allocate RX/TX "rings" to hold queued skbs */
priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
@@ -1625,46 +1648,112 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
if (!priv->tx_ring) {
printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
- ca->name, ipoib_sendq_size);
+ priv->ca->name, ipoib_sendq_size);
goto out_rx_ring_cleanup;
}
/* priv->tx_head, tx_tail & tx_outstanding are already 0 */
- if (ipoib_ib_dev_init(dev, ca, port))
+ if (ipoib_transport_dev_init(dev, priv->ca)) {
+ pr_warn("%s: ipoib_transport_dev_init failed\n",
+ priv->ca->name);
goto out_tx_ring_cleanup;
+ }
+
+ /* after the QP is created, set the device address */
+ priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
+ priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff;
+ priv->dev->dev_addr[3] = (priv->qp->qp_num) & 0xff;
+
+ setup_timer(&priv->poll_timer, ipoib_ib_tx_timer_func,
+ (unsigned long)dev);
+
+ return 0;
+
+out_tx_ring_cleanup:
+ vfree(priv->tx_ring);
+
+out_rx_ring_cleanup:
+ kfree(priv->rx_ring);
+
+out:
+ netif_napi_del(&priv->napi);
+ return -ENOMEM;
+}
+
+int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ int ret = -ENOMEM;
+
+ priv->ca = ca;
+ priv->port = port;
+ priv->qp = NULL;
/*
- * Must be after ipoib_ib_dev_init so we can allocate a per
- * device wq there and use it here
+ * the various IPoIB tasks assume they will never race against
+ * themselves, so always use a single-threaded workqueue
*/
- if (ipoib_neigh_hash_init(priv) < 0)
+ priv->wq = alloc_ordered_workqueue("ipoib_wq", WQ_MEM_RECLAIM);
+ if (!priv->wq) {
+ pr_warn("%s: failed to allocate device WQ\n", dev->name);
+ goto out;
+ }
+
+ /* create the PD, which is used for both the control and data paths */
+ priv->pd = ib_alloc_pd(priv->ca, 0);
+ if (IS_ERR(priv->pd)) {
+ pr_warn("%s: failed to allocate PD\n", ca->name);
+ goto clean_wq;
+ }
+
+ ret = priv->rn_ops->ndo_init(dev);
+ if (ret) {
+ pr_warn("%s failed to init HW resource\n", dev->name);
+ goto out_free_pd;
+ }
+
+ if (ipoib_neigh_hash_init(priv) < 0) {
+ pr_warn("%s failed to init neigh hash\n", dev->name);
goto out_dev_uninit;
+ }
+
+ if (dev->flags & IFF_UP) {
+ if (ipoib_ib_dev_open(dev)) {
+ pr_warn("%s failed to open device\n", dev->name);
+ ret = -ENODEV;
+ goto out_dev_uninit;
+ }
+ }
return 0;
out_dev_uninit:
ipoib_ib_dev_cleanup(dev);
-out_tx_ring_cleanup:
- vfree(priv->tx_ring);
+out_free_pd:
+ if (priv->pd) {
+ ib_dealloc_pd(priv->pd);
+ priv->pd = NULL;
+ }
-out_rx_ring_cleanup:
- kfree(priv->rx_ring);
+clean_wq:
+ if (priv->wq) {
+ destroy_workqueue(priv->wq);
+ priv->wq = NULL;
+ }
out:
- return -ENOMEM;
+ return ret;
}
void ipoib_dev_cleanup(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv;
+ struct ipoib_dev_priv *priv = ipoib_priv(dev), *cpriv, *tcpriv;
LIST_HEAD(head);
ASSERT_RTNL();
- ipoib_delete_debug_files(dev);
-
/* Delete any child interfaces first */
list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
/* Stop GC on child */
@@ -1674,24 +1763,21 @@ void ipoib_dev_cleanup(struct net_device *dev)
}
unregister_netdevice_many(&head);
- /*
- * Must be before ipoib_ib_dev_cleanup or we delete an in use
- * work queue
- */
ipoib_neigh_hash_uninit(dev);
ipoib_ib_dev_cleanup(dev);
- kfree(priv->rx_ring);
- vfree(priv->tx_ring);
-
- priv->rx_ring = NULL;
- priv->tx_ring = NULL;
+ /* no more work may be queued on priv->wq */
+ if (priv->wq) {
+ flush_workqueue(priv->wq);
+ destroy_workqueue(priv->wq);
+ priv->wq = NULL;
+ }
}
static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_state)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return ib_set_vf_link_state(priv->ca, vf, priv->port, link_state);
}
@@ -1699,7 +1785,7 @@ static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_stat
static int ipoib_get_vf_config(struct net_device *dev, int vf,
struct ifla_vf_info *ivf)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int err;
err = ib_get_vf_config(priv->ca, vf, priv->port, ivf);
@@ -1713,7 +1799,7 @@ static int ipoib_get_vf_config(struct net_device *dev, int vf,
static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (type != IFLA_VF_IB_NODE_GUID && type != IFLA_VF_IB_PORT_GUID)
return -EINVAL;
@@ -1724,7 +1810,7 @@ static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type)
static int ipoib_get_vf_stats(struct net_device *dev, int vf,
struct ifla_vf_stats *vf_stats)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return ib_get_vf_stats(priv->ca, vf, priv->port, vf_stats);
}
@@ -1748,6 +1834,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = {
.ndo_get_vf_stats = ipoib_get_vf_stats,
.ndo_set_vf_guid = ipoib_set_vf_guid,
.ndo_set_mac_address = ipoib_set_mac,
+ .ndo_get_stats64 = ipoib_get_stats,
};
static const struct net_device_ops ipoib_netdev_ops_vf = {
@@ -1762,21 +1849,12 @@ static const struct net_device_ops ipoib_netdev_ops_vf = {
.ndo_get_iflink = ipoib_get_iflink,
};
-void ipoib_setup(struct net_device *dev)
+void ipoib_setup_common(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
-
- if (priv->hca_caps & IB_DEVICE_VIRTUAL_FUNCTION)
- dev->netdev_ops = &ipoib_netdev_ops_vf;
- else
- dev->netdev_ops = &ipoib_netdev_ops_pf;
-
dev->header_ops = &ipoib_header_ops;
ipoib_set_ethtool_ops(dev);
- netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT);
-
dev->watchdog_timeo = HZ;
dev->flags |= IFF_BROADCAST | IFF_MULTICAST;
@@ -1790,11 +1868,14 @@ void ipoib_setup(struct net_device *dev)
netif_keep_dst(dev);
memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
+}
- priv->dev = dev;
+static void ipoib_build_priv(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ priv->dev = dev;
spin_lock_init(&priv->lock);
-
init_rwsem(&priv->vlan_rwsem);
INIT_LIST_HEAD(&priv->path_list);
@@ -1812,22 +1893,100 @@ void ipoib_setup(struct net_device *dev)
INIT_DELAYED_WORK(&priv->neigh_reap_task, ipoib_reap_neigh);
}
-struct ipoib_dev_priv *ipoib_intf_alloc(const char *name)
+static const struct net_device_ops ipoib_netdev_default_pf = {
+ .ndo_init = ipoib_dev_init_default,
+ .ndo_uninit = ipoib_dev_uninit_default,
+ .ndo_open = ipoib_ib_dev_open_default,
+ .ndo_stop = ipoib_ib_dev_stop_default,
+};
+
+static struct net_device
+*ipoib_create_netdev_default(struct ib_device *hca,
+ const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *))
{
struct net_device *dev;
+ struct rdma_netdev *rn;
- dev = alloc_netdev((int)sizeof(struct ipoib_dev_priv), name,
- NET_NAME_UNKNOWN, ipoib_setup);
+ dev = alloc_netdev((int)sizeof(struct rdma_netdev),
+ name,
+ name_assign_type, setup);
if (!dev)
return NULL;
- return netdev_priv(dev);
+ rn = netdev_priv(dev);
+
+ rn->send = ipoib_send;
+ rn->attach_mcast = ipoib_mcast_attach;
+ rn->detach_mcast = ipoib_mcast_detach;
+ rn->free_rdma_netdev = free_netdev;
+ rn->hca = hca;
+
+ dev->netdev_ops = &ipoib_netdev_default_pf;
+
+ return dev;
+}
+
+static struct net_device *ipoib_get_netdev(struct ib_device *hca, u8 port,
+ const char *name)
+{
+ struct net_device *dev;
+
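+ /*
+ * Prefer a device-specific rdma_netdev if the HCA driver provides
+ * one; otherwise fall back to the generic IPoIB netdev (no callback
+ * or -EOPNOTSUPP).
+ */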
+ if (hca->alloc_rdma_netdev) {
+ dev = hca->alloc_rdma_netdev(hca, port,
+ RDMA_NETDEV_IPOIB, name,
+ NET_NAME_UNKNOWN,
+ ipoib_setup_common);
+ if (IS_ERR_OR_NULL(dev) && PTR_ERR(dev) != -EOPNOTSUPP)
+ return NULL;
+ }
+
+ if (!hca->alloc_rdma_netdev || PTR_ERR(dev) == -EOPNOTSUPP)
+ dev = ipoib_create_netdev_default(hca, name, NET_NAME_UNKNOWN,
+ ipoib_setup_common);
+
+ return dev;
+}
+
+struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
+ const char *name)
+{
+ struct net_device *dev;
+ struct ipoib_dev_priv *priv;
+ struct rdma_netdev *rn;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return NULL;
+
+ dev = ipoib_get_netdev(hca, port, name);
+ if (!dev)
+ goto free_priv;
+
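+ /* remember the underlying rdma_netdev ops before overriding them */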
+ priv->rn_ops = dev->netdev_ops;
+
+ /* FIXME: should be set after the device capability query */
+ if (priv->hca_caps & IB_DEVICE_VIRTUAL_FUNCTION)
+ dev->netdev_ops = &ipoib_netdev_ops_vf;
+ else
+ dev->netdev_ops = &ipoib_netdev_ops_pf;
+
+ rn = netdev_priv(dev);
+ rn->clnt_priv = priv;
+ ipoib_build_priv(dev);
+
+ return priv;
+free_priv:
+ kfree(priv);
+ return NULL;
}
static ssize_t show_pkey(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
+ struct net_device *ndev = to_net_dev(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(ndev);
return sprintf(buf, "0x%04x\n", priv->pkey);
}
@@ -1836,14 +1995,15 @@ static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
static ssize_t show_umcast(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
+ struct net_device *ndev = to_net_dev(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(ndev);
return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags));
}
void ipoib_set_umcast(struct net_device *ndev, int umcast_val)
{
- struct ipoib_dev_priv *priv = netdev_priv(ndev);
+ struct ipoib_dev_priv *priv = ipoib_priv(ndev);
if (umcast_val > 0) {
set_bit(IPOIB_FLAG_UMCAST, &priv->flags);
@@ -1916,7 +2076,7 @@ static int ipoib_check_lladdr(struct net_device *dev,
static int ipoib_set_mac(struct net_device *dev, void *addr)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct sockaddr_storage *ss = addr;
int ret;
@@ -1984,20 +2144,18 @@ int ipoib_add_pkey_attr(struct net_device *dev)
return device_create_file(&dev->dev, &dev_attr_pkey);
}
-int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
+void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
{
priv->hca_caps = hca->attrs.device_cap_flags;
if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
- priv->dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
+ priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
if (priv->hca_caps & IB_DEVICE_UD_TSO)
priv->dev->hw_features |= NETIF_F_TSO;
priv->dev->features |= priv->dev->hw_features;
}
-
- return 0;
}
static struct net_device *ipoib_add_port(const char *format,
@@ -2007,11 +2165,11 @@ static struct net_device *ipoib_add_port(const char *format,
struct ib_port_attr attr;
int result = -ENOMEM;
- priv = ipoib_intf_alloc(format);
+ priv = ipoib_intf_alloc(hca, port, format);
if (!priv)
goto alloc_mem_failed;
- SET_NETDEV_DEV(priv->dev, hca->dma_device);
+ SET_NETDEV_DEV(priv->dev, hca->dev.parent);
priv->dev->dev_id = port - 1;
result = ib_query_port(hca, port, &attr);
@@ -2037,9 +2195,7 @@ static struct net_device *ipoib_add_port(const char *format,
goto device_init_failed;
}
- result = ipoib_set_dev_features(priv, hca);
- if (result)
- goto device_init_failed;
+ ipoib_set_dev_features(priv, hca);
/*
* Set the full membership bit, so that we join the right
@@ -2083,8 +2239,7 @@ static struct net_device *ipoib_add_port(const char *format,
goto register_failed;
}
- ipoib_create_debug_files(priv->dev);
-
+ result = -ENOMEM;
if (ipoib_cm_add_mode_attr(priv->dev))
goto sysfs_failed;
if (ipoib_add_pkey_attr(priv->dev))
@@ -2099,7 +2254,6 @@ static struct net_device *ipoib_add_port(const char *format,
return priv->dev;
sysfs_failed:
- ipoib_delete_debug_files(priv->dev);
unregister_netdev(priv->dev);
register_failed:
@@ -2115,6 +2269,7 @@ event_failed:
device_init_failed:
free_netdev(priv->dev);
+ kfree(priv);
alloc_mem_failed:
return ERR_PTR(result);
@@ -2139,7 +2294,7 @@ static void ipoib_add_one(struct ib_device *device)
continue;
dev = ipoib_add_port("ib%d", device, p);
if (!IS_ERR(dev)) {
- priv = netdev_priv(dev);
+ priv = ipoib_priv(dev);
list_add_tail(&priv->list, dev_list);
count++;
}
@@ -2155,13 +2310,15 @@ static void ipoib_add_one(struct ib_device *device)
static void ipoib_remove_one(struct ib_device *device, void *client_data)
{
- struct ipoib_dev_priv *priv, *tmp;
+ struct ipoib_dev_priv *priv, *tmp, *cpriv, *tcpriv;
struct list_head *dev_list = client_data;
if (!dev_list)
return;
list_for_each_entry_safe(priv, tmp, dev_list, list) {
+ struct rdma_netdev *rn = netdev_priv(priv->dev);
+
ib_unregister_event_handler(&priv->event_handler);
flush_workqueue(ipoib_workqueue);
@@ -2178,12 +2335,23 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
flush_workqueue(priv->wq);
unregister_netdev(priv->dev);
- free_netdev(priv->dev);
+ rn->free_rdma_netdev(priv->dev);
+
+ list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list)
+ kfree(cpriv);
+
+ kfree(priv);
}
kfree(dev_list);
}
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+static struct notifier_block ipoib_netdev_notifier = {
+ .notifier_call = ipoib_netdev_event,
+};
+#endif
+
static int __init ipoib_init_module(void)
{
int ret;
@@ -2236,6 +2404,9 @@ static int __init ipoib_init_module(void)
if (ret)
goto err_client;
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+ register_netdevice_notifier(&ipoib_netdev_notifier);
+#endif
return 0;
err_client:
@@ -2253,6 +2424,9 @@ err_fs:
static void __exit ipoib_cleanup_module(void)
{
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+ unregister_netdevice_notifier(&ipoib_netdev_notifier);
+#endif
ipoib_netlink_fini();
ib_unregister_client(&ipoib_client);
ib_sa_unregister_client(&ipoib_sa_client);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index fddff403d5d2..057f58e6afca 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -114,7 +114,7 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast)
struct net_device *dev = mcast->dev;
int tx_dropped = 0;
- ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n",
+ ipoib_dbg_mcast(ipoib_priv(dev), "deleting multicast group %pI6\n",
mcast->mcmember.mgid.raw);
/* remove all neigh connected to this mcast */
@@ -158,7 +158,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct rb_node *n = priv->multicast_tree.rb_node;
while (n) {
@@ -182,7 +182,7 @@ static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid
static int __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast *mcast)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL;
while (*n) {
@@ -212,8 +212,10 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
struct ib_sa_mcmember_rec *mcmember)
{
struct net_device *dev = mcast->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_ah *ah;
+ struct rdma_ah_attr av;
int ret;
int set_qkey = 0;
@@ -260,8 +262,9 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
return 0;
}
- ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid),
- &mcast->mcmember.mgid, set_qkey);
+ ret = rn->attach_mcast(dev, priv->ca, &mcast->mcmember.mgid,
+ be16_to_cpu(mcast->mcmember.mlid),
+ set_qkey, priv->qkey);
if (ret < 0) {
ipoib_warn(priv, "couldn't attach QP to multicast group %pI6\n",
mcast->mcmember.mgid.raw);
@@ -271,40 +274,34 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
}
}
- {
- struct ib_ah_attr av = {
- .dlid = be16_to_cpu(mcast->mcmember.mlid),
- .port_num = priv->port,
- .sl = mcast->mcmember.sl,
- .ah_flags = IB_AH_GRH,
- .static_rate = mcast->mcmember.rate,
- .grh = {
- .flow_label = be32_to_cpu(mcast->mcmember.flow_label),
- .hop_limit = mcast->mcmember.hop_limit,
- .sgid_index = 0,
- .traffic_class = mcast->mcmember.traffic_class
- }
- };
- av.grh.dgid = mcast->mcmember.mgid;
-
- ah = ipoib_create_ah(dev, priv->pd, &av);
- if (IS_ERR(ah)) {
- ipoib_warn(priv, "ib_address_create failed %ld\n",
- -PTR_ERR(ah));
- /* use original error */
- return PTR_ERR(ah);
- } else {
- spin_lock_irq(&priv->lock);
- mcast->ah = ah;
- spin_unlock_irq(&priv->lock);
-
- ipoib_dbg_mcast(priv, "MGID %pI6 AV %p, LID 0x%04x, SL %d\n",
- mcast->mcmember.mgid.raw,
- mcast->ah->ah,
- be16_to_cpu(mcast->mcmember.mlid),
- mcast->mcmember.sl);
- }
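+ /* build the address handle attributes for the multicast group */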
+ memset(&av, 0, sizeof(av));
+ av.type = rdma_ah_find_type(priv->ca, priv->port);
+ rdma_ah_set_dlid(&av, be16_to_cpu(mcast->mcmember.mlid));
+ rdma_ah_set_port_num(&av, priv->port);
+ rdma_ah_set_sl(&av, mcast->mcmember.sl);
+ rdma_ah_set_static_rate(&av, mcast->mcmember.rate);
+
+ rdma_ah_set_grh(&av, &mcast->mcmember.mgid,
+ be32_to_cpu(mcast->mcmember.flow_label),
+ 0, mcast->mcmember.hop_limit,
+ mcast->mcmember.traffic_class);
+
+ ah = ipoib_create_ah(dev, priv->pd, &av);
+ if (IS_ERR(ah)) {
+ ipoib_warn(priv, "ib_address_create failed %ld\n",
+ -PTR_ERR(ah));
+ /* use original error */
+ return PTR_ERR(ah);
}
+ spin_lock_irq(&priv->lock);
+ mcast->ah = ah;
+ spin_unlock_irq(&priv->lock);
+
+ ipoib_dbg_mcast(priv, "MGID %pI6 AV %p, LID 0x%04x, SL %d\n",
+ mcast->mcmember.mgid.raw,
+ mcast->ah->ah,
+ be16_to_cpu(mcast->mcmember.mlid),
+ mcast->mcmember.sl);
/* actually send any queued packets */
netif_tx_lock_bh(dev);
@@ -314,9 +311,11 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
netif_tx_unlock_bh(dev);
skb->dev = dev;
- if (dev_queue_xmit(skb))
- ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n");
+ ret = dev_queue_xmit(skb);
+ if (ret)
+ ipoib_warn(priv, "%s:dev_queue_xmit failed to re-queue packet, ret:%d\n",
+ __func__, ret);
netif_tx_lock_bh(dev);
}
netif_tx_unlock_bh(dev);
@@ -329,7 +328,6 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
carrier_on_task);
struct ib_port_attr attr;
- int ret;
if (ib_query_port(priv->ca, priv->port, &attr) ||
attr.state != IB_PORT_ACTIVE) {
@@ -342,11 +340,9 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
* because the broadcast group must always be joined first and is always
* re-joined if the SM changes substantially.
*/
- ret = ipoib_check_sm_sendonly_fullmember_support(priv);
- if (ret < 0)
- pr_debug("%s failed query sm support for sendonly-fullmember (ret: %d)\n",
- priv->dev->name, ret);
-
+ priv->sm_fullmember_sendonly_support =
+ ib_sa_sendonly_fullmem_support(&ipoib_sa_client,
+ priv->ca, priv->port);
/*
* Take rtnl_lock to avoid racing with ipoib_stop() and
* turning the carrier back on while a device is being
@@ -373,7 +369,7 @@ static int ipoib_mcast_join_complete(int status,
{
struct ipoib_mcast *mcast = multicast->context;
struct net_device *dev = mcast->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg_mcast(priv, "%sjoin completion for %pI6 (status %d)\n",
test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ?
@@ -475,7 +471,7 @@ out_locked:
*/
static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_sa_multicast *multicast;
struct ib_sa_mcmember_rec rec = {
.join_state = 1
@@ -487,6 +483,9 @@ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
return -EINVAL;
+ init_completion(&mcast->done);
+ set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+
ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw);
rec.mgid = mcast->mcmember.mgid;
@@ -645,8 +644,6 @@ void ipoib_mcast_join_task(struct work_struct *work)
if (mcast->backoff == 1 ||
time_after_eq(jiffies, mcast->delay_until)) {
/* Found the next unjoined group */
- init_completion(&mcast->done);
- set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
if (ipoib_mcast_join(dev, mcast)) {
spin_unlock_irq(&priv->lock);
return;
@@ -666,17 +663,15 @@ out:
queue_delayed_work(priv->wq, &priv->mcast_task,
delay_until - jiffies);
}
- if (mcast) {
- init_completion(&mcast->done);
- set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ if (mcast)
ipoib_mcast_join(dev, mcast);
- }
+
spin_unlock_irq(&priv->lock);
}
-int ipoib_mcast_start_thread(struct net_device *dev)
+void ipoib_mcast_start_thread(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
unsigned long flags;
ipoib_dbg_mcast(priv, "starting multicast thread\n");
@@ -684,13 +679,11 @@ int ipoib_mcast_start_thread(struct net_device *dev)
spin_lock_irqsave(&priv->lock, flags);
__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
spin_unlock_irqrestore(&priv->lock, flags);
-
- return 0;
}
int ipoib_mcast_stop_thread(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
unsigned long flags;
ipoib_dbg_mcast(priv, "stopping multicast thread\n");
@@ -706,7 +699,8 @@ int ipoib_mcast_stop_thread(struct net_device *dev)
static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
int ret = 0;
if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
@@ -720,8 +714,8 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
mcast->mcmember.mgid.raw);
/* Remove ourselves from the multicast group */
- ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid,
- be16_to_cpu(mcast->mcmember.mlid));
+ ret = rn->detach_mcast(dev, priv->ca, &mcast->mcmember.mgid,
+ be16_to_cpu(mcast->mcmember.mlid));
if (ret)
ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
} else if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
@@ -762,7 +756,8 @@ void ipoib_mcast_remove_list(struct list_head *remove_list)
void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_mcast *mcast;
unsigned long flags;
void *mgid = daddr + 4;
@@ -825,7 +820,8 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
}
}
spin_unlock_irqrestore(&priv->lock, flags);
- ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
+ mcast->ah->last_send = rn->send(dev, skb, mcast->ah->ah,
+ IB_MULTICAST_QPN);
if (neigh)
ipoib_neigh_put(neigh);
return;
@@ -837,7 +833,7 @@ unlock:
void ipoib_mcast_dev_flush(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
LIST_HEAD(remove_list);
struct ipoib_mcast *mcast, *tmcast;
unsigned long flags;
@@ -1029,7 +1025,7 @@ struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev)
int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter)
{
- struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(iter->dev);
struct rb_node *n;
struct ipoib_mcast *mcast;
int ret = 1;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
index cdc7df4fdb8a..3e44087935ae 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
@@ -44,7 +44,7 @@ static const struct nla_policy ipoib_policy[IFLA_IPOIB_MAX + 1] = {
static int ipoib_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
u16 val;
if (nla_put_u16(skb, IFLA_IPOIB_PKEY, priv->pkey))
@@ -64,8 +64,9 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int ipoib_changelink(struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[])
+static int ipoib_changelink(struct net_device *dev, struct nlattr *tb[],
+ struct nlattr *data[],
+ struct netlink_ext_ack *extack)
{
u16 mode, umcast;
int ret = 0;
@@ -93,7 +94,8 @@ out_err:
}
static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[])
+ struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
{
struct net_device *pdev;
struct ipoib_dev_priv *ppriv;
@@ -107,7 +109,7 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
if (!pdev || pdev->type != ARPHRD_INFINIBAND)
return -ENODEV;
- ppriv = netdev_priv(pdev);
+ ppriv = ipoib_priv(pdev);
if (test_bit(IPOIB_FLAG_SUBINTERFACE, &ppriv->flags)) {
ipoib_warn(ppriv, "child creation disallowed for child devices\n");
@@ -129,10 +131,11 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
*/
child_pkey |= 0x8000;
- err = __ipoib_vlan_add(ppriv, netdev_priv(dev), child_pkey, IPOIB_RTNL_CHILD);
+ err = __ipoib_vlan_add(ppriv, ipoib_priv(dev),
+ child_pkey, IPOIB_RTNL_CHILD);
if (!err && data)
- err = ipoib_changelink(dev, tb, data);
+ err = ipoib_changelink(dev, tb, data, extack);
return err;
}
@@ -140,8 +143,8 @@ static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head
{
struct ipoib_dev_priv *priv, *ppriv;
- priv = netdev_priv(dev);
- ppriv = netdev_priv(priv->parent);
+ priv = ipoib_priv(dev);
+ ppriv = ipoib_priv(priv->parent);
down_write(&ppriv->vlan_rwsem);
unregister_netdevice_queue(dev, head);
@@ -161,7 +164,7 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
.maxtype = IFLA_IPOIB_MAX,
.policy = ipoib_policy,
.priv_size = sizeof(struct ipoib_dev_priv),
- .setup = ipoib_setup,
+ .setup = ipoib_setup_common,
.newlink = ipoib_new_child_link,
.changelink = ipoib_changelink,
.dellink = ipoib_unregister_child_dev,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 189dcd1709d2..bb64baf25309 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -35,9 +35,10 @@
#include "ipoib.h"
-int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int set_qkey)
+int ipoib_mcast_attach(struct net_device *dev, struct ib_device *hca,
+ union ib_gid *mgid, u16 mlid, int set_qkey, u32 qkey)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_attr *qp_attr = NULL;
int ret;
u16 pkey_index;
@@ -56,7 +57,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int
goto out;
/* set correct QKey for QP */
- qp_attr->qkey = priv->qkey;
+ qp_attr->qkey = qkey;
ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY);
if (ret) {
ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret);
@@ -74,9 +75,20 @@ out:
return ret;
}
+int ipoib_mcast_detach(struct net_device *dev, struct ib_device *hca,
+ union ib_gid *mgid, u16 mlid)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ int ret;
+
+ ret = ib_detach_mcast(priv->qp, mgid, mlid);
+
+ return ret;
+}
+
int ipoib_init_qp(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int ret;
struct ib_qp_attr qp_attr;
int attr_mask;
@@ -130,7 +142,7 @@ out_fail:
int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_init_attr init_attr = {
.cap = {
.max_send_wr = ipoib_sendq_size,
@@ -147,22 +159,6 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
int ret, size;
int i;
- priv->pd = ib_alloc_pd(priv->ca, 0);
- if (IS_ERR(priv->pd)) {
- printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name);
- return -ENODEV;
- }
-
- /*
- * the various IPoIB tasks assume they will never race against
- * themselves, so always use a single thread workqueue
- */
- priv->wq = alloc_ordered_workqueue("ipoib_wq", WQ_MEM_RECLAIM);
- if (!priv->wq) {
- printk(KERN_WARNING "ipoib: failed to allocate device WQ\n");
- goto out_free_pd;
- }
-
size = ipoib_recvq_size + 1;
ret = ipoib_cm_dev_init(dev);
if (!ret) {
@@ -173,7 +169,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
size += ipoib_recvq_size * ipoib_max_conn_qp;
} else
if (ret != -ENOSYS)
- goto out_free_wq;
+ return -ENODEV;
cq_attr.cqe = size;
priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL,
@@ -212,10 +208,6 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
goto out_free_send_cq;
}
- priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
- priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff;
- priv->dev->dev_addr[3] = (priv->qp->qp_num ) & 0xff;
-
for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
priv->tx_sge[i].lkey = priv->pd->local_dma_lkey;
@@ -247,26 +239,18 @@ out_free_recv_cq:
out_cm_dev_cleanup:
ipoib_cm_dev_cleanup(dev);
-out_free_wq:
- destroy_workqueue(priv->wq);
- priv->wq = NULL;
-
-out_free_pd:
- ib_dealloc_pd(priv->pd);
-
return -ENODEV;
}
void ipoib_transport_dev_cleanup(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (priv->qp) {
if (ib_destroy_qp(priv->qp))
ipoib_warn(priv, "ib_qp_destroy failed\n");
priv->qp = NULL;
- clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
}
if (ib_destroy_cq(priv->send_cq))
@@ -274,16 +258,6 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
if (ib_destroy_cq(priv->recv_cq))
ipoib_warn(priv, "ib_cq_destroy (recv) failed\n");
-
- ipoib_cm_dev_cleanup(dev);
-
- if (priv->wq) {
- flush_workqueue(priv->wq);
- destroy_workqueue(priv->wq);
- priv->wq = NULL;
- }
-
- ib_dealloc_pd(priv->pd);
}
void ipoib_event(struct ib_event_handler *handler,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index fd811115af49..081b33deff1b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -31,6 +31,7 @@
*/
#include <linux/module.h>
+#include <linux/sched/signal.h>
#include <linux/init.h>
#include <linux/seq_file.h>
@@ -43,7 +44,7 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr,
char *buf)
{
struct net_device *dev = to_net_dev(d);
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return sprintf(buf, "%s\n", priv->parent->name);
}
@@ -61,9 +62,7 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
priv->parent = ppriv->dev;
set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags);
- result = ipoib_set_dev_features(priv, ppriv->ca);
- if (result)
- goto err;
+ ipoib_set_dev_features(priv, ppriv->ca);
priv->pkey = pkey;
@@ -87,8 +86,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
goto register_failed;
}
- ipoib_create_debug_files(priv->dev);
-
/* RTNL childs don't need proprietary sysfs entries */
if (type == IPOIB_LEGACY_CHILD) {
if (ipoib_cm_add_mode_attr(priv->dev))
@@ -109,7 +106,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
sysfs_failed:
result = -ENOMEM;
- ipoib_delete_debug_files(priv->dev);
unregister_netdevice(priv->dev);
register_failed:
@@ -129,20 +125,21 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- ppriv = netdev_priv(pdev);
+ ppriv = ipoib_priv(pdev);
if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
return -EPERM;
snprintf(intf_name, sizeof intf_name, "%s.%04x",
ppriv->dev->name, pkey);
- priv = ipoib_intf_alloc(intf_name);
- if (!priv)
- return -ENOMEM;
if (!rtnl_trylock())
return restart_syscall();
+ priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
+ if (!priv) {
+ rtnl_unlock();
+ return -ENOMEM;
+ }
+
down_write(&ppriv->vlan_rwsem);
/*
@@ -168,11 +165,13 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
out:
up_write(&ppriv->vlan_rwsem);
- if (result)
- free_netdev(priv->dev);
-
rtnl_unlock();
+ if (result) {
+ free_netdev(priv->dev);
+ kfree(priv);
+ }
+
return result;
}
@@ -184,7 +183,7 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- ppriv = netdev_priv(pdev);
+ ppriv = ipoib_priv(pdev);
if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
return -EPERM;
@@ -196,7 +195,6 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
if (priv->pkey == pkey &&
priv->child_type == IPOIB_LEGACY_CHILD) {
- unregister_netdevice(priv->dev);
list_del(&priv->list);
dev = priv->dev;
break;
@@ -204,10 +202,16 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
}
up_write(&ppriv->vlan_rwsem);
+ if (dev) {
+ ipoib_dbg(ppriv, "delete child vlan %s\n", dev->name);
+ unregister_netdevice(dev);
+ }
+
rtnl_unlock();
if (dev) {
free_netdev(dev);
+ kfree(priv);
return 0;
}
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index e71af717e71b..37b33d708c2d 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -83,6 +83,7 @@ static struct scsi_host_template iscsi_iser_sht;
static struct iscsi_transport iscsi_iser_transport;
static struct scsi_transport_template *iscsi_iser_scsi_transport;
static struct workqueue_struct *release_wq;
+static DEFINE_MUTEX(unbind_iser_conn_mutex);
struct iser_global ig;
int iser_debug_level = 0;
@@ -550,12 +551,14 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
*/
if (iser_conn) {
mutex_lock(&iser_conn->state_mutex);
+ mutex_lock(&unbind_iser_conn_mutex);
iser_conn_terminate(iser_conn);
iscsi_conn_stop(cls_conn, flag);
/* unbind */
iser_conn->iscsi_conn = NULL;
conn->dd_data = NULL;
+ mutex_unlock(&unbind_iser_conn_mutex);
complete(&iser_conn->stop_completion);
mutex_unlock(&iser_conn->state_mutex);
@@ -652,7 +655,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
}
if (iscsi_host_add(shost,
- ib_conn->device->ib_device->dma_device)) {
+ ib_conn->device->ib_device->dev.parent)) {
mutex_unlock(&iser_conn->state_mutex);
goto free_host;
}
@@ -977,13 +980,21 @@ static int iscsi_iser_slave_alloc(struct scsi_device *sdev)
struct iser_conn *iser_conn;
struct ib_device *ib_dev;
+ mutex_lock(&unbind_iser_conn_mutex);
+
session = starget_to_session(scsi_target(sdev))->dd_data;
iser_conn = session->leadconn->dd_data;
+ if (!iser_conn) {
+ mutex_unlock(&unbind_iser_conn_mutex);
+ return -ENOTCONN;
+ }
ib_dev = iser_conn->ib_conn.device->ib_device;
if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
blk_queue_virt_boundary(sdev->request_queue, ~MASK_4K);
+ mutex_unlock(&unbind_iser_conn_mutex);
+
return 0;
}
@@ -994,6 +1005,7 @@ static struct scsi_host_template iscsi_iser_sht = {
.change_queue_depth = scsi_change_queue_depth,
.sg_tablesize = ISCSI_ISER_DEF_SG_TABLESIZE,
.cmd_per_lun = ISER_DEF_CMD_PER_LUN,
+ .eh_timed_out = iscsi_eh_cmd_timed_out,
.eh_abort_handler = iscsi_eh_abort,
.eh_device_reset_handler= iscsi_eh_device_reset,
.eh_target_reset_handler = iscsi_eh_recover_target,
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 9d0b22ad58c1..c1ae4aeae2f9 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -430,6 +430,7 @@ struct iser_fr_desc {
struct list_head list;
struct iser_reg_resources rsc;
struct iser_pi_context *pi_ctx;
+ struct list_head all_list;
};
/**
@@ -443,6 +444,7 @@ struct iser_fr_pool {
struct list_head list;
spinlock_t lock;
int size;
+ struct list_head all_list;
};
/**
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 81ae2e30dd12..2a07692007bd 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -137,8 +137,10 @@ iser_prepare_write_cmd(struct iscsi_task *task,
if (unsol_sz < edtl) {
hdr->flags |= ISER_WSV;
- hdr->write_stag = cpu_to_be32(mem_reg->rkey);
- hdr->write_va = cpu_to_be64(mem_reg->sge.addr + unsol_sz);
+ if (buf_out->data_len > imm_sz) {
+ hdr->write_stag = cpu_to_be32(mem_reg->rkey);
+ hdr->write_va = cpu_to_be64(mem_reg->sge.addr + unsol_sz);
+ }
iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X "
"VA:%#llX + unsol:%d\n",
@@ -612,7 +614,7 @@ iser_check_remote_inv(struct iser_conn *iser_conn,
iser_conn, rkey);
if (unlikely(!iser_conn->snd_w_inv)) {
- iser_err("conn %p: unexepected remote invalidation, "
+ iser_err("conn %p: unexpected remote invalidation, "
"terminating connection\n", iser_conn);
return -EPROTO;
}
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 6a9d1cb548ee..26a004e97ae0 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -362,6 +362,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn,
int i, ret;
INIT_LIST_HEAD(&fr_pool->list);
+ INIT_LIST_HEAD(&fr_pool->all_list);
spin_lock_init(&fr_pool->lock);
fr_pool->size = 0;
for (i = 0; i < cmds_max; i++) {
@@ -373,6 +374,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn,
}
list_add_tail(&desc->list, &fr_pool->list);
+ list_add_tail(&desc->all_list, &fr_pool->all_list);
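+ /*
+ * all_list keeps every descriptor so the pool can be torn down
+ * even when some descriptors are not on the free list.
+ */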
fr_pool->size++;
}
@@ -392,13 +394,13 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
struct iser_fr_desc *desc, *tmp;
int i = 0;
- if (list_empty(&fr_pool->list))
+ if (list_empty(&fr_pool->all_list))
return;
iser_info("freeing conn %p fr pool\n", ib_conn);
- list_for_each_entry_safe(desc, tmp, &fr_pool->list, list) {
- list_del(&desc->list);
+ list_for_each_entry_safe(desc, tmp, &fr_pool->all_list, all_list) {
+ list_del(&desc->all_list);
iser_free_reg_res(&desc->rsc);
if (desc->pi_ctx)
iser_free_pi_ctx(desc->pi_ctx);
@@ -597,7 +599,9 @@ static void iser_free_ib_conn_res(struct iser_conn *iser_conn,
iser_conn, ib_conn->cma_id, ib_conn->qp);
if (ib_conn->qp != NULL) {
+ mutex_lock(&ig.connlist_mutex);
ib_conn->comp->active_qps--;
+ mutex_unlock(&ig.connlist_mutex);
rdma_destroy_qp(ib_conn->cma_id);
ib_conn->qp = NULL;
}
@@ -704,8 +708,14 @@ iser_calc_scsi_params(struct iser_conn *iser_conn,
unsigned short sg_tablesize, sup_sg_tablesize;
sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K);
- sup_sg_tablesize = min_t(unsigned, ISCSI_ISER_MAX_SG_TABLESIZE,
- device->ib_device->attrs.max_fast_reg_page_list_len);
+ if (device->ib_device->attrs.device_cap_flags &
+ IB_DEVICE_MEM_MGT_EXTENSIONS)
+ sup_sg_tablesize =
+ min_t(uint, ISCSI_ISER_MAX_SG_TABLESIZE,
+ device->ib_device->attrs.max_fast_reg_page_list_len);
+ else
+ sup_sg_tablesize = ISCSI_ISER_MAX_SG_TABLESIZE;
iser_conn->scsi_sg_tablesize = min(sg_tablesize, sup_sg_tablesize);
}
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 314e95516068..0e662656ef42 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -728,7 +728,7 @@ isert_disconnected_handler(struct rdma_cm_id *cma_id,
iscsit_cause_connection_reinstatement(isert_conn->conn, 0);
break;
default:
- isert_warn("conn %p teminating in state %d\n",
+ isert_warn("conn %p terminating in state %d\n",
isert_conn, isert_conn->state);
}
mutex_unlock(&isert_conn->mutex);
@@ -817,6 +817,7 @@ isert_post_recvm(struct isert_conn *isert_conn, u32 count)
rx_wr->sg_list = &rx_desc->rx_sg;
rx_wr->num_sge = 1;
rx_wr->next = rx_wr + 1;
+ rx_desc->in_use = false;
}
rx_wr--;
rx_wr->next = NULL; /* mark end of work requests list */
@@ -835,6 +836,15 @@ isert_post_recv(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc)
struct ib_recv_wr *rx_wr_failed, rx_wr;
int ret;
+ if (!rx_desc->in_use) {
+ /*
+ * if the descriptor is not in-use we already reposted it
+ * for recv, so just silently return
+ */
+ return 0;
+ }
+
+ rx_desc->in_use = false;
rx_wr.wr_cqe = &rx_desc->rx_cqe;
rx_wr.sg_list = &rx_desc->rx_sg;
rx_wr.num_sge = 1;
@@ -1397,6 +1407,8 @@ isert_recv_done(struct ib_cq *cq, struct ib_wc *wc)
return;
}
+ rx_desc->in_use = true;
+
ib_dma_sync_single_for_cpu(ib_dev, rx_desc->dma_addr,
ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
@@ -1440,7 +1452,7 @@ static void
isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct isert_conn *isert_conn = wc->qp->qp_context;
- struct ib_device *ib_dev = isert_conn->cm_id->device;
+ struct ib_device *ib_dev = isert_conn->device->ib_device;
if (unlikely(wc->status != IB_WC_SUCCESS)) {
isert_print_wc(wc, "login recv");
@@ -1659,10 +1671,23 @@ isert_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
ret = isert_check_pi_status(cmd, isert_cmd->rw.sig->sig_mr);
isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn);
- if (ret)
- transport_send_check_condition_and_sense(cmd, cmd->pi_err, 0);
- else
- isert_put_response(isert_conn->conn, isert_cmd->iscsi_cmd);
+ if (ret) {
+ /*
+ * transport_generic_request_failure() expects two extra
+ * references to handle queue-full, so re-add one here since
+ * target-core will already have dropped it after the first
+ * isert_put_datain() callback.
+ */
+ kref_get(&cmd->cmd_kref);
+ transport_generic_request_failure(cmd, cmd->pi_err);
+ } else {
+ /*
+ * XXX: isert_put_response() failure is not retried.
+ */
+ ret = isert_put_response(isert_conn->conn, isert_cmd->iscsi_cmd);
+ if (ret)
+ pr_warn_ratelimited("isert_put_response() ret: %d\n", ret);
+ }
}
static void
@@ -1699,13 +1724,15 @@ isert_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT;
spin_unlock_bh(&cmd->istate_lock);
- if (ret) {
- target_put_sess_cmd(se_cmd);
- transport_send_check_condition_and_sense(se_cmd,
- se_cmd->pi_err, 0);
- } else {
+ /*
+ * transport_generic_request_failure() will drop the extra
+ * se_cmd->cmd_kref reference after T10-PI error, and handle
+ * any non-zero ->queue_status() callback error retries.
+ */
+ if (ret)
+ transport_generic_request_failure(se_cmd, se_cmd->pi_err);
+ else
target_execute_cmd(se_cmd);
- }
}
static void
@@ -2171,26 +2198,28 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
chain_wr = &isert_cmd->tx_desc.send_wr;
}
- isert_rdma_rw_ctx_post(isert_cmd, isert_conn, cqe, chain_wr);
- isert_dbg("Cmd: %p posted RDMA_WRITE for iSER Data READ\n", isert_cmd);
- return 1;
+ rc = isert_rdma_rw_ctx_post(isert_cmd, isert_conn, cqe, chain_wr);
+ isert_dbg("Cmd: %p posted RDMA_WRITE for iSER Data READ rc: %d\n",
+ isert_cmd, rc);
+ return rc;
}
static int
isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
{
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
+ int ret;
isert_dbg("Cmd: %p RDMA_READ data_length: %u write_data_done: %u\n",
isert_cmd, cmd->se_cmd.data_length, cmd->write_data_done);
isert_cmd->tx_desc.tx_cqe.done = isert_rdma_read_done;
- isert_rdma_rw_ctx_post(isert_cmd, conn->context,
- &isert_cmd->tx_desc.tx_cqe, NULL);
+ ret = isert_rdma_rw_ctx_post(isert_cmd, conn->context,
+ &isert_cmd->tx_desc.tx_cqe, NULL);
- isert_dbg("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n",
- isert_cmd);
- return 0;
+ isert_dbg("Cmd: %p posted RDMA_READ memory for ISER Data WRITE rc: %d\n",
+ isert_cmd, ret);
+ return ret;
}
static int
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index c02ada57d7f5..87d994de8c91 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -60,7 +60,7 @@
#define ISER_RX_PAD_SIZE (ISCSI_DEF_MAX_RECV_SEG_LEN + 4096 - \
(ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge) + \
- sizeof(struct ib_cqe)))
+ sizeof(struct ib_cqe) + sizeof(bool)))
#define ISCSI_ISER_SG_TABLESIZE 256
@@ -85,6 +85,7 @@ struct iser_rx_desc {
u64 dma_addr;
struct ib_sge rx_sg;
struct ib_cqe rx_cqe;
+ bool in_use;
char pad[ISER_RX_PAD_SIZE];
} __packed;
diff --git a/drivers/infiniband/ulp/opa_vnic/Kconfig b/drivers/infiniband/ulp/opa_vnic/Kconfig
new file mode 100644
index 000000000000..48132ab5e6b9
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/Kconfig
@@ -0,0 +1,8 @@
+config INFINIBAND_OPA_VNIC
+ tristate "Intel OPA VNIC support"
+ depends on X86_64 && INFINIBAND
+ ---help---
+ This is the Omni-Path (OPA) Virtual Network Interface Controller (VNIC)
+ driver for the Ethernet over Omni-Path feature. It implements the
+ HW-independent VNIC functionality and interfaces with the Linux stack
+ for the data path and with IB MAD for the control path.
diff --git a/drivers/infiniband/ulp/opa_vnic/Makefile b/drivers/infiniband/ulp/opa_vnic/Makefile
new file mode 100644
index 000000000000..8061b287cfe4
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/Makefile
@@ -0,0 +1,7 @@
+# Makefile - Intel Omni-Path Virtual Network Controller driver
+# Copyright(c) 2017, Intel Corporation.
+#
+obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic.o
+
+opa_vnic-y := opa_vnic_netdev.o opa_vnic_encap.o opa_vnic_ethtool.o \
+ opa_vnic_vema.o opa_vnic_vema_iface.o
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
new file mode 100644
index 000000000000..afa938bd26d6
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
@@ -0,0 +1,475 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains the OPA VNIC encapsulation/decapsulation functions.
+ */
+
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+
+#include "opa_vnic_internal.h"
+
+/* OPA 16B Header fields */
+#define OPA_16B_LID_MASK 0xFFFFFull
+#define OPA_16B_SLID_HIGH_SHFT 8
+#define OPA_16B_SLID_MASK 0xF00ull
+#define OPA_16B_DLID_MASK 0xF000ull
+#define OPA_16B_DLID_HIGH_SHFT 12
+#define OPA_16B_LEN_SHFT 20
+#define OPA_16B_SC_SHFT 20
+#define OPA_16B_RC_SHFT 25
+#define OPA_16B_PKEY_SHFT 16
+
+#define OPA_VNIC_L4_HDR_SHFT 16
+
+/* L2+L4 hdr len is 20 bytes (5 quad words) */
+#define OPA_VNIC_HDR_QW_LEN 5
+
+static inline void opa_vnic_make_header(u8 *hdr, u32 slid, u32 dlid, u16 len,
+ u16 pkey, u16 entropy, u8 sc, u8 rc,
+ u8 l4_type, u16 l4_hdr)
+{
+ /* h[1]: LT=1, 16B L2=10 */
+ u32 h[OPA_VNIC_HDR_QW_LEN] = {0, 0xc0000000, 0, 0, 0};
+
+ h[2] = l4_type;
+ h[3] = entropy;
+ h[4] = l4_hdr << OPA_VNIC_L4_HDR_SHFT;
+
+ /* Extract and set 4 upper bits and 20 lower bits of the lids */
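+ /*
+ * Illustrative example: a 24-bit slid of 0x123456 puts 0x23456 into
+ * h[0] and its top nibble (0x1) into bits 8-11 of h[2]; the dlid's
+ * top nibble lands in bits 12-15 of h[2].
+ */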
+ h[0] |= (slid & OPA_16B_LID_MASK);
+ h[2] |= ((slid >> (20 - OPA_16B_SLID_HIGH_SHFT)) & OPA_16B_SLID_MASK);
+
+ h[1] |= (dlid & OPA_16B_LID_MASK);
+ h[2] |= ((dlid >> (20 - OPA_16B_DLID_HIGH_SHFT)) & OPA_16B_DLID_MASK);
+
+ h[0] |= (len << OPA_16B_LEN_SHFT);
+ h[1] |= (rc << OPA_16B_RC_SHFT);
+ h[1] |= (sc << OPA_16B_SC_SHFT);
+ h[2] |= ((u32)pkey << OPA_16B_PKEY_SHFT);
+
+ memcpy(hdr, h, OPA_VNIC_HDR_LEN);
+}
+
+/*
+ * The mac table is implemented as a simple hash table, keyed by the last
+ * octet of the mac address.
+ */
+static void opa_vnic_free_mac_tbl(struct hlist_head *mactbl)
+{
+ struct opa_vnic_mac_tbl_node *node;
+ struct hlist_node *tmp;
+ int bkt;
+
+ if (!mactbl)
+ return;
+
+ vnic_hash_for_each_safe(mactbl, bkt, tmp, node, hlist) {
+ hash_del(&node->hlist);
+ kfree(node);
+ }
+ kfree(mactbl);
+}
+
+static struct hlist_head *opa_vnic_alloc_mac_tbl(void)
+{
+ u32 size = sizeof(struct hlist_head) * OPA_VNIC_MAC_TBL_SIZE;
+ struct hlist_head *mactbl;
+
+ mactbl = kzalloc(size, GFP_KERNEL);
+ if (!mactbl)
+ return ERR_PTR(-ENOMEM);
+
+ vnic_hash_init(mactbl);
+ return mactbl;
+}
+
+/* opa_vnic_release_mac_tbl - empty and free the mac table */
+void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter)
+{
+ struct hlist_head *mactbl;
+
+ mutex_lock(&adapter->mactbl_lock);
+ mactbl = rcu_access_pointer(adapter->mactbl);
+ rcu_assign_pointer(adapter->mactbl, NULL);
+ synchronize_rcu();
+ opa_vnic_free_mac_tbl(mactbl);
+ mutex_unlock(&adapter->mactbl_lock);
+}
+
+/*
+ * opa_vnic_query_mac_tbl - query the mac table for a section
+ *
+ * This function returns the requested section of the mac table.
+ * The function also expects the requested range to be valid.
+ */
+void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_mactable *tbl)
+{
+ struct opa_vnic_mac_tbl_node *node;
+ struct hlist_head *mactbl;
+ int bkt;
+ u16 loffset, lnum_entries;
+
+ rcu_read_lock();
+ mactbl = rcu_dereference(adapter->mactbl);
+ if (!mactbl)
+ goto get_mac_done;
+
+ loffset = be16_to_cpu(tbl->offset);
+ lnum_entries = be16_to_cpu(tbl->num_entries);
+
+ vnic_hash_for_each(mactbl, bkt, node, hlist) {
+ struct __opa_vnic_mactable_entry *nentry = &node->entry;
+ struct opa_veswport_mactable_entry *entry;
+
+ if ((node->index < loffset) ||
+ (node->index >= (loffset + lnum_entries)))
+ continue;
+
+ /* populate entry in the tbl corresponding to the index */
+ entry = &tbl->tbl_entries[node->index - loffset];
+ memcpy(entry->mac_addr, nentry->mac_addr,
+ ARRAY_SIZE(entry->mac_addr));
+ memcpy(entry->mac_addr_mask, nentry->mac_addr_mask,
+ ARRAY_SIZE(entry->mac_addr_mask));
+ entry->dlid_sd = cpu_to_be32(nentry->dlid_sd);
+ }
+ tbl->mac_tbl_digest = cpu_to_be32(adapter->info.vport.mac_tbl_digest);
+get_mac_done:
+ rcu_read_unlock();
+}
+
+/*
+ * opa_vnic_update_mac_tbl - update mac table section
+ *
+ * This function updates the specified section of the mac table.
+ * The procedure includes the following steps.
+ * - Allocate a new mac (hash) table.
+ * - Add the specified entries to the new table.
+ * (except the ones that are requested to be deleted).
+ * - Add all the other entries from the old mac table.
+ * - If there is a failure, free the new table and return.
+ * - Switch to the new table.
+ * - Free the old table and return.
+ *
+ * The function also expects the requested range to be valid.
+ */
+int opa_vnic_update_mac_tbl(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_mactable *tbl)
+{
+ struct opa_vnic_mac_tbl_node *node, *new_node;
+ struct hlist_head *new_mactbl, *old_mactbl;
+ int i, bkt, rc = 0;
+ u8 key;
+ u16 loffset, lnum_entries;
+
+ mutex_lock(&adapter->mactbl_lock);
+ /* allocate new mac table */
+ new_mactbl = opa_vnic_alloc_mac_tbl();
+ if (IS_ERR(new_mactbl)) {
+ mutex_unlock(&adapter->mactbl_lock);
+ return PTR_ERR(new_mactbl);
+ }
+
+ loffset = be16_to_cpu(tbl->offset);
+ lnum_entries = be16_to_cpu(tbl->num_entries);
+
+ /* add updated entries to the new mac table */
+ for (i = 0; i < lnum_entries; i++) {
+ struct __opa_vnic_mactable_entry *nentry;
+ struct opa_veswport_mactable_entry *entry =
+ &tbl->tbl_entries[i];
+ u8 *mac_addr = entry->mac_addr;
+ u8 empty_mac[ETH_ALEN] = { 0 };
+
+ v_dbg("new mac entry %4d: %02x:%02x:%02x:%02x:%02x:%02x %x\n",
+ loffset + i, mac_addr[0], mac_addr[1], mac_addr[2],
+ mac_addr[3], mac_addr[4], mac_addr[5],
+ entry->dlid_sd);
+
+ /* if the entry is being removed, do not add it */
+ if (!memcmp(mac_addr, empty_mac, ARRAY_SIZE(empty_mac)))
+ continue;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node) {
+ rc = -ENOMEM;
+ goto updt_done;
+ }
+
+ node->index = loffset + i;
+ nentry = &node->entry;
+ memcpy(nentry->mac_addr, entry->mac_addr,
+ ARRAY_SIZE(nentry->mac_addr));
+ memcpy(nentry->mac_addr_mask, entry->mac_addr_mask,
+ ARRAY_SIZE(nentry->mac_addr_mask));
+ nentry->dlid_sd = be32_to_cpu(entry->dlid_sd);
+ key = node->entry.mac_addr[OPA_VNIC_MAC_HASH_IDX];
+ vnic_hash_add(new_mactbl, &node->hlist, key);
+ }
+
+ /* add other entries from current mac table to new mac table */
+ old_mactbl = rcu_access_pointer(adapter->mactbl);
+ if (!old_mactbl)
+ goto switch_tbl;
+
+ vnic_hash_for_each(old_mactbl, bkt, node, hlist) {
+ if ((node->index >= loffset) &&
+ (node->index < (loffset + lnum_entries)))
+ continue;
+
+ new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
+ if (!new_node) {
+ rc = -ENOMEM;
+ goto updt_done;
+ }
+
+ new_node->index = node->index;
+ memcpy(&new_node->entry, &node->entry, sizeof(node->entry));
+ key = new_node->entry.mac_addr[OPA_VNIC_MAC_HASH_IDX];
+ vnic_hash_add(new_mactbl, &new_node->hlist, key);
+ }
+
+switch_tbl:
+ /* switch to new table */
+ rcu_assign_pointer(adapter->mactbl, new_mactbl);
+ synchronize_rcu();
+
+ adapter->info.vport.mac_tbl_digest = be32_to_cpu(tbl->mac_tbl_digest);
+updt_done:
+ /* upon failure, free the new table; otherwise, free the old table */
+ if (rc)
+ opa_vnic_free_mac_tbl(new_mactbl);
+ else
+ opa_vnic_free_mac_tbl(old_mactbl);
+
+ mutex_unlock(&adapter->mactbl_lock);
+ return rc;
+}
+
+/* opa_vnic_chk_mac_tbl - check mac table for dlid */
+static uint32_t opa_vnic_chk_mac_tbl(struct opa_vnic_adapter *adapter,
+ struct ethhdr *mac_hdr)
+{
+ struct opa_vnic_mac_tbl_node *node;
+ struct hlist_head *mactbl;
+ u32 dlid = 0;
+ u8 key;
+
+ rcu_read_lock();
+ mactbl = rcu_dereference(adapter->mactbl);
+ if (unlikely(!mactbl))
+ goto chk_done;
+
+ key = mac_hdr->h_dest[OPA_VNIC_MAC_HASH_IDX];
+ vnic_hash_for_each_possible(mactbl, node, hlist, key) {
+ struct __opa_vnic_mactable_entry *entry = &node->entry;
+
+ /* if related to source mac, skip */
+ if (unlikely(OPA_VNIC_DLID_SD_IS_SRC_MAC(entry->dlid_sd)))
+ continue;
+
+ if (!memcmp(node->entry.mac_addr, mac_hdr->h_dest,
+ ARRAY_SIZE(node->entry.mac_addr))) {
+ /* mac address found */
+ dlid = OPA_VNIC_DLID_SD_GET_DLID(node->entry.dlid_sd);
+ break;
+ }
+ }
+
+chk_done:
+ rcu_read_unlock();
+ return dlid;
+}
+
+/* opa_vnic_get_dlid - find and return the DLID */
+static uint32_t opa_vnic_get_dlid(struct opa_vnic_adapter *adapter,
+ struct sk_buff *skb, u8 def_port)
+{
+ struct __opa_veswport_info *info = &adapter->info;
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+ u32 dlid;
+
+ dlid = opa_vnic_chk_mac_tbl(adapter, mac_hdr);
+ if (dlid)
+ return dlid;
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest)) {
+ dlid = info->vesw.u_mcast_dlid;
+ } else {
+ if (is_local_ether_addr(mac_hdr->h_dest)) {
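+ /*
+ * A locally administered unicast MAC encodes the DLID in
+ * bytes 3-5 (byte 5 being the most significant).
+ */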
+ dlid = ((uint32_t)mac_hdr->h_dest[5] << 16) |
+ ((uint32_t)mac_hdr->h_dest[4] << 8) |
+ mac_hdr->h_dest[3];
+ if (unlikely(!dlid))
+ v_warn("Null dlid in MAC address\n");
+ } else if (def_port != OPA_VNIC_INVALID_PORT) {
+ dlid = info->vesw.u_ucast_dlid[def_port];
+ }
+ }
+
+ return dlid;
+}
+
+/* opa_vnic_get_sc - return the service class */
+static u8 opa_vnic_get_sc(struct __opa_veswport_info *info,
+ struct sk_buff *skb)
+{
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+ u16 vlan_tci;
+ u8 sc;
+
+ if (!__vlan_get_tag(skb, &vlan_tci)) {
+ u8 pcp = OPA_VNIC_VLAN_PCP(vlan_tci);
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ sc = info->vport.pcp_to_sc_mc[pcp];
+ else
+ sc = info->vport.pcp_to_sc_uc[pcp];
+ } else {
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ sc = info->vport.non_vlan_sc_mc;
+ else
+ sc = info->vport.non_vlan_sc_uc;
+ }
+
+ return sc;
+}
+
+u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
+{
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+ struct __opa_veswport_info *info = &adapter->info;
+ u8 vl;
+
+ if (skb_vlan_tag_present(skb)) {
+ u8 pcp = skb_vlan_tag_get(skb) >> VLAN_PRIO_SHIFT;
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ vl = info->vport.pcp_to_vl_mc[pcp];
+ else
+ vl = info->vport.pcp_to_vl_uc[pcp];
+ } else {
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ vl = info->vport.non_vlan_vl_mc;
+ else
+ vl = info->vport.non_vlan_vl_uc;
+ }
+
+ return vl;
+}
+
+/* opa_vnic_calc_entropy - calculate the packet entropy */
+u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
+{
+ u16 hash16;
+
+ /*
+ * Get flow based 16-bit hash and then XOR the upper and lower bytes
+ * to get the entropy.
+ * __skb_tx_hash limits qcount to 16 bits. Hence, get 15-bit hash.
+ */
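+ /* e.g. (illustrative) a hash of 0x5a3c yields 0x5a ^ 0x3c = 0x66 */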
+ hash16 = __skb_tx_hash(adapter->netdev, skb, BIT(15));
+ return (u8)((hash16 >> 8) ^ (hash16 & 0xff));
+}
+
+/* opa_vnic_get_def_port - get default port based on entropy */
+static inline u8 opa_vnic_get_def_port(struct opa_vnic_adapter *adapter,
+ u8 entropy)
+{
+ u8 flow_id;
+
+ /* Add the upper and lower 4-bits of entropy to get the flow id */
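+ /* e.g. (illustrative) entropy 0xa7 gives flow_id 0xa + 0x7 = 0x11 */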
+ flow_id = ((entropy & 0xf) + (entropy >> 4));
+ return adapter->flow_tbl[flow_id & (OPA_VNIC_FLOW_TBL_SIZE - 1)];
+}
+
+/* Calculate packet length including OPA header, crc and padding */
+static inline int opa_vnic_wire_length(struct sk_buff *skb)
+{
+ u32 pad_len;
+
+ /* padding for 8-byte size alignment */
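+ /* -(x) & 0x7 gives the bytes needed to round x up to a multiple of 8 */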
+ pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
+ pad_len += OPA_VNIC_ICRC_TAIL_LEN;
+
+ return (skb->len + pad_len) >> 3;
+}
+
+/* opa_vnic_encap_skb - encapsulate skb packet with OPA header and meta data */
+void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
+{
+ struct __opa_veswport_info *info = &adapter->info;
+ struct opa_vnic_skb_mdata *mdata;
+ u8 def_port, sc, entropy, *hdr;
+ u16 len, l4_hdr;
+ u32 dlid;
+
+ hdr = skb_push(skb, OPA_VNIC_HDR_LEN);
+
+ entropy = opa_vnic_calc_entropy(adapter, skb);
+ def_port = opa_vnic_get_def_port(adapter, entropy);
+ len = opa_vnic_wire_length(skb);
+ dlid = opa_vnic_get_dlid(adapter, skb, def_port);
+ sc = opa_vnic_get_sc(info, skb);
+ l4_hdr = info->vesw.vesw_id;
+
+ mdata = skb_push(skb, sizeof(*mdata));
+ mdata->vl = opa_vnic_get_vl(adapter, skb);
+ mdata->entropy = entropy;
+ mdata->flags = 0;
+ if (unlikely(!dlid)) {
+ mdata->flags = OPA_VNIC_SKB_MDATA_ENCAP_ERR;
+ return;
+ }
+
+ opa_vnic_make_header(hdr, info->vport.encap_slid, dlid, len,
+ info->vesw.pkey, entropy, sc, 0,
+ OPA_VNIC_L4_ETHR, l4_hdr);
+}
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
new file mode 100644
index 000000000000..4c434b9dd84c
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
@@ -0,0 +1,489 @@
+#ifndef _OPA_VNIC_ENCAP_H
+#define _OPA_VNIC_ENCAP_H
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains all the OPA VNIC declarations required for
+ * encapsulation and decapsulation of Ethernet packets.
+ */
+
+#include <linux/types.h>
+#include <rdma/ib_mad.h>
+
+/* EMA class version */
+#define OPA_EMA_CLASS_VERSION 0x80
+
+/*
+ * Define the Intel vendor management class for OPA
+ * ETHERNET MANAGEMENT
+ */
+#define OPA_MGMT_CLASS_INTEL_EMA 0x34
+
+/* EM attribute IDs */
+#define OPA_EM_ATTR_CLASS_PORT_INFO 0x0001
+#define OPA_EM_ATTR_VESWPORT_INFO 0x0011
+#define OPA_EM_ATTR_VESWPORT_MAC_ENTRIES 0x0012
+#define OPA_EM_ATTR_IFACE_UCAST_MACS 0x0013
+#define OPA_EM_ATTR_IFACE_MCAST_MACS 0x0014
+#define OPA_EM_ATTR_DELETE_VESW 0x0015
+#define OPA_EM_ATTR_VESWPORT_SUMMARY_COUNTERS 0x0020
+#define OPA_EM_ATTR_VESWPORT_ERROR_COUNTERS 0x0022
+
+/* VNIC configured and operational state values */
+#define OPA_VNIC_STATE_DROP_ALL 0x1
+#define OPA_VNIC_STATE_FORWARDING 0x3
+
+#define OPA_VESW_MAX_NUM_DEF_PORT 16
+#define OPA_VNIC_MAX_NUM_PCP 8
+
+#define OPA_VNIC_EMA_DATA (OPA_MGMT_MAD_SIZE - IB_MGMT_VENDOR_HDR)
+
+/* Defines for vendor specific notice(trap) attributes */
+#define OPA_INTEL_EMA_NOTICE_TYPE_INFO 0x04
+
+/* INTEL OUI */
+#define INTEL_OUI_1 0x00
+#define INTEL_OUI_2 0x06
+#define INTEL_OUI_3 0x6a
+
+/* Trap opcodes sent from VNIC */
+#define OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE 0x1
+#define OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE 0x2
+#define OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE 0x3
+
+#define OPA_VNIC_DLID_SD_IS_SRC_MAC(dlid_sd) (!!((dlid_sd) & 0x20))
+#define OPA_VNIC_DLID_SD_GET_DLID(dlid_sd) ((dlid_sd) >> 8)
+
+/* VNIC Ethernet link status */
+#define OPA_VNIC_ETH_LINK_UP 1
+#define OPA_VNIC_ETH_LINK_DOWN 2
+
+/**
+ * struct opa_vesw_info - OPA vnic switch information
+ * @fabric_id: 10-bit fabric id
+ * @vesw_id: 12-bit virtual ethernet switch id
+ * @def_port_mask: bitmask of default ports
+ * @pkey: partition key
+ * @u_mcast_dlid: unknown multicast dlid
+ * @u_ucast_dlid: array of unknown unicast dlids
+ * @eth_mtu: MTUs for each vlan PCP
+ * @eth_mtu_non_vlan: MTU for non vlan packets
+ */
+struct opa_vesw_info {
+ __be16 fabric_id;
+ __be16 vesw_id;
+
+ u8 rsvd0[6];
+ __be16 def_port_mask;
+
+ u8 rsvd1[2];
+ __be16 pkey;
+
+ u8 rsvd2[4];
+ __be32 u_mcast_dlid;
+ __be32 u_ucast_dlid[OPA_VESW_MAX_NUM_DEF_PORT];
+
+ u8 rsvd3[44];
+ __be16 eth_mtu[OPA_VNIC_MAX_NUM_PCP];
+ __be16 eth_mtu_non_vlan;
+ u8 rsvd4[2];
+} __packed;
+
+/**
+ * struct opa_per_veswport_info - OPA vnic per port information
+ * @port_num: port number
+ * @eth_link_status: current ethernet link state
+ * @base_mac_addr: base mac address
+ * @config_state: configured port state
+ * @oper_state: operational port state
+ * @max_mac_tbl_ent: max number of mac table entries
+ * @max_smac_ent: max smac entries in mac table
+ * @mac_tbl_digest: mac table digest
+ * @encap_slid: base slid for the port
+ * @pcp_to_sc_uc: sc by pcp index for unicast ethernet packets
+ * @pcp_to_vl_uc: vl by pcp index for unicast ethernet packets
+ * @pcp_to_sc_mc: sc by pcp index for multicast ethernet packets
+ * @pcp_to_vl_mc: vl by pcp index for multicast ethernet packets
+ * @non_vlan_sc_uc: sc for non-vlan unicast ethernet packets
+ * @non_vlan_vl_uc: vl for non-vlan unicast ethernet packets
+ * @non_vlan_sc_mc: sc for non-vlan multicast ethernet packets
+ * @non_vlan_vl_mc: vl for non-vlan multicast ethernet packets
+ * @uc_macs_gen_count: generation count for unicast macs list
+ * @mc_macs_gen_count: generation count for multicast macs list
+ */
+struct opa_per_veswport_info {
+ __be32 port_num;
+
+ u8 eth_link_status;
+ u8 rsvd0[3];
+
+ u8 base_mac_addr[ETH_ALEN];
+ u8 config_state;
+ u8 oper_state;
+
+ __be16 max_mac_tbl_ent;
+ __be16 max_smac_ent;
+ __be32 mac_tbl_digest;
+ u8 rsvd1[4];
+
+ __be32 encap_slid;
+
+ u8 pcp_to_sc_uc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_vl_uc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_sc_mc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_vl_mc[OPA_VNIC_MAX_NUM_PCP];
+
+ u8 non_vlan_sc_uc;
+ u8 non_vlan_vl_uc;
+ u8 non_vlan_sc_mc;
+ u8 non_vlan_vl_mc;
+
+ u8 rsvd2[48];
+
+ __be16 uc_macs_gen_count;
+ __be16 mc_macs_gen_count;
+
+ u8 rsvd3[8];
+} __packed;
+
+/**
+ * struct opa_veswport_info - OPA vnic port information
+ * @vesw: OPA vnic switch information
+ * @vport: OPA vnic per port information
+ *
+ * On the host, each virtual ethernet port belongs
+ * to a different virtual ethernet switch.
+ */
+struct opa_veswport_info {
+ struct opa_vesw_info vesw;
+ struct opa_per_veswport_info vport;
+};
+
+/**
+ * struct opa_veswport_mactable_entry - single entry in the forwarding table
+ * @mac_addr: MAC address
+ * @mac_addr_mask: MAC address bit mask
+ * @dlid_sd: Matching DLID and side data
+ *
+ * On the host each virtual ethernet port will have
+ * a forwarding table. These tables are used to
+ * map a MAC to a LID and other data. For more
+ * details see struct opa_veswport_mactable_entries.
+ * This is the structure of a single mactable entry
+ */
+struct opa_veswport_mactable_entry {
+ u8 mac_addr[ETH_ALEN];
+ u8 mac_addr_mask[ETH_ALEN];
+ __be32 dlid_sd;
+} __packed;
+
+/**
+ * struct opa_veswport_mactable - Forwarding table array
+ * @offset: mac table starting offset
+ * @num_entries: Number of entries to get or set
+ * @mac_tbl_digest: mac table digest
+ * @tbl_entries: Array of table entries
+ *
+ * The EM sends down this structure in a MAD indicating
+ * the starting offset in the forwarding table that the
+ * entries are to be loaded into and the number of entries
+ * that this MAD instance contains.
+ * The mac_tbl_digest is set by the EM and is used to detect any
+ * discrepancies between this value and the value maintained by the EM,
+ * for example when a VNIC port is deleted or unloaded.
+ * A new instantiation of a VNIC will always have a digest of zero.
+ * This value is stored as part of the vnic adapter structure and is
+ * accessed by the GET and SET routines for both the mactable entries and the
+ * veswport info.
+ */
+struct opa_veswport_mactable {
+ __be16 offset;
+ __be16 num_entries;
+ __be32 mac_tbl_digest;
+ struct opa_veswport_mactable_entry tbl_entries[0];
+} __packed;
+
+/**
+ * struct opa_veswport_summary_counters - summary counters
+ * @vp_instance: vport instance on the OPA port
+ * @vesw_id: virtual ethernet switch id
+ * @veswport_num: virtual ethernet switch port number
+ * @tx_errors: transmit errors
+ * @rx_errors: receive errors
+ * @tx_packets: transmit packets
+ * @rx_packets: receive packets
+ * @tx_bytes: transmit bytes
+ * @rx_bytes: receive bytes
+ * @tx_unicast: unicast packets transmitted
+ * @tx_mcastbcast: multicast/broadcast packets transmitted
+ * @tx_untagged: non-vlan packets transmitted
+ * @tx_vlan: vlan packets transmitted
+ * @tx_64_size: transmit packet length is 64 bytes
+ * @tx_65_127: transmit packet length is 65 to 127 bytes
+ * @tx_128_255: transmit packet length is 128 to 255 bytes
+ * @tx_256_511: transmit packet length is 256 to 511 bytes
+ * @tx_512_1023: transmit packet length is 512 to 1023 bytes
+ * @tx_1024_1518: transmit packet length is 1024 to 1518 bytes
+ * @tx_1519_max: transmit packet length >= 1519 bytes
+ * @rx_unicast: unicast packets received
+ * @rx_mcastbcast: multicast/broadcast packets received
+ * @rx_untagged: non-vlan packets received
+ * @rx_vlan: vlan packets received
+ * @rx_64_size: received packet length is 64 bytes
+ * @rx_65_127: received packet length is 65 to 127 bytes
+ * @rx_128_255: received packet length is 128 to 255 bytes
+ * @rx_256_511: received packet length is 256 to 511 bytes
+ * @rx_512_1023: received packet length is 512 to 1023 bytes
+ * @rx_1024_1518: received packet length is 1024 to 1518 bytes
+ * @rx_1519_max: received packet length >= 1519 bytes
+ *
+ * All the above are counters of corresponding conditions.
+ */
+struct opa_veswport_summary_counters {
+ __be16 vp_instance;
+ __be16 vesw_id;
+ __be32 veswport_num;
+
+ __be64 tx_errors;
+ __be64 rx_errors;
+ __be64 tx_packets;
+ __be64 rx_packets;
+ __be64 tx_bytes;
+ __be64 rx_bytes;
+
+ __be64 tx_unicast;
+ __be64 tx_mcastbcast;
+
+ __be64 tx_untagged;
+ __be64 tx_vlan;
+
+ __be64 tx_64_size;
+ __be64 tx_65_127;
+ __be64 tx_128_255;
+ __be64 tx_256_511;
+ __be64 tx_512_1023;
+ __be64 tx_1024_1518;
+ __be64 tx_1519_max;
+
+ __be64 rx_unicast;
+ __be64 rx_mcastbcast;
+
+ __be64 rx_untagged;
+ __be64 rx_vlan;
+
+ __be64 rx_64_size;
+ __be64 rx_65_127;
+ __be64 rx_128_255;
+ __be64 rx_256_511;
+ __be64 rx_512_1023;
+ __be64 rx_1024_1518;
+ __be64 rx_1519_max;
+
+ __be64 reserved[16];
+} __packed;
+
+/**
+ * struct opa_veswport_error_counters - error counters
+ * @vp_instance: vport instance on the OPA port
+ * @vesw_id: virtual ethernet switch id
+ * @veswport_num: virtual ethernet switch port number
+ * @tx_errors: transmit errors
+ * @rx_errors: receive errors
+ * @tx_smac_filt: smac filter errors
+ * @tx_dlid_zero: transmit packets with invalid dlid
+ * @tx_logic: other transmit errors
+ * @tx_drop_state: packets transmitted in non-forwarding port state
+ * @rx_bad_veswid: received packet with invalid vesw id
+ * @rx_runt: received ethernet packet with length < 64 bytes
+ * @rx_oversize: received ethernet packet with length > MTU size
+ * @rx_eth_down: received packets when interface is down
+ * @rx_drop_state: received packets in non-forwarding port state
+ * @rx_logic: other receive errors
+ *
+ * All the above are counters of corresponding error conditions.
+ */
+struct opa_veswport_error_counters {
+ __be16 vp_instance;
+ __be16 vesw_id;
+ __be32 veswport_num;
+
+ __be64 tx_errors;
+ __be64 rx_errors;
+
+ __be64 rsvd0;
+ __be64 tx_smac_filt;
+ __be64 rsvd1;
+ __be64 rsvd2;
+ __be64 rsvd3;
+ __be64 tx_dlid_zero;
+ __be64 rsvd4;
+ __be64 tx_logic;
+ __be64 rsvd5;
+ __be64 tx_drop_state;
+
+ __be64 rx_bad_veswid;
+ __be64 rsvd6;
+ __be64 rx_runt;
+ __be64 rx_oversize;
+ __be64 rsvd7;
+ __be64 rx_eth_down;
+ __be64 rx_drop_state;
+ __be64 rx_logic;
+ __be64 rsvd8;
+
+ __be64 rsvd9[16];
+} __packed;
+
+/**
+ * struct opa_veswport_trap - Trap message sent to EM by VNIC
+ * @fabric_id: 10 bit fabric id
+ * @veswid: 12 bit virtual ethernet switch id
+ * @veswportnum: logical port number on the Virtual switch
+ * @opaportnum: physical port num (redundant on host)
+ * @veswportindex: switch port index on the opa port (0 based)
+ * @opcode: operation
+ * @reserved: 32 bit for alignment
+ *
+ * The VNIC will send trap messages to the Ethernet manager to
+ * inform it about changes to the VNIC config, behaviour etc.
+ * This is the format of the trap payload.
+ */
+struct opa_veswport_trap {
+ __be16 fabric_id;
+ __be16 veswid;
+ __be32 veswportnum;
+ __be16 opaportnum;
+ u8 veswportindex;
+ u8 opcode;
+ __be32 reserved;
+} __packed;
+
+/**
+ * struct opa_vnic_iface_mac_entry - single entry in the mac list
+ * @mac_addr: MAC address
+ */
+struct opa_vnic_iface_mac_entry {
+ u8 mac_addr[ETH_ALEN];
+};
+
+/**
+ * struct opa_veswport_iface_macs - Msg to set globally administered MAC
+ * @start_idx: position of first entry (0 based)
+ * @num_macs_in_msg: number of MACs in this message
+ * @tot_macs_in_lst: The total number of MACs the agent has
+ * @gen_count: gen_count to indicate change
+ * @entry: The mac list entry
+ *
+ * The same attribute IDs and attribute modifiers as for locally administered
+ * addresses are used to set globally administered addresses.
+ */
+struct opa_veswport_iface_macs {
+ __be16 start_idx;
+ __be16 num_macs_in_msg;
+ __be16 tot_macs_in_lst;
+ __be16 gen_count;
+ struct opa_vnic_iface_mac_entry entry[0];
+} __packed;
+
+/**
+ * struct opa_vnic_vema_mad - Generic VEMA MAD
+ * @mad_hdr: Generic MAD header
+ * @rmpp_hdr: RMPP header for vendor specific MADs
+ * @oui: Unique org identifier
+ * @data: MAD data
+ */
+struct opa_vnic_vema_mad {
+ struct ib_mad_hdr mad_hdr;
+ struct ib_rmpp_hdr rmpp_hdr;
+ u8 reserved;
+ u8 oui[3];
+ u8 data[OPA_VNIC_EMA_DATA];
+};
+
+/**
+ * struct opa_vnic_notice_attr - Generic Notice MAD
+ * @gen_type: Generic/Specific bit and type of notice
+ * @oui_1: Vendor ID byte 1
+ * @oui_2: Vendor ID byte 2
+ * @oui_3: Vendor ID byte 3
+ * @trap_num: Trap number
+ * @toggle_count: Notice toggle bit and count value
+ * @issuer_lid: Trap issuer's lid
+ * @issuer_gid: Issuer GID (only if Report method)
+ * @raw_data: Trap message body
+ */
+struct opa_vnic_notice_attr {
+ u8 gen_type;
+ u8 oui_1;
+ u8 oui_2;
+ u8 oui_3;
+ __be16 trap_num;
+ __be16 toggle_count;
+ __be32 issuer_lid;
+ __be32 reserved;
+ u8 issuer_gid[16];
+ u8 raw_data[64];
+} __packed;
+
+/**
+ * struct opa_vnic_vema_mad_trap - Generic VEMA MAD Trap
+ * @mad_hdr: Generic MAD header
+ * @rmpp_hdr: RMPP header for vendor specific MADs
+ * @oui: Unique org identifier
+ * @notice: Notice structure
+ */
+struct opa_vnic_vema_mad_trap {
+ struct ib_mad_hdr mad_hdr;
+ struct ib_rmpp_hdr rmpp_hdr;
+ u8 reserved;
+ u8 oui[3];
+ struct opa_vnic_notice_attr notice;
+};
+
+#endif /* _OPA_VNIC_ENCAP_H */
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
new file mode 100644
index 000000000000..62390e9e0023
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA VNIC ethtool functions
+ */
+
+#include <linux/ethtool.h>
+
+#include "opa_vnic_internal.h"
+
+enum {NETDEV_STATS, VNIC_STATS};
+
+struct vnic_stats {
+ char stat_string[ETH_GSTRING_LEN];
+ struct {
+ int sizeof_stat;
+ int stat_offset;
+ };
+};
+
+#define VNIC_STAT(m) { FIELD_SIZEOF(struct opa_vnic_stats, m), \
+ offsetof(struct opa_vnic_stats, m) }
+
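+/*
+ * Each entry maps an ethtool stat string to the size and offset of the
+ * corresponding field within struct opa_vnic_stats.
+ */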
+static struct vnic_stats vnic_gstrings_stats[] = {
+ /* NETDEV stats */
+ {"rx_packets", VNIC_STAT(netstats.rx_packets)},
+ {"tx_packets", VNIC_STAT(netstats.tx_packets)},
+ {"rx_bytes", VNIC_STAT(netstats.rx_bytes)},
+ {"tx_bytes", VNIC_STAT(netstats.tx_bytes)},
+ {"rx_errors", VNIC_STAT(netstats.rx_errors)},
+ {"tx_errors", VNIC_STAT(netstats.tx_errors)},
+ {"rx_dropped", VNIC_STAT(netstats.rx_dropped)},
+ {"tx_dropped", VNIC_STAT(netstats.tx_dropped)},
+
+ /* SUMMARY counters */
+ {"tx_unicast", VNIC_STAT(tx_grp.unicast)},
+ {"tx_mcastbcast", VNIC_STAT(tx_grp.mcastbcast)},
+ {"tx_untagged", VNIC_STAT(tx_grp.untagged)},
+ {"tx_vlan", VNIC_STAT(tx_grp.vlan)},
+
+ {"tx_64_size", VNIC_STAT(tx_grp.s_64)},
+ {"tx_65_127", VNIC_STAT(tx_grp.s_65_127)},
+ {"tx_128_255", VNIC_STAT(tx_grp.s_128_255)},
+ {"tx_256_511", VNIC_STAT(tx_grp.s_256_511)},
+ {"tx_512_1023", VNIC_STAT(tx_grp.s_512_1023)},
+ {"tx_1024_1518", VNIC_STAT(tx_grp.s_1024_1518)},
+ {"tx_1519_max", VNIC_STAT(tx_grp.s_1519_max)},
+
+ {"rx_unicast", VNIC_STAT(rx_grp.unicast)},
+ {"rx_mcastbcast", VNIC_STAT(rx_grp.mcastbcast)},
+ {"rx_untagged", VNIC_STAT(rx_grp.untagged)},
+ {"rx_vlan", VNIC_STAT(rx_grp.vlan)},
+
+ {"rx_64_size", VNIC_STAT(rx_grp.s_64)},
+ {"rx_65_127", VNIC_STAT(rx_grp.s_65_127)},
+ {"rx_128_255", VNIC_STAT(rx_grp.s_128_255)},
+ {"rx_256_511", VNIC_STAT(rx_grp.s_256_511)},
+ {"rx_512_1023", VNIC_STAT(rx_grp.s_512_1023)},
+ {"rx_1024_1518", VNIC_STAT(rx_grp.s_1024_1518)},
+ {"rx_1519_max", VNIC_STAT(rx_grp.s_1519_max)},
+
+ /* ERROR counters */
+ {"rx_fifo_errors", VNIC_STAT(netstats.rx_fifo_errors)},
+ {"rx_length_errors", VNIC_STAT(netstats.rx_length_errors)},
+
+ {"tx_fifo_errors", VNIC_STAT(netstats.tx_fifo_errors)},
+ {"tx_carrier_errors", VNIC_STAT(netstats.tx_carrier_errors)},
+
+ {"tx_dlid_zero", VNIC_STAT(tx_dlid_zero)},
+ {"tx_drop_state", VNIC_STAT(tx_drop_state)},
+ {"rx_drop_state", VNIC_STAT(rx_drop_state)},
+ {"rx_oversize", VNIC_STAT(rx_oversize)},
+ {"rx_runt", VNIC_STAT(rx_runt)},
+};
+
+#define VNIC_STATS_LEN ARRAY_SIZE(vnic_gstrings_stats)
+
+/* vnic_get_drvinfo - get driver info */
+static void vnic_get_drvinfo(struct net_device *netdev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ strlcpy(drvinfo->driver, opa_vnic_driver_name, sizeof(drvinfo->driver));
+ strlcpy(drvinfo->version, opa_vnic_driver_version,
+ sizeof(drvinfo->version));
+ strlcpy(drvinfo->bus_info, dev_name(netdev->dev.parent),
+ sizeof(drvinfo->bus_info));
+}
+
+/* vnic_get_sset_count - get string set count */
+static int vnic_get_sset_count(struct net_device *netdev, int sset)
+{
+ return (sset == ETH_SS_STATS) ? VNIC_STATS_LEN : -EOPNOTSUPP;
+}
+
+/* vnic_get_ethtool_stats - get statistics */
+static void vnic_get_ethtool_stats(struct net_device *netdev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ struct opa_vnic_stats vstats;
+ int i;
+
+ memset(&vstats, 0, sizeof(vstats));
+ spin_lock(&adapter->stats_lock);
+ adapter->rn_ops->ndo_get_stats64(netdev, &vstats.netstats);
+ spin_unlock(&adapter->stats_lock);
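+ /* copy each counter out of vstats using its recorded size and offset */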
+ for (i = 0; i < VNIC_STATS_LEN; i++) {
+ char *p = (char *)&vstats + vnic_gstrings_stats[i].stat_offset;
+
+ data[i] = (vnic_gstrings_stats[i].sizeof_stat ==
+ sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+ }
+}
+
+/* vnic_get_strings - get strings */
+static void vnic_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+ int i;
+
+ if (stringset != ETH_SS_STATS)
+ return;
+
+ for (i = 0; i < VNIC_STATS_LEN; i++)
+ memcpy(data + i * ETH_GSTRING_LEN,
+ vnic_gstrings_stats[i].stat_string,
+ ETH_GSTRING_LEN);
+}
+
+/* ethtool ops */
+static const struct ethtool_ops opa_vnic_ethtool_ops = {
+ .get_drvinfo = vnic_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_strings = vnic_get_strings,
+ .get_sset_count = vnic_get_sset_count,
+ .get_ethtool_stats = vnic_get_ethtool_stats,
+};
+
+/* opa_vnic_set_ethtool_ops - set ethtool ops */
+void opa_vnic_set_ethtool_ops(struct net_device *netdev)
+{
+ netdev->ethtool_ops = &opa_vnic_ethtool_ops;
+}
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
new file mode 100644
index 000000000000..ca29e6d5aedc
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
@@ -0,0 +1,329 @@
+#ifndef _OPA_VNIC_INTERNAL_H
+#define _OPA_VNIC_INTERNAL_H
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA VNIC driver internal declarations
+ */
+
+#include <linux/bitops.h>
+#include <linux/etherdevice.h>
+#include <linux/hashtable.h>
+#include <linux/sizes.h>
+#include <rdma/opa_vnic.h>
+
+#include "opa_vnic_encap.h"
+
+#define OPA_VNIC_VLAN_PCP(vlan_tci) \
+ (((vlan_tci) & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT)
+
+/* Flow to default port redirection table size */
+#define OPA_VNIC_FLOW_TBL_SIZE 32
+
+/* Invalid port number */
+#define OPA_VNIC_INVALID_PORT 0xff
+
+struct opa_vnic_adapter;
+
+/**
+ * struct __opa_vesw_info - OPA vnic virtual switch info
+ *
+ * Same as opa_vesw_info without bitwise attribute.
+ */
+struct __opa_vesw_info {
+ u16 fabric_id;
+ u16 vesw_id;
+
+ u8 rsvd0[6];
+ u16 def_port_mask;
+
+ u8 rsvd1[2];
+ u16 pkey;
+
+ u8 rsvd2[4];
+ u32 u_mcast_dlid;
+ u32 u_ucast_dlid[OPA_VESW_MAX_NUM_DEF_PORT];
+
+ u8 rsvd3[44];
+ u16 eth_mtu[OPA_VNIC_MAX_NUM_PCP];
+ u16 eth_mtu_non_vlan;
+ u8 rsvd4[2];
+} __packed;
+
+/**
+ * struct __opa_per_veswport_info - OPA vnic per port info
+ *
+ * Same as opa_per_veswport_info without bitwise attribute.
+ */
+struct __opa_per_veswport_info {
+ u32 port_num;
+
+ u8 eth_link_status;
+ u8 rsvd0[3];
+
+ u8 base_mac_addr[ETH_ALEN];
+ u8 config_state;
+ u8 oper_state;
+
+ u16 max_mac_tbl_ent;
+ u16 max_smac_ent;
+ u32 mac_tbl_digest;
+ u8 rsvd1[4];
+
+ u32 encap_slid;
+
+ u8 pcp_to_sc_uc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_vl_uc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_sc_mc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_vl_mc[OPA_VNIC_MAX_NUM_PCP];
+
+ u8 non_vlan_sc_uc;
+ u8 non_vlan_vl_uc;
+ u8 non_vlan_sc_mc;
+ u8 non_vlan_vl_mc;
+
+ u8 rsvd2[48];
+
+ u16 uc_macs_gen_count;
+ u16 mc_macs_gen_count;
+
+ u8 rsvd3[8];
+} __packed;
+
+/**
+ * struct __opa_veswport_info - OPA vnic port info
+ *
+ * Same as opa_veswport_info without bitwise attribute.
+ */
+struct __opa_veswport_info {
+ struct __opa_vesw_info vesw;
+ struct __opa_per_veswport_info vport;
+};
+
+/**
+ * struct __opa_veswport_trap - OPA vnic trap info
+ *
+ * Same as opa_veswport_trap without bitwise attribute.
+ */
+struct __opa_veswport_trap {
+ u16 fabric_id;
+ u16 veswid;
+ u32 veswportnum;
+ u16 opaportnum;
+ u8 veswportindex;
+ u8 opcode;
+ u32 reserved;
+} __packed;
+
+/**
+ * struct opa_vnic_ctrl_port - OPA virtual NIC control port
+ * @ibdev: pointer to ib device
+ * @ops: opa vnic control operations
+ * @num_ports: number of opa ports
+ */
+struct opa_vnic_ctrl_port {
+ struct ib_device *ibdev;
+ struct opa_vnic_ctrl_ops *ops;
+ u8 num_ports;
+};
+
+/**
+ * struct opa_vnic_adapter - OPA VNIC netdev private data structure
+ * @netdev: pointer to associated netdev
+ * @ibdev: ib device
+ * @cport: pointer to opa vnic control port
+ * @rn_ops: rdma netdev's net_device_ops
+ * @port_num: OPA port number
+ * @vport_num: vesw port number
+ * @lock: adapter lock
+ * @info: virtual ethernet switch port information
+ * @vema_mac_addr: mac address configured by vema
+ * @umac_hash: unicast maclist hash
+ * @mmac_hash: multicast maclist hash
+ * @mactbl: hash table of MAC entries
+ * @mactbl_lock: mac table lock
+ * @stats_lock: statistics lock
+ * @flow_tbl: flow to default port redirection table
+ * @trap_timeout: trap timeout
+ * @trap_count: no. of traps allowed within timeout period
+ */
+struct opa_vnic_adapter {
+ struct net_device *netdev;
+ struct ib_device *ibdev;
+ struct opa_vnic_ctrl_port *cport;
+ const struct net_device_ops *rn_ops;
+
+ u8 port_num;
+ u8 vport_num;
+
+ /* Lock used around concurrent updates to netdev */
+ struct mutex lock;
+
+ struct __opa_veswport_info info;
+ u8 vema_mac_addr[ETH_ALEN];
+ u32 umac_hash;
+ u32 mmac_hash;
+ struct hlist_head __rcu *mactbl;
+
+ /* Lock used to protect updates to mac table */
+ struct mutex mactbl_lock;
+
+ /* Lock used to protect access to vnic counters */
+ spinlock_t stats_lock;
+
+ u8 flow_tbl[OPA_VNIC_FLOW_TBL_SIZE];
+
+ unsigned long trap_timeout;
+ u8 trap_count;
+};
+
+/* Same as opa_veswport_mactable_entry, but without bitwise attribute */
+struct __opa_vnic_mactable_entry {
+ u8 mac_addr[ETH_ALEN];
+ u8 mac_addr_mask[ETH_ALEN];
+ u32 dlid_sd;
+} __packed;
+
+/**
+ * struct opa_vnic_mac_tbl_node - OPA VNIC mac table node
+ * @hlist: hash list handle
+ * @index: index of entry in the mac table
+ * @entry: entry in the table
+ */
+struct opa_vnic_mac_tbl_node {
+ struct hlist_node hlist;
+ u16 index;
+ struct __opa_vnic_mactable_entry entry;
+};
+
+#define v_dbg(format, arg...) \
+ netdev_dbg(adapter->netdev, format, ## arg)
+#define v_err(format, arg...) \
+ netdev_err(adapter->netdev, format, ## arg)
+#define v_info(format, arg...) \
+ netdev_info(adapter->netdev, format, ## arg)
+#define v_warn(format, arg...) \
+ netdev_warn(adapter->netdev, format, ## arg)
+
+#define c_err(format, arg...) \
+ dev_err(&cport->ibdev->dev, format, ## arg)
+#define c_info(format, arg...) \
+ dev_info(&cport->ibdev->dev, format, ## arg)
+#define c_dbg(format, arg...) \
+ dev_dbg(&cport->ibdev->dev, format, ## arg)
+
+/* The maximum allowed entries in the mac table */
+#define OPA_VNIC_MAC_TBL_MAX_ENTRIES 2048
+/* Limit of smac entries in mac table */
+#define OPA_VNIC_MAX_SMAC_LIMIT 256
+
+/* The last octet of the MAC address is used as the key to the hash table */
+#define OPA_VNIC_MAC_HASH_IDX 5
+
+/* The VNIC MAC hash table is of size 2^8 */
+#define OPA_VNIC_MAC_TBL_HASH_BITS 8
+#define OPA_VNIC_MAC_TBL_SIZE BIT(OPA_VNIC_MAC_TBL_HASH_BITS)
+
+/* VNIC HASH MACROS */
+#define vnic_hash_init(hashtable) __hash_init(hashtable, OPA_VNIC_MAC_TBL_SIZE)
+
+#define vnic_hash_add(hashtable, node, key) \
+ hlist_add_head(node, \
+ &hashtable[hash_min(key, ilog2(OPA_VNIC_MAC_TBL_SIZE))])
+
+#define vnic_hash_for_each_safe(name, bkt, tmp, obj, member) \
+ for ((bkt) = 0, obj = NULL; \
+ !obj && (bkt) < OPA_VNIC_MAC_TBL_SIZE; (bkt)++) \
+ hlist_for_each_entry_safe(obj, tmp, &name[bkt], member)
+
+#define vnic_hash_for_each_possible(name, obj, member, key) \
+ hlist_for_each_entry(obj, \
+ &name[hash_min(key, ilog2(OPA_VNIC_MAC_TBL_SIZE))], member)
+
+#define vnic_hash_for_each(name, bkt, obj, member) \
+ for ((bkt) = 0, obj = NULL; \
+ !obj && (bkt) < OPA_VNIC_MAC_TBL_SIZE; (bkt)++) \
+ hlist_for_each_entry(obj, &name[bkt], member)
+
+extern char opa_vnic_driver_name[];
+extern const char opa_vnic_driver_version[];
+
+struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev,
+ u8 port_num, u8 vport_num);
+void opa_vnic_rem_netdev(struct opa_vnic_adapter *adapter);
+void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
+u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
+u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
+void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter);
+void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter);
+void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_mactable *tbl);
+int opa_vnic_update_mac_tbl(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_mactable *tbl);
+void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_iface_macs *macs);
+void opa_vnic_query_mcast_macs(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_iface_macs *macs);
+void opa_vnic_get_summary_counters(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_summary_counters *cntrs);
+void opa_vnic_get_error_counters(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_error_counters *cntrs);
+void opa_vnic_get_vesw_info(struct opa_vnic_adapter *adapter,
+ struct opa_vesw_info *info);
+void opa_vnic_set_vesw_info(struct opa_vnic_adapter *adapter,
+ struct opa_vesw_info *info);
+void opa_vnic_get_per_veswport_info(struct opa_vnic_adapter *adapter,
+ struct opa_per_veswport_info *info);
+void opa_vnic_set_per_veswport_info(struct opa_vnic_adapter *adapter,
+ struct opa_per_veswport_info *info);
+void opa_vnic_vema_report_event(struct opa_vnic_adapter *adapter, u8 event);
+void opa_vnic_set_ethtool_ops(struct net_device *netdev);
+void opa_vnic_vema_send_trap(struct opa_vnic_adapter *adapter,
+ struct __opa_veswport_trap *data, u32 lid);
+
+#endif /* _OPA_VNIC_INTERNAL_H */
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
new file mode 100644
index 000000000000..1a3c25364b64
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
@@ -0,0 +1,387 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA Virtual Network Interface Controller (VNIC) driver
+ * netdev functionality.
+ */
+
+#include <linux/module.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+
+#include "opa_vnic_internal.h"
+
+#define OPA_TX_TIMEOUT_MS 1000
+
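+/* headroom for the OPA VNIC header and skb metadata, aligned to 8 bytes */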
+#define OPA_VNIC_SKB_HEADROOM \
+ ALIGN((OPA_VNIC_HDR_LEN + OPA_VNIC_SKB_MDATA_LEN), 8)
+
+/* This function is overloaded for opa_vnic specific implementation */
+static void opa_vnic_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ struct opa_vnic_stats vstats;
+
+ memset(&vstats, 0, sizeof(vstats));
+ spin_lock(&adapter->stats_lock);
+ adapter->rn_ops->ndo_get_stats64(netdev, &vstats.netstats);
+ spin_unlock(&adapter->stats_lock);
+ memcpy(stats, &vstats.netstats, sizeof(*stats));
+}
+
+/* opa_netdev_start_xmit - transmit function */
+static netdev_tx_t opa_netdev_start_xmit(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+
+ v_dbg("xmit: queue %d skb len %d\n", skb->queue_mapping, skb->len);
+ /* pad to ensure minimum ethernet packet length */
+ if (unlikely(skb->len < ETH_ZLEN)) {
+ if (skb_padto(skb, ETH_ZLEN))
+ return NETDEV_TX_OK;
+
+ skb_put(skb, ETH_ZLEN - skb->len);
+ }
+
+ opa_vnic_encap_skb(adapter, skb);
+ return adapter->rn_ops->ndo_start_xmit(skb, netdev);
+}
+
+static u16 opa_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb,
+ void *accel_priv,
+ select_queue_fallback_t fallback)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ struct opa_vnic_skb_mdata *mdata;
+ int rc;
+
+ /* pass entropy and vl as metadata in skb */
+ mdata = skb_push(skb, sizeof(*mdata));
+ mdata->entropy = opa_vnic_calc_entropy(adapter, skb);
+ mdata->vl = opa_vnic_get_vl(adapter, skb);
+ rc = adapter->rn_ops->ndo_select_queue(netdev, skb,
+ accel_priv, fallback);
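+ /* remove the temporary metadata header used for queue selection */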
+ skb_pull(skb, sizeof(*mdata));
+ return rc;
+}
+
+/* opa_vnic_process_vema_config - process vema configuration updates */
+void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter)
+{
+ struct __opa_veswport_info *info = &adapter->info;
+ struct rdma_netdev *rn = netdev_priv(adapter->netdev);
+ u8 port_num[OPA_VESW_MAX_NUM_DEF_PORT] = { 0 };
+ struct net_device *netdev = adapter->netdev;
+ u8 i, port_count = 0;
+ u16 port_mask;
+
+ /* If the base_mac_addr is changed, update the interface mac address */
+ if (memcmp(info->vport.base_mac_addr, adapter->vema_mac_addr,
+ ARRAY_SIZE(info->vport.base_mac_addr))) {
+ struct sockaddr saddr;
+
+ memcpy(saddr.sa_data, info->vport.base_mac_addr,
+ ARRAY_SIZE(info->vport.base_mac_addr));
+ mutex_lock(&adapter->lock);
+ eth_mac_addr(netdev, &saddr);
+ memcpy(adapter->vema_mac_addr,
+ info->vport.base_mac_addr, ETH_ALEN);
+ mutex_unlock(&adapter->lock);
+ }
+
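+ /* propagate the virtual ethernet switch id to the rdma netdev */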
+ rn->set_id(netdev, info->vesw.vesw_id);
+
+ /* Handle MTU limit change */
+ rtnl_lock();
+ netdev->max_mtu = max_t(unsigned int, info->vesw.eth_mtu_non_vlan,
+ netdev->min_mtu);
+ if (netdev->mtu > netdev->max_mtu)
+ dev_set_mtu(netdev, netdev->max_mtu);
+ rtnl_unlock();
+
+ /* Update flow to default port redirection table */
+ port_mask = info->vesw.def_port_mask;
+ for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++) {
+ if (port_mask & 1)
+ port_num[port_count++] = i;
+ port_mask >>= 1;
+ }
+
+ /*
+ * Build the flow table. Flow table is required when destination LID
+ * is not available. Up to OPA_VNIC_FLOW_TBL_SIZE flows are supported.
+ * Each flow needs a default port number to get its dlid from the
+ * u_ucast_dlid array.
+ */
+ for (i = 0; i < OPA_VNIC_FLOW_TBL_SIZE; i++)
+ adapter->flow_tbl[i] = port_count ? port_num[i % port_count] :
+ OPA_VNIC_INVALID_PORT;
+
+ /* Operational state can only be DROP_ALL or FORWARDING */
+ if (info->vport.config_state == OPA_VNIC_STATE_FORWARDING) {
+ info->vport.oper_state = OPA_VNIC_STATE_FORWARDING;
+ netif_dormant_off(netdev);
+ } else {
+ info->vport.oper_state = OPA_VNIC_STATE_DROP_ALL;
+ netif_dormant_on(netdev);
+ }
+}
+
+/*
+ * Set the power on default values in adapter's vema interface structure.
+ */
+static inline void opa_vnic_set_pod_values(struct opa_vnic_adapter *adapter)
+{
+ adapter->info.vport.max_mac_tbl_ent = OPA_VNIC_MAC_TBL_MAX_ENTRIES;
+ adapter->info.vport.max_smac_ent = OPA_VNIC_MAX_SMAC_LIMIT;
+ adapter->info.vport.config_state = OPA_VNIC_STATE_DROP_ALL;
+ adapter->info.vport.eth_link_status = OPA_VNIC_ETH_LINK_DOWN;
+}
+
+/* opa_vnic_set_mac_addr - change mac address */
+static int opa_vnic_set_mac_addr(struct net_device *netdev, void *addr)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ struct sockaddr *sa = addr;
+ int rc;
+
+ if (!memcmp(netdev->dev_addr, sa->sa_data, ETH_ALEN))
+ return 0;
+
+ mutex_lock(&adapter->lock);
+ rc = eth_mac_addr(netdev, addr);
+ mutex_unlock(&adapter->lock);
+ if (rc)
+ return rc;
+
+ adapter->info.vport.uc_macs_gen_count++;
+ opa_vnic_vema_report_event(adapter,
+ OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE);
+ return 0;
+}
+
+/*
+ * opa_vnic_mac_send_event - post event on possible mac list exchange
+ * Send trap when digest from uc/mc mac list differs from previous run.
+ * The digest is computed in a manner similar to a checksum.
+ */
+static void opa_vnic_mac_send_event(struct net_device *netdev, u8 event)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ struct netdev_hw_addr *ha;
+ struct netdev_hw_addr_list *hw_list;
+ u32 *ref_crc;
+ u32 l, crc = 0;
+
+ switch (event) {
+ case OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE:
+ hw_list = &netdev->uc;
+ adapter->info.vport.uc_macs_gen_count++;
+ ref_crc = &adapter->umac_hash;
+ break;
+ case OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE:
+ hw_list = &netdev->mc;
+ adapter->info.vport.mc_macs_gen_count++;
+ ref_crc = &adapter->mmac_hash;
+ break;
+ default:
+ return;
+ }
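+ /* fold each address and the total list length into a CRC32 based digest */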
+ netdev_hw_addr_list_for_each(ha, hw_list) {
+ crc = crc32_le(crc, ha->addr, ETH_ALEN);
+ }
+ l = netdev_hw_addr_list_count(hw_list) * ETH_ALEN;
+ crc = ~crc32_le(crc, (void *)&l, sizeof(l));
+
+ if (crc != *ref_crc) {
+ *ref_crc = crc;
+ opa_vnic_vema_report_event(adapter, event);
+ }
+}
+
+/* opa_vnic_set_rx_mode - handle uc/mc mac list change */
+static void opa_vnic_set_rx_mode(struct net_device *netdev)
+{
+ opa_vnic_mac_send_event(netdev,
+ OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE);
+
+ opa_vnic_mac_send_event(netdev,
+ OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE);
+}
+
+/* opa_netdev_open - activate network interface */
+static int opa_netdev_open(struct net_device *netdev)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ int rc;
+
+ rc = adapter->rn_ops->ndo_open(adapter->netdev);
+ if (rc) {
+ v_dbg("open failed %d\n", rc);
+ return rc;
+ }
+
+ /* Update eth link status and send trap */
+ adapter->info.vport.eth_link_status = OPA_VNIC_ETH_LINK_UP;
+ opa_vnic_vema_report_event(adapter,
+ OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE);
+ return 0;
+}
+
+/* opa_netdev_close - disable network interface */
+static int opa_netdev_close(struct net_device *netdev)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ int rc;
+
+ rc = adapter->rn_ops->ndo_stop(adapter->netdev);
+ if (rc) {
+ v_dbg("close failed %d\n", rc);
+ return rc;
+ }
+
+ /* Update eth link status and send trap */
+ adapter->info.vport.eth_link_status = OPA_VNIC_ETH_LINK_DOWN;
+ opa_vnic_vema_report_event(adapter,
+ OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE);
+ return 0;
+}
+
+/* netdev ops */
+static const struct net_device_ops opa_netdev_ops = {
+ .ndo_open = opa_netdev_open,
+ .ndo_stop = opa_netdev_close,
+ .ndo_start_xmit = opa_netdev_start_xmit,
+ .ndo_get_stats64 = opa_vnic_get_stats64,
+ .ndo_set_rx_mode = opa_vnic_set_rx_mode,
+ .ndo_select_queue = opa_vnic_select_queue,
+ .ndo_set_mac_address = opa_vnic_set_mac_addr,
+};
+
+/* opa_vnic_add_netdev - create vnic netdev interface */
+struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev,
+ u8 port_num, u8 vport_num)
+{
+ struct opa_vnic_adapter *adapter;
+ struct net_device *netdev;
+ struct rdma_netdev *rn;
+ int rc;
+
+ netdev = ibdev->alloc_rdma_netdev(ibdev, port_num,
+ RDMA_NETDEV_OPA_VNIC,
+ "veth%d", NET_NAME_UNKNOWN,
+ ether_setup);
+ if (!netdev)
+ return ERR_PTR(-ENOMEM);
+ else if (IS_ERR(netdev))
+ return ERR_CAST(netdev);
+
+ rn = netdev_priv(netdev);
+ adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+ if (!adapter) {
+ rc = -ENOMEM;
+ goto adapter_err;
+ }
+
+ rn->clnt_priv = adapter;
+ rn->hca = ibdev;
+ rn->port_num = port_num;
+ adapter->netdev = netdev;
+ adapter->ibdev = ibdev;
+ adapter->port_num = port_num;
+ adapter->vport_num = vport_num;
+ adapter->rn_ops = netdev->netdev_ops;
+
+ netdev->netdev_ops = &opa_netdev_ops;
+ netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ netdev->hard_header_len += OPA_VNIC_SKB_HEADROOM;
+ mutex_init(&adapter->lock);
+ mutex_init(&adapter->mactbl_lock);
+ spin_lock_init(&adapter->stats_lock);
+
+ SET_NETDEV_DEV(netdev, ibdev->dev.parent);
+
+ opa_vnic_set_ethtool_ops(netdev);
+
+ opa_vnic_set_pod_values(adapter);
+
+ rc = register_netdev(netdev);
+ if (rc)
+ goto netdev_err;
+
+ netif_carrier_off(netdev);
+ netif_dormant_on(netdev);
+ v_info("initialized\n");
+
+ return adapter;
+netdev_err:
+ mutex_destroy(&adapter->lock);
+ mutex_destroy(&adapter->mactbl_lock);
+ kfree(adapter);
+adapter_err:
+ rn->free_rdma_netdev(netdev);
+
+ return ERR_PTR(rc);
+}
+
+/* opa_vnic_rem_netdev - remove vnic netdev interface */
+void opa_vnic_rem_netdev(struct opa_vnic_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct rdma_netdev *rn = netdev_priv(netdev);
+
+ v_info("removing\n");
+ unregister_netdev(netdev);
+ opa_vnic_release_mac_tbl(adapter);
+ mutex_destroy(&adapter->lock);
+ mutex_destroy(&adapter->mactbl_lock);
+ kfree(adapter);
+ rn->free_rdma_netdev(netdev);
+}
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
new file mode 100644
index 000000000000..cf768dd78d1b
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
@@ -0,0 +1,1056 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA Virtual Network Interface Controller (VNIC)
+ * Ethernet Management Agent (EMA) driver
+ */
+
+#include <linux/module.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_smi.h>
+
+#include "opa_vnic_internal.h"
+
+#define DRV_VERSION "1.0"
+char opa_vnic_driver_name[] = "opa_vnic";
+const char opa_vnic_driver_version[] = DRV_VERSION;
+
+/*
+ * The trap service level is kept in bits 3 to 7 in the trap_sl_rsvd
+ * field in the class port info MAD.
+ */
+#define GET_TRAP_SL_FROM_CLASS_PORT_INFO(x) (((x) >> 3) & 0x1f)
+
+/* Cap trap bursts to a reasonable limit good for normal cases */
+#define OPA_VNIC_TRAP_BURST_LIMIT 4
+
+/*
+ * VNIC trap limit timeout.
+ * The inverse of the cap_mask2 response timeout (1.0737 secs) is
+ * approximately 0.9 secs; see IB spec 13.4.6.2.1 PortInfoSubnetTimeout and
+ * 13.4.9 Traps.
+ */
+#define OPA_VNIC_TRAP_TIMEOUT ((4096 * (1UL << 18)) / 1000)
+
+#define OPA_VNIC_UNSUP_ATTR \
+ cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB)
+
+#define OPA_VNIC_INVAL_ATTR \
+ cpu_to_be16(IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE)
+
+#define OPA_VNIC_CLASS_CAP_TRAP 0x1
+
+/* Maximum number of VNIC ports supported */
+#define OPA_VNIC_MAX_NUM_VPORT 255
+
+/**
+ * struct opa_vnic_vema_port -- VNIC VEMA port details
+ * @cport: pointer to port
+ * @mad_agent: pointer to mad agent for port
+ * @class_port_info: Class port info for the port
+ * @tid: Transaction id
+ * @port_num: OPA port number
+ * @vport_idr: vnic ports idr
+ * @event_handler: ib event handler
+ * @lock: adapter interface lock
+ */
+struct opa_vnic_vema_port {
+ struct opa_vnic_ctrl_port *cport;
+ struct ib_mad_agent *mad_agent;
+ struct opa_class_port_info class_port_info;
+ u64 tid;
+ u8 port_num;
+ struct idr vport_idr;
+ struct ib_event_handler event_handler;
+
+ /* Lock to query/update network adapter */
+ struct mutex lock;
+};
+
+static void opa_vnic_vema_add_one(struct ib_device *device);
+static void opa_vnic_vema_rem_one(struct ib_device *device,
+ void *client_data);
+
+static struct ib_client opa_vnic_client = {
+ .name = opa_vnic_driver_name,
+ .add = opa_vnic_vema_add_one,
+ .remove = opa_vnic_vema_rem_one,
+};
+
+/**
+ * vema_get_vport_num -- Get the vnic port number from the mad
+ * @recvd_mad: Received mad
+ *
+ * Return: the vnic port number
+ */
+static inline u8 vema_get_vport_num(struct opa_vnic_vema_mad *recvd_mad)
+{
+ return be32_to_cpu(recvd_mad->mad_hdr.attr_mod) & 0xff;
+}
+
+/**
+ * vema_get_vport_adapter -- Get vnic port adapter from recvd mad
+ * @recvd_mad: received mad
+ * @port: ptr to port struct on which MAD was recvd
+ *
+ * Return: vnic adapter
+ */
+static inline struct opa_vnic_adapter *
+vema_get_vport_adapter(struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_port *port)
+{
+ u8 vport_num = vema_get_vport_num(recvd_mad);
+
+ return idr_find(&port->vport_idr, vport_num);
+}
+
+/**
+ * vema_mac_tbl_req_ok -- Check if mac request has correct values
+ * @mac_tbl: mac table
+ *
+ * This function checks for the validity of the offset and number of
+ * entries required.
+ *
+ * Return: true if offset and num_entries are valid
+ */
+static inline bool vema_mac_tbl_req_ok(struct opa_veswport_mactable *mac_tbl)
+{
+ u16 offset, num_entries;
+ u16 req_entries = ((OPA_VNIC_EMA_DATA - sizeof(*mac_tbl)) /
+ sizeof(mac_tbl->tbl_entries[0]));
+
+ offset = be16_to_cpu(mac_tbl->offset);
+ num_entries = be16_to_cpu(mac_tbl->num_entries);
+
+ return ((num_entries <= req_entries) &&
+ (offset + num_entries <= OPA_VNIC_MAC_TBL_MAX_ENTRIES));
+}
+
+/*
+ * Return the power on default values in the port info structure
+ * in big endian format as required by MAD.
+ */
+static inline void vema_get_pod_values(struct opa_veswport_info *port_info)
+{
+ memset(port_info, 0, sizeof(*port_info));
+ port_info->vport.max_mac_tbl_ent =
+ cpu_to_be16(OPA_VNIC_MAC_TBL_MAX_ENTRIES);
+ port_info->vport.max_smac_ent =
+ cpu_to_be16(OPA_VNIC_MAX_SMAC_LIMIT);
+ port_info->vport.oper_state = OPA_VNIC_STATE_DROP_ALL;
+ port_info->vport.config_state = OPA_VNIC_STATE_DROP_ALL;
+}
+
+/**
+ * vema_add_vport -- Add a new vnic port
+ * @port: ptr to opa_vnic_vema_port struct
+ * @vport_num: vnic port number (to be added)
+ *
+ * Return a pointer to the vnic adapter structure
+ */
+static struct opa_vnic_adapter *vema_add_vport(struct opa_vnic_vema_port *port,
+ u8 vport_num)
+{
+ struct opa_vnic_ctrl_port *cport = port->cport;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = opa_vnic_add_netdev(cport->ibdev, port->port_num, vport_num);
+ if (!IS_ERR(adapter)) {
+ int rc;
+
+ adapter->cport = cport;
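+ /* reserve exactly this vport number in the per-port idr */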
+ rc = idr_alloc(&port->vport_idr, adapter, vport_num,
+ vport_num + 1, GFP_NOWAIT);
+ if (rc < 0) {
+ opa_vnic_rem_netdev(adapter);
+ adapter = ERR_PTR(rc);
+ }
+ }
+
+ return adapter;
+}
+
+/**
+ * vema_get_class_port_info -- Get class info for port
+ * @port: Port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ *
+ * This function copies the latest class port info value set for the
+ * port and stores it for generating traps
+ */
+static void vema_get_class_port_info(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_class_port_info *port_info;
+
+ port_info = (struct opa_class_port_info *)rsp_mad->data;
+ memcpy(port_info, &port->class_port_info, sizeof(*port_info));
+ port_info->base_version = OPA_MGMT_BASE_VERSION;
+ port_info->class_version = OPA_EMA_CLASS_VERSION;
+
+ /*
+ * Set capability mask bit indicating agent generates traps,
+ * and set the maximum number of VNIC ports supported.
+ */
+ port_info->cap_mask = cpu_to_be16((OPA_VNIC_CLASS_CAP_TRAP |
+ (OPA_VNIC_MAX_NUM_VPORT << 8)));
+
+ /*
+ * Since a get routine is always sent by the EM first we
+ * set the expected response time to
+ * 4.096 usec * 2^18 == 1.0737 sec here.
+ */
+ port_info->cap_mask2_resp_time = cpu_to_be32(18);
+}
+
+/**
+ * vema_set_class_port_info -- Get class info for port
+ * @port: Port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ *
+ * This function updates the port class info for the specific vnic
+ * and sets up the response mad data
+ */
+static void vema_set_class_port_info(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ memcpy(&port->class_port_info, recvd_mad->data,
+ sizeof(port->class_port_info));
+
+ vema_get_class_port_info(port, recvd_mad, rsp_mad);
+}
+
+/**
+ * vema_get_veswport_info -- Get veswport info
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ */
+static void vema_get_veswport_info(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_info *port_info =
+ (struct opa_veswport_info *)rsp_mad->data;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (adapter) {
+ memset(port_info, 0, sizeof(*port_info));
+ opa_vnic_get_vesw_info(adapter, &port_info->vesw);
+ opa_vnic_get_per_veswport_info(adapter,
+ &port_info->vport);
+ } else {
+ vema_get_pod_values(port_info);
+ }
+}
+
+/**
+ * vema_set_veswport_info -- Set veswport info
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ *
+ * This function updates the veswport info with the values received in the
+ * MAD, creating the vnic port if it does not already exist, and builds the
+ * response mad
+ */
+static void vema_set_veswport_info(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_vnic_ctrl_port *cport = port->cport;
+ struct opa_veswport_info *port_info;
+ struct opa_vnic_adapter *adapter;
+ u8 vport_num;
+
+ vport_num = vema_get_vport_num(recvd_mad);
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (!adapter) {
+ adapter = vema_add_vport(port, vport_num);
+ if (IS_ERR(adapter)) {
+ c_err("failed to add vport %d: %ld\n",
+ vport_num, PTR_ERR(adapter));
+ goto err_exit;
+ }
+ }
+
+ port_info = (struct opa_veswport_info *)recvd_mad->data;
+ opa_vnic_set_vesw_info(adapter, &port_info->vesw);
+ opa_vnic_set_per_veswport_info(adapter, &port_info->vport);
+
+ /* Process the new config settings */
+ opa_vnic_process_vema_config(adapter);
+
+ vema_get_veswport_info(port, recvd_mad, rsp_mad);
+ return;
+
+err_exit:
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+}
+
+/**
+ * vema_get_mac_entries -- Get MAC entries in VNIC MAC table
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ *
+ * This function gets the MAC entries that are programmed into
+ * the VNIC MAC forwarding table. It checks for the validity of
+ * the index into the MAC table and the number of entries that
+ * are to be retrieved.
+ */
+static void vema_get_mac_entries(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_mactable *mac_tbl_in, *mac_tbl_out;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (!adapter) {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ return;
+ }
+
+ mac_tbl_in = (struct opa_veswport_mactable *)recvd_mad->data;
+ mac_tbl_out = (struct opa_veswport_mactable *)rsp_mad->data;
+
+ if (vema_mac_tbl_req_ok(mac_tbl_in)) {
+ mac_tbl_out->offset = mac_tbl_in->offset;
+ mac_tbl_out->num_entries = mac_tbl_in->num_entries;
+ opa_vnic_query_mac_tbl(adapter, mac_tbl_out);
+ } else {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ }
+}
+
+/**
+ * vema_set_mac_entries -- Set MAC entries in VNIC MAC table
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ *
+ * This function sets the MAC entries in the VNIC forwarding table
+ * It checks for the validity of the index and the number of forwarding
+ * table entries to be programmed.
+ */
+static void vema_set_mac_entries(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_mactable *mac_tbl;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (!adapter) {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ return;
+ }
+
+ mac_tbl = (struct opa_veswport_mactable *)recvd_mad->data;
+ if (vema_mac_tbl_req_ok(mac_tbl)) {
+ if (opa_vnic_update_mac_tbl(adapter, mac_tbl))
+ rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+ } else {
+ rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+ }
+ vema_get_mac_entries(port, recvd_mad, rsp_mad);
+}
+
+/**
+ * vema_set_delete_vesw -- Reset VESW info to POD values
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ *
+ * This function clears all the fields of veswport info for the requested vesw
+ * and sets them back to the power-on default values. It does not delete the
+ * vesw.
+ */
+static void vema_set_delete_vesw(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_info *port_info =
+ (struct opa_veswport_info *)rsp_mad->data;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (!adapter) {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ return;
+ }
+
+ vema_get_pod_values(port_info);
+ opa_vnic_set_vesw_info(adapter, &port_info->vesw);
+ opa_vnic_set_per_veswport_info(adapter, &port_info->vport);
+
+ /* Process the new config settings */
+ opa_vnic_process_vema_config(adapter);
+
+ opa_vnic_release_mac_tbl(adapter);
+
+ vema_get_veswport_info(port, recvd_mad, rsp_mad);
+}
+
+/**
+ * vema_get_mac_list -- Get the unicast/multicast macs.
+ * @port: source port on which MAD was received
+ * @recvd_mad: Received mad contains fields to set vnic parameters
+ * @rsp_mad: Response mad to be built
+ * @attr_id: Attribute ID indicating multicast or unicast mac list
+ */
+static void vema_get_mac_list(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad,
+ u16 attr_id)
+{
+ struct opa_veswport_iface_macs *macs_in, *macs_out;
+ int max_entries = (OPA_VNIC_EMA_DATA - sizeof(*macs_out)) / ETH_ALEN;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (!adapter) {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ return;
+ }
+
+ macs_in = (struct opa_veswport_iface_macs *)recvd_mad->data;
+ macs_out = (struct opa_veswport_iface_macs *)rsp_mad->data;
+
+ macs_out->start_idx = macs_in->start_idx;
+ if (macs_in->num_macs_in_msg)
+ macs_out->num_macs_in_msg = macs_in->num_macs_in_msg;
+ else
+ macs_out->num_macs_in_msg = cpu_to_be16(max_entries);
+
+ if (attr_id == OPA_EM_ATTR_IFACE_MCAST_MACS)
+ opa_vnic_query_mcast_macs(adapter, macs_out);
+ else
+ opa_vnic_query_ucast_macs(adapter, macs_out);
+}
+
+/**
+ * vema_get_summary_counters -- Gets summary counters.
+ * @port: source port on which MAD was received
+ * @recvd_mad: Received mad contains fields to set vnic parameters
+ * @rsp_mad: Response mad to be built
+ */
+static void vema_get_summary_counters(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_summary_counters *cntrs;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (adapter) {
+ cntrs = (struct opa_veswport_summary_counters *)rsp_mad->data;
+ opa_vnic_get_summary_counters(adapter, cntrs);
+ } else {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ }
+}
+
+/**
+ * vema_get_error_counters -- Gets error counters.
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: Response mad to be built
+ */
+static void vema_get_error_counters(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_error_counters *cntrs;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (adapter) {
+ cntrs = (struct opa_veswport_error_counters *)rsp_mad->data;
+ opa_vnic_get_error_counters(adapter, cntrs);
+ } else {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ }
+}
+
+/**
+ * vema_get -- Process received get MAD
+ * @port: source port on which MAD was received
+ * @recvd_mad: Received mad
+ * @rsp_mad: Response mad to be built
+ */
+static void vema_get(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ u16 attr_id = be16_to_cpu(recvd_mad->mad_hdr.attr_id);
+
+ switch (attr_id) {
+ case OPA_EM_ATTR_CLASS_PORT_INFO:
+ vema_get_class_port_info(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_VESWPORT_INFO:
+ vema_get_veswport_info(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_VESWPORT_MAC_ENTRIES:
+ vema_get_mac_entries(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_IFACE_UCAST_MACS:
+ /* fall through */
+ case OPA_EM_ATTR_IFACE_MCAST_MACS:
+ vema_get_mac_list(port, recvd_mad, rsp_mad, attr_id);
+ break;
+ case OPA_EM_ATTR_VESWPORT_SUMMARY_COUNTERS:
+ vema_get_summary_counters(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_VESWPORT_ERROR_COUNTERS:
+ vema_get_error_counters(port, recvd_mad, rsp_mad);
+ break;
+ default:
+ rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+ break;
+ }
+}
+
+/**
+ * vema_set -- Process received set MAD
+ * @port: source port on which MAD was received
+ * @recvd_mad: Received mad contains fields to set vnic parameters
+ * @rsp_mad: Response mad to be built
+ */
+static void vema_set(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ u16 attr_id = be16_to_cpu(recvd_mad->mad_hdr.attr_id);
+
+ switch (attr_id) {
+ case OPA_EM_ATTR_CLASS_PORT_INFO:
+ vema_set_class_port_info(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_VESWPORT_INFO:
+ vema_set_veswport_info(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_VESWPORT_MAC_ENTRIES:
+ vema_set_mac_entries(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_DELETE_VESW:
+ vema_set_delete_vesw(port, recvd_mad, rsp_mad);
+ break;
+ default:
+ rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+ break;
+ }
+}
+
+/**
+ * vema_send -- Send handler for VEMA MAD agent
+ * @mad_agent: pointer to the mad agent
+ * @mad_wc: pointer to mad send work completion information
+ *
+ * Free all the data structures associated with the sent MAD
+ */
+static void vema_send(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_wc *mad_wc)
+{
+ rdma_destroy_ah(mad_wc->send_buf->ah);
+ ib_free_send_mad(mad_wc->send_buf);
+}
+
+/**
+ * vema_recv -- Recv handler for VEMA MAD agent
+ * @mad_agent: pointer to the mad agent
+ * @send_buf: Send buffer if found, else NULL
+ * @mad_wc: pointer to mad send work completion information
+ *
+ * Handle only set and get methods and respond to other methods
+ * as unsupported. Allocate response buffer and address handle
+ * for the response MAD.
+ */
+static void vema_recv(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf,
+ struct ib_mad_recv_wc *mad_wc)
+{
+ struct opa_vnic_vema_port *port;
+ struct ib_ah *ah;
+ struct ib_mad_send_buf *rsp;
+ struct opa_vnic_vema_mad *vema_mad;
+
+ if (!mad_wc || !mad_wc->recv_buf.mad)
+ return;
+
+ port = mad_agent->context;
+ ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
+ mad_wc->recv_buf.grh, mad_agent->port_num);
+ if (IS_ERR(ah))
+ goto free_recv_mad;
+
+ rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
+ mad_wc->wc->pkey_index, 0,
+ IB_MGMT_VENDOR_HDR, OPA_VNIC_EMA_DATA,
+ GFP_KERNEL, OPA_MGMT_BASE_VERSION);
+ if (IS_ERR(rsp))
+ goto err_rsp;
+
+ rsp->ah = ah;
+ vema_mad = rsp->mad;
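+ /* echo the received vendor MAD header and convert it into a GetResp */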
+ memcpy(vema_mad, mad_wc->recv_buf.mad, IB_MGMT_VENDOR_HDR);
+ vema_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
+ vema_mad->mad_hdr.status = 0;
+
+ /* Lock ensures network adapter is not removed */
+ mutex_lock(&port->lock);
+
+ switch (mad_wc->recv_buf.mad->mad_hdr.method) {
+ case IB_MGMT_METHOD_GET:
+ vema_get(port, (struct opa_vnic_vema_mad *)mad_wc->recv_buf.mad,
+ vema_mad);
+ break;
+ case IB_MGMT_METHOD_SET:
+ vema_set(port, (struct opa_vnic_vema_mad *)mad_wc->recv_buf.mad,
+ vema_mad);
+ break;
+ default:
+ vema_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+ break;
+ }
+ mutex_unlock(&port->lock);
+
+ if (!ib_post_send_mad(rsp, NULL)) {
+ /*
+ * with post send successful ah and send mad
+ * will be destroyed in send handler
+ */
+ goto free_recv_mad;
+ }
+
+ ib_free_send_mad(rsp);
+
+err_rsp:
+ rdma_destroy_ah(ah);
+free_recv_mad:
+ ib_free_recv_mad(mad_wc);
+}
+
+/**
+ * vema_get_port -- Gets the opa_vnic_vema_port
+ * @cport: pointer to control dev
+ * @port_num: Port number
+ *
+ * This function loops through the ports and returns
+ * the opa_vnic_vema_port structure that is associated
+ * with the OPA port number.
+ *
+ * Return: pointer to the requested opa_vnic_vema_port structure
+ * on success, NULL otherwise.
+ */
+static struct opa_vnic_vema_port *
+vema_get_port(struct opa_vnic_ctrl_port *cport, u8 port_num)
+{
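+ /* per-port structures are laid out contiguously after the control port */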
+ struct opa_vnic_vema_port *port = (void *)cport + sizeof(*cport);
+
+ if (port_num > cport->num_ports)
+ return NULL;
+
+ return port + (port_num - 1);
+}
+
+/**
+ * opa_vnic_vema_send_trap -- This function sends a trap to the EM
+ * @adapter: pointer to the vnic adapter
+ * @data: pointer to trap data filled by calling function
+ * @lid: issuer's lid (encap_slid from vesw_port_info)
+ *
+ * This function is called from the VNIC driver to send a trap if there
+ * is something the EM should be notified about. These events currently
+ * are
+ * 1) UNICAST INTERFACE MACADDRESS changes
+ * 2) MULTICAST INTERFACE MACADDRESS changes
+ * 3) ETHERNET LINK STATUS changes
+ * While allocating the send MAD, the remote side QPN used is 1,
+ * as this is the well-known QP.
+ *
+ */
+void opa_vnic_vema_send_trap(struct opa_vnic_adapter *adapter,
+ struct __opa_veswport_trap *data, u32 lid)
+{
+ struct opa_vnic_ctrl_port *cport = adapter->cport;
+ struct ib_mad_send_buf *send_buf;
+ struct opa_vnic_vema_port *port;
+ struct ib_device *ibp;
+ struct opa_vnic_vema_mad_trap *trap_mad;
+ struct opa_class_port_info *class;
+ struct rdma_ah_attr ah_attr;
+ struct ib_ah *ah;
+ struct opa_veswport_trap *trap;
+ u32 trap_lid;
+ u16 pkey_idx;
+
+ if (!cport)
+ goto err_exit;
+ ibp = cport->ibdev;
+ port = vema_get_port(cport, data->opaportnum);
+ if (!port || !port->mad_agent)
+ goto err_exit;
+
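+ /* rate limit traps: allow at most OPA_VNIC_TRAP_BURST_LIMIT traps per timeout window */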
+ if (time_before(jiffies, adapter->trap_timeout)) {
+ if (adapter->trap_count == OPA_VNIC_TRAP_BURST_LIMIT) {
+ v_warn("Trap rate exceeded\n");
+ goto err_exit;
+ } else {
+ adapter->trap_count++;
+ }
+ } else {
+ adapter->trap_count = 0;
+ }
+
+ class = &port->class_port_info;
+ /* Set up address handle */
+ memset(&ah_attr, 0, sizeof(ah_attr));
+ ah_attr.type = rdma_ah_find_type(ibp, port->port_num);
+ rdma_ah_set_sl(&ah_attr,
+ GET_TRAP_SL_FROM_CLASS_PORT_INFO(class->trap_sl_rsvd));
+ rdma_ah_set_port_num(&ah_attr, port->port_num);
+ trap_lid = be32_to_cpu(class->trap_lid);
+ /*
+ * Check the trap lid for validity; it must not be zero.
+ * The trap sink could change after we build the MAD, but since traps
+ * are not guaranteed anyway, we don't take a lock; the change could
+ * happen even with locking.
+ */
+ if (!trap_lid) {
+ c_err("%s: Invalid dlid\n", __func__);
+ goto err_exit;
+ }
+
+ rdma_ah_set_dlid(&ah_attr, trap_lid);
+ ah = rdma_create_ah(port->mad_agent->qp->pd, &ah_attr);
+ if (IS_ERR(ah)) {
+ c_err("%s:Couldn't create new AH = %p\n", __func__, ah);
+ c_err("%s:dlid = %d, sl = %d, port = %d\n", __func__,
+ rdma_ah_get_dlid(&ah_attr), rdma_ah_get_sl(&ah_attr),
+ rdma_ah_get_port_num(&ah_attr));
+ goto err_exit;
+ }
+
+ if (ib_find_pkey(ibp, data->opaportnum, IB_DEFAULT_PKEY_FULL,
+ &pkey_idx) < 0) {
+ c_err("%s:full key not found, defaulting to partial\n",
+ __func__);
+ if (ib_find_pkey(ibp, data->opaportnum, IB_DEFAULT_PKEY_PARTIAL,
+ &pkey_idx) < 0)
+ pkey_idx = 1;
+ }
+
+ send_buf = ib_create_send_mad(port->mad_agent, 1, pkey_idx, 0,
+ IB_MGMT_VENDOR_HDR, IB_MGMT_MAD_DATA,
+ GFP_ATOMIC, OPA_MGMT_BASE_VERSION);
+ if (IS_ERR(send_buf)) {
+ c_err("%s:Couldn't allocate send buf\n", __func__);
+ goto err_sndbuf;
+ }
+
+ send_buf->ah = ah;
+
+ /* Set up common MAD hdr */
+ trap_mad = send_buf->mad;
+ trap_mad->mad_hdr.base_version = OPA_MGMT_BASE_VERSION;
+ trap_mad->mad_hdr.mgmt_class = OPA_MGMT_CLASS_INTEL_EMA;
+ trap_mad->mad_hdr.class_version = OPA_EMA_CLASS_VERSION;
+ trap_mad->mad_hdr.method = IB_MGMT_METHOD_TRAP;
+ port->tid++;
+ trap_mad->mad_hdr.tid = cpu_to_be64(port->tid);
+ trap_mad->mad_hdr.attr_id = IB_SMP_ATTR_NOTICE;
+
+ /* Set up vendor OUI */
+ trap_mad->oui[0] = INTEL_OUI_1;
+ trap_mad->oui[1] = INTEL_OUI_2;
+ trap_mad->oui[2] = INTEL_OUI_3;
+
+ /* Setup notice attribute portion */
+ trap_mad->notice.gen_type = OPA_INTEL_EMA_NOTICE_TYPE_INFO << 1;
+ trap_mad->notice.oui_1 = INTEL_OUI_1;
+ trap_mad->notice.oui_2 = INTEL_OUI_2;
+ trap_mad->notice.oui_3 = INTEL_OUI_3;
+ trap_mad->notice.issuer_lid = cpu_to_be32(lid);
+
+ /* copy the actual trap data */
+ trap = (struct opa_veswport_trap *)trap_mad->notice.raw_data;
+ trap->fabric_id = cpu_to_be16(data->fabric_id);
+ trap->veswid = cpu_to_be16(data->veswid);
+ trap->veswportnum = cpu_to_be32(data->veswportnum);
+ trap->opaportnum = cpu_to_be16(data->opaportnum);
+ trap->veswportindex = data->veswportindex;
+ trap->opcode = data->opcode;
+
+ /* If the send is successful, set up the rate limit timeout; else bail */
+ if (ib_post_send_mad(send_buf, NULL)) {
+ ib_free_send_mad(send_buf);
+ } else {
+ if (adapter->trap_count)
+ return;
+ adapter->trap_timeout = jiffies +
+ usecs_to_jiffies(OPA_VNIC_TRAP_TIMEOUT);
+ return;
+ }
+
+err_sndbuf:
+ rdma_destroy_ah(ah);
+err_exit:
+ v_err("Aborting trap\n");
+}
+
+static int vema_rem_vport(int id, void *p, void *data)
+{
+ struct opa_vnic_adapter *adapter = p;
+
+ opa_vnic_rem_netdev(adapter);
+ return 0;
+}
+
+static int vema_enable_vport(int id, void *p, void *data)
+{
+ struct opa_vnic_adapter *adapter = p;
+
+ netif_carrier_on(adapter->netdev);
+ return 0;
+}
+
+static int vema_disable_vport(int id, void *p, void *data)
+{
+ struct opa_vnic_adapter *adapter = p;
+
+ netif_carrier_off(adapter->netdev);
+ return 0;
+}
+
+static void opa_vnic_event(struct ib_event_handler *handler,
+ struct ib_event *record)
+{
+ struct opa_vnic_vema_port *port =
+ container_of(handler, struct opa_vnic_vema_port, event_handler);
+ struct opa_vnic_ctrl_port *cport = port->cport;
+
+ if (record->element.port_num != port->port_num)
+ return;
+
+ c_dbg("OPA_VNIC received event %d on device %s port %d\n",
+ record->event, record->device->name, record->element.port_num);
+
+ if (record->event == IB_EVENT_PORT_ERR)
+ idr_for_each(&port->vport_idr, vema_disable_vport, NULL);
+ if (record->event == IB_EVENT_PORT_ACTIVE)
+ idr_for_each(&port->vport_idr, vema_enable_vport, NULL);
+}
+
+/**
+ * vema_unregister -- Unregisters agent
+ * @cport: pointer to control port
+ *
+ * This removes the MAD agent registration done by VEMA for each port
+ */
+static void vema_unregister(struct opa_vnic_ctrl_port *cport)
+{
+ int i;
+
+ for (i = 1; i <= cport->num_ports; i++) {
+ struct opa_vnic_vema_port *port = vema_get_port(cport, i);
+
+ if (!port->mad_agent)
+ continue;
+
+ /* Lock ensures no MAD is being processed */
+ mutex_lock(&port->lock);
+ idr_for_each(&port->vport_idr, vema_rem_vport, NULL);
+ mutex_unlock(&port->lock);
+
+ ib_unregister_mad_agent(port->mad_agent);
+ port->mad_agent = NULL;
+ mutex_destroy(&port->lock);
+ idr_destroy(&port->vport_idr);
+ ib_unregister_event_handler(&port->event_handler);
+ }
+}
+
+/**
+ * vema_register -- Registers agent
+ * @cport: pointer to control port
+ *
+ * This function registers the handlers for the VEMA MADs
+ *
+ * Return: 0 on success, non-zero otherwise
+ */
+static int vema_register(struct opa_vnic_ctrl_port *cport)
+{
+ struct ib_mad_reg_req reg_req = {
+ .mgmt_class = OPA_MGMT_CLASS_INTEL_EMA,
+ .mgmt_class_version = OPA_MGMT_BASE_VERSION,
+ .oui = { INTEL_OUI_1, INTEL_OUI_2, INTEL_OUI_3 }
+ };
+ int i;
+
+ set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
+ set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);
+
+ /* register ib event handler and mad agent for each port on dev */
+ for (i = 1; i <= cport->num_ports; i++) {
+ struct opa_vnic_vema_port *port = vema_get_port(cport, i);
+ int ret;
+
+ port->cport = cport;
+ port->port_num = i;
+
+ INIT_IB_EVENT_HANDLER(&port->event_handler,
+ cport->ibdev, opa_vnic_event);
+ ret = ib_register_event_handler(&port->event_handler);
+ if (ret) {
+ c_err("port %d: event handler register failed\n", i);
+ vema_unregister(cport);
+ return ret;
+ }
+
+ idr_init(&port->vport_idr);
+ mutex_init(&port->lock);
+ port->mad_agent = ib_register_mad_agent(cport->ibdev, i,
+ IB_QPT_GSI, &reg_req,
+ IB_MGMT_RMPP_VERSION,
+ vema_send, vema_recv,
+ port, 0);
+ if (IS_ERR(port->mad_agent)) {
+ ret = PTR_ERR(port->mad_agent);
+ port->mad_agent = NULL;
+ mutex_destroy(&port->lock);
+ idr_destroy(&port->vport_idr);
+ vema_unregister(cport);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * opa_vnic_vema_add_one -- Handle new ib device
+ * @device: ib device pointer
+ *
+ * Allocate the vnic control port and initialize it.
+ */
+static void opa_vnic_vema_add_one(struct ib_device *device)
+{
+ struct opa_vnic_ctrl_port *cport;
+ int rc, size = sizeof(*cport);
+
+ if (!rdma_cap_opa_vnic(device))
+ return;
+
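+ /* allocate the per-port vema structures contiguously after the control port (see vema_get_port) */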
+ size += device->phys_port_cnt * sizeof(struct opa_vnic_vema_port);
+ cport = kzalloc(size, GFP_KERNEL);
+ if (!cport)
+ return;
+
+ cport->num_ports = device->phys_port_cnt;
+ cport->ibdev = device;
+
+ /* Initialize opa vnic management agent (vema) */
+ rc = vema_register(cport);
+ if (!rc)
+ c_info("VNIC client initialized\n");
+
+ ib_set_client_data(device, &opa_vnic_client, cport);
+}
+
+/**
+ * opa_vnic_vema_rem_one -- Handle ib device removal
+ * @device: ib device pointer
+ * @client_data: ib client data
+ *
+ * Uninitialize and free the vnic control port.
+ */
+static void opa_vnic_vema_rem_one(struct ib_device *device,
+ void *client_data)
+{
+ struct opa_vnic_ctrl_port *cport = client_data;
+
+ if (!cport)
+ return;
+
+ c_info("removing VNIC client\n");
+ vema_unregister(cport);
+ kfree(cport);
+}
+
+static int __init opa_vnic_init(void)
+{
+ int rc;
+
+ pr_info("OPA Virtual Network Driver - v%s\n",
+ opa_vnic_driver_version);
+
+ rc = ib_register_client(&opa_vnic_client);
+ if (rc)
+ pr_err("VNIC driver register failed %d\n", rc);
+
+ return rc;
+}
+module_init(opa_vnic_init);
+
+static void opa_vnic_deinit(void)
+{
+ ib_unregister_client(&opa_vnic_client);
+}
+module_exit(opa_vnic_deinit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel OPA Virtual Network driver");
+MODULE_VERSION(DRV_VERSION);
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c
new file mode 100644
index 000000000000..c2733964379c
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c
@@ -0,0 +1,390 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA VNIC EMA Interface functions.
+ */
+
+#include "opa_vnic_internal.h"
+
+/**
+ * opa_vnic_vema_report_event - send trap to report the specified event
+ * @adapter: vnic port adapter
+ * @event: event to be reported
+ *
+ * This function calls the vema api to send a trap for the given event.
+ */
+void opa_vnic_vema_report_event(struct opa_vnic_adapter *adapter, u8 event)
+{
+ struct __opa_veswport_info *info = &adapter->info;
+ struct __opa_veswport_trap trap_data;
+
+ trap_data.fabric_id = info->vesw.fabric_id;
+ trap_data.veswid = info->vesw.vesw_id;
+ trap_data.veswportnum = info->vport.port_num;
+ trap_data.opaportnum = adapter->port_num;
+ trap_data.veswportindex = adapter->vport_num;
+ trap_data.opcode = event;
+
+ opa_vnic_vema_send_trap(adapter, &trap_data, info->vport.encap_slid);
+}
+
+/**
+ * opa_vnic_get_summary_counters - get summary counters
+ * @adapter: vnic port adapter
+ * @cntrs: pointer to destination summary counters structure
+ *
+ * This function populates the summary counters that are maintained by the
+ * given adapter into the destination structure provided.
+ */
+void opa_vnic_get_summary_counters(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_summary_counters *cntrs)
+{
+ struct opa_vnic_stats vstats;
+ __be64 *dst;
+ u64 *src;
+
+ memset(&vstats, 0, sizeof(vstats));
+ spin_lock(&adapter->stats_lock);
+ adapter->rn_ops->ndo_get_stats64(adapter->netdev, &vstats.netstats);
+ spin_unlock(&adapter->stats_lock);
+
+ cntrs->vp_instance = cpu_to_be16(adapter->vport_num);
+ cntrs->vesw_id = cpu_to_be16(adapter->info.vesw.vesw_id);
+ cntrs->veswport_num = cpu_to_be32(adapter->port_num);
+
+ cntrs->tx_errors = cpu_to_be64(vstats.netstats.tx_errors);
+ cntrs->rx_errors = cpu_to_be64(vstats.netstats.rx_errors);
+ cntrs->tx_packets = cpu_to_be64(vstats.netstats.tx_packets);
+ cntrs->rx_packets = cpu_to_be64(vstats.netstats.rx_packets);
+ cntrs->tx_bytes = cpu_to_be64(vstats.netstats.tx_bytes);
+ cntrs->rx_bytes = cpu_to_be64(vstats.netstats.rx_bytes);
+
+ /*
+ * This loop depends on layout of
+ * opa_veswport_summary_counters opa_vnic_stats structures.
+ */
+ for (dst = &cntrs->tx_unicast, src = &vstats.tx_grp.unicast;
+ dst < &cntrs->reserved[0]; dst++, src++) {
+ *dst = cpu_to_be64(*src);
+ }
+}
+
+/**
+ * opa_vnic_get_error_counters - get error counters
+ * @adapter: vnic port adapter
+ * @cntrs: pointer to destination error counters structure
+ *
+ * This function populates the error counters that are maintained by the
+ * given adapter into the destination structure provided.
+ */
+void opa_vnic_get_error_counters(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_error_counters *cntrs)
+{
+ struct opa_vnic_stats vstats;
+
+ memset(&vstats, 0, sizeof(vstats));
+ spin_lock(&adapter->stats_lock);
+ adapter->rn_ops->ndo_get_stats64(adapter->netdev, &vstats.netstats);
+ spin_unlock(&adapter->stats_lock);
+
+ cntrs->vp_instance = cpu_to_be16(adapter->vport_num);
+ cntrs->vesw_id = cpu_to_be16(adapter->info.vesw.vesw_id);
+ cntrs->veswport_num = cpu_to_be32(adapter->port_num);
+
+ cntrs->tx_errors = cpu_to_be64(vstats.netstats.tx_errors);
+ cntrs->rx_errors = cpu_to_be64(vstats.netstats.rx_errors);
+ cntrs->tx_dlid_zero = cpu_to_be64(vstats.tx_dlid_zero);
+ cntrs->tx_drop_state = cpu_to_be64(vstats.tx_drop_state);
+ cntrs->tx_logic = cpu_to_be64(vstats.netstats.tx_fifo_errors +
+ vstats.netstats.tx_carrier_errors);
+
+ cntrs->rx_bad_veswid = cpu_to_be64(vstats.netstats.rx_nohandler);
+ cntrs->rx_runt = cpu_to_be64(vstats.rx_runt);
+ cntrs->rx_oversize = cpu_to_be64(vstats.rx_oversize);
+ cntrs->rx_drop_state = cpu_to_be64(vstats.rx_drop_state);
+ cntrs->rx_logic = cpu_to_be64(vstats.netstats.rx_fifo_errors);
+}
+
+/**
+ * opa_vnic_get_vesw_info -- Get the vesw information
+ * @adapter: vnic port adapter
+ * @info: pointer to destination vesw info structure
+ *
+ * This function copies the vesw info that is maintained by the
+ * given adapter to the destination address provided.
+ */
+void opa_vnic_get_vesw_info(struct opa_vnic_adapter *adapter,
+ struct opa_vesw_info *info)
+{
+ struct __opa_vesw_info *src = &adapter->info.vesw;
+ int i;
+
+ info->fabric_id = cpu_to_be16(src->fabric_id);
+ info->vesw_id = cpu_to_be16(src->vesw_id);
+ memcpy(info->rsvd0, src->rsvd0, ARRAY_SIZE(src->rsvd0));
+ info->def_port_mask = cpu_to_be16(src->def_port_mask);
+ memcpy(info->rsvd1, src->rsvd1, ARRAY_SIZE(src->rsvd1));
+ info->pkey = cpu_to_be16(src->pkey);
+
+ memcpy(info->rsvd2, src->rsvd2, ARRAY_SIZE(src->rsvd2));
+ info->u_mcast_dlid = cpu_to_be32(src->u_mcast_dlid);
+ for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++)
+ info->u_ucast_dlid[i] = cpu_to_be32(src->u_ucast_dlid[i]);
+
+ memcpy(info->rsvd3, src->rsvd3, ARRAY_SIZE(src->rsvd3));
+ for (i = 0; i < OPA_VNIC_MAX_NUM_PCP; i++)
+ info->eth_mtu[i] = cpu_to_be16(src->eth_mtu[i]);
+
+ info->eth_mtu_non_vlan = cpu_to_be16(src->eth_mtu_non_vlan);
+ memcpy(info->rsvd4, src->rsvd4, ARRAY_SIZE(src->rsvd4));
+}
+
+/**
+ * opa_vnic_set_vesw_info -- Set the vesw information
+ * @adapter: vnic port adapter
+ * @info: pointer to vesw info structure
+ *
+ * This function updates the vesw info that is maintained by the
+ * given adapter with vesw info provided. Reserved fields are stored
+ * and returned back to EM as is.
+ */
+void opa_vnic_set_vesw_info(struct opa_vnic_adapter *adapter,
+ struct opa_vesw_info *info)
+{
+ struct __opa_vesw_info *dst = &adapter->info.vesw;
+ int i;
+
+ dst->fabric_id = be16_to_cpu(info->fabric_id);
+ dst->vesw_id = be16_to_cpu(info->vesw_id);
+ memcpy(dst->rsvd0, info->rsvd0, ARRAY_SIZE(info->rsvd0));
+ dst->def_port_mask = be16_to_cpu(info->def_port_mask);
+ memcpy(dst->rsvd1, info->rsvd1, ARRAY_SIZE(info->rsvd1));
+ dst->pkey = be16_to_cpu(info->pkey);
+
+ memcpy(dst->rsvd2, info->rsvd2, ARRAY_SIZE(info->rsvd2));
+ dst->u_mcast_dlid = be32_to_cpu(info->u_mcast_dlid);
+ for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++)
+ dst->u_ucast_dlid[i] = be32_to_cpu(info->u_ucast_dlid[i]);
+
+ memcpy(dst->rsvd3, info->rsvd3, ARRAY_SIZE(info->rsvd3));
+ for (i = 0; i < OPA_VNIC_MAX_NUM_PCP; i++)
+ dst->eth_mtu[i] = be16_to_cpu(info->eth_mtu[i]);
+
+ dst->eth_mtu_non_vlan = be16_to_cpu(info->eth_mtu_non_vlan);
+ memcpy(dst->rsvd4, info->rsvd4, ARRAY_SIZE(info->rsvd4));
+}
+
+/**
+ * opa_vnic_get_per_veswport_info -- Get the vesw per port information
+ * @adapter: vnic port adapter
+ * @info: pointer to destination vport info structure
+ *
+ * This function copies the vesw per port info that is maintained by the
+ * given adapter to the destination address provided.
+ * Note that the read only fields are not copied.
+ */
+void opa_vnic_get_per_veswport_info(struct opa_vnic_adapter *adapter,
+ struct opa_per_veswport_info *info)
+{
+ struct __opa_per_veswport_info *src = &adapter->info.vport;
+
+ info->port_num = cpu_to_be32(src->port_num);
+ info->eth_link_status = src->eth_link_status;
+ memcpy(info->rsvd0, src->rsvd0, ARRAY_SIZE(src->rsvd0));
+
+ memcpy(info->base_mac_addr, src->base_mac_addr,
+ ARRAY_SIZE(info->base_mac_addr));
+ info->config_state = src->config_state;
+ info->oper_state = src->oper_state;
+ info->max_mac_tbl_ent = cpu_to_be16(src->max_mac_tbl_ent);
+ info->max_smac_ent = cpu_to_be16(src->max_smac_ent);
+ info->mac_tbl_digest = cpu_to_be32(src->mac_tbl_digest);
+ memcpy(info->rsvd1, src->rsvd1, ARRAY_SIZE(src->rsvd1));
+
+ info->encap_slid = cpu_to_be32(src->encap_slid);
+ memcpy(info->pcp_to_sc_uc, src->pcp_to_sc_uc,
+ ARRAY_SIZE(info->pcp_to_sc_uc));
+ memcpy(info->pcp_to_vl_uc, src->pcp_to_vl_uc,
+ ARRAY_SIZE(info->pcp_to_vl_uc));
+ memcpy(info->pcp_to_sc_mc, src->pcp_to_sc_mc,
+ ARRAY_SIZE(info->pcp_to_sc_mc));
+ memcpy(info->pcp_to_vl_mc, src->pcp_to_vl_mc,
+ ARRAY_SIZE(info->pcp_to_vl_mc));
+ info->non_vlan_sc_uc = src->non_vlan_sc_uc;
+ info->non_vlan_vl_uc = src->non_vlan_vl_uc;
+ info->non_vlan_sc_mc = src->non_vlan_sc_mc;
+ info->non_vlan_vl_mc = src->non_vlan_vl_mc;
+ memcpy(info->rsvd2, src->rsvd2, ARRAY_SIZE(src->rsvd2));
+
+ info->uc_macs_gen_count = cpu_to_be16(src->uc_macs_gen_count);
+ info->mc_macs_gen_count = cpu_to_be16(src->mc_macs_gen_count);
+ memcpy(info->rsvd3, src->rsvd3, ARRAY_SIZE(src->rsvd3));
+}
+
+/**
+ * opa_vnic_set_per_veswport_info -- Set vesw per port information
+ * @adapter: vnic port adapter
+ * @info: pointer to vport info structure
+ *
+ * This function updates the vesw per port info that is maintained by the
+ * given adapter with vesw per port info provided. Reserved fields are
+ * stored and returned back to EM as is.
+ */
+void opa_vnic_set_per_veswport_info(struct opa_vnic_adapter *adapter,
+ struct opa_per_veswport_info *info)
+{
+ struct __opa_per_veswport_info *dst = &adapter->info.vport;
+
+ dst->port_num = be32_to_cpu(info->port_num);
+ memcpy(dst->rsvd0, info->rsvd0, ARRAY_SIZE(info->rsvd0));
+
+ memcpy(dst->base_mac_addr, info->base_mac_addr,
+ ARRAY_SIZE(dst->base_mac_addr));
+ dst->config_state = info->config_state;
+ memcpy(dst->rsvd1, info->rsvd1, ARRAY_SIZE(info->rsvd1));
+
+ dst->encap_slid = be32_to_cpu(info->encap_slid);
+ memcpy(dst->pcp_to_sc_uc, info->pcp_to_sc_uc,
+ ARRAY_SIZE(dst->pcp_to_sc_uc));
+ memcpy(dst->pcp_to_vl_uc, info->pcp_to_vl_uc,
+ ARRAY_SIZE(dst->pcp_to_vl_uc));
+ memcpy(dst->pcp_to_sc_mc, info->pcp_to_sc_mc,
+ ARRAY_SIZE(dst->pcp_to_sc_mc));
+ memcpy(dst->pcp_to_vl_mc, info->pcp_to_vl_mc,
+ ARRAY_SIZE(dst->pcp_to_vl_mc));
+ dst->non_vlan_sc_uc = info->non_vlan_sc_uc;
+ dst->non_vlan_vl_uc = info->non_vlan_vl_uc;
+ dst->non_vlan_sc_mc = info->non_vlan_sc_mc;
+ dst->non_vlan_vl_mc = info->non_vlan_vl_mc;
+ memcpy(dst->rsvd2, info->rsvd2, ARRAY_SIZE(info->rsvd2));
+ memcpy(dst->rsvd3, info->rsvd3, ARRAY_SIZE(info->rsvd3));
+}
+
+/**
+ * opa_vnic_query_mcast_macs - query multicast mac list
+ * @adapter: vnic port adapter
+ * @macs: pointer to the mac list
+ *
+ * This function populates the provided mac list with the configured
+ * multicast addresses in the adapter.
+ */
+void opa_vnic_query_mcast_macs(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_iface_macs *macs)
+{
+ u16 start_idx, num_macs, idx = 0, count = 0;
+ struct netdev_hw_addr *ha;
+
+ start_idx = be16_to_cpu(macs->start_idx);
+ num_macs = be16_to_cpu(macs->num_macs_in_msg);
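+ /* copy a window of the list: skip entries before start_idx, stop after num_macs entries */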
+ netdev_for_each_mc_addr(ha, adapter->netdev) {
+ struct opa_vnic_iface_mac_entry *entry = &macs->entry[count];
+
+ if (start_idx > idx++)
+ continue;
+ else if (num_macs == count)
+ break;
+ memcpy(entry, ha->addr, sizeof(*entry));
+ count++;
+ }
+
+ macs->tot_macs_in_lst = cpu_to_be16(netdev_mc_count(adapter->netdev));
+ macs->num_macs_in_msg = cpu_to_be16(count);
+ macs->gen_count = cpu_to_be16(adapter->info.vport.mc_macs_gen_count);
+}
+
+/**
+ * opa_vnic_query_ucast_macs - query unicast mac list
+ * @adapter: vnic port adapter
+ * @macs: pointer to the mac list
+ *
+ * This function populates the provided mac list with the configured
+ * unicast addresses in the adapter.
+ */
+void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_iface_macs *macs)
+{
+ u16 start_idx, tot_macs, num_macs, idx = 0, count = 0;
+ struct netdev_hw_addr *ha;
+
+ start_idx = be16_to_cpu(macs->start_idx);
+ num_macs = be16_to_cpu(macs->num_macs_in_msg);
+ /* loop through dev_addrs list first */
+ for_each_dev_addr(adapter->netdev, ha) {
+ struct opa_vnic_iface_mac_entry *entry = &macs->entry[count];
+
+ /* Do not include EM specified MAC address */
+ if (!memcmp(adapter->info.vport.base_mac_addr, ha->addr,
+ ARRAY_SIZE(adapter->info.vport.base_mac_addr)))
+ continue;
+
+ if (start_idx > idx++)
+ continue;
+ else if (num_macs == count)
+ break;
+ memcpy(entry, ha->addr, sizeof(*entry));
+ count++;
+ }
+
+ /* loop through uc list */
+ netdev_for_each_uc_addr(ha, adapter->netdev) {
+ struct opa_vnic_iface_mac_entry *entry = &macs->entry[count];
+
+ if (start_idx > idx++)
+ continue;
+ else if (num_macs == count)
+ break;
+ memcpy(entry, ha->addr, sizeof(*entry));
+ count++;
+ }
+
+ tot_macs = netdev_hw_addr_list_count(&adapter->netdev->dev_addrs) +
+ netdev_uc_count(adapter->netdev);
+ macs->tot_macs_in_lst = cpu_to_be16(tot_macs);
+ macs->num_macs_in_msg = cpu_to_be16(count);
+ macs->gen_count = cpu_to_be16(adapter->info.vport.uc_macs_gen_count);
+}
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 79bf48477ddb..2354c742caa1 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -40,6 +40,7 @@
#include <linux/parser.h>
#include <linux/random.h>
#include <linux/jiffies.h>
+#include <linux/lockdep.h>
#include <rdma/ib_cache.h>
#include <linux/atomic.h>
@@ -311,6 +312,11 @@ static int srp_new_cm_id(struct srp_rdma_ch *ch)
if (ch->cm_id)
ib_destroy_cm_id(ch->cm_id);
ch->cm_id = new_cm_id;
+ if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev,
+ target->srp_host->port))
+ ch->path.rec_type = SA_PATH_REC_TYPE_OPA;
+ else
+ ch->path.rec_type = SA_PATH_REC_TYPE_IB;
ch->path.sgid = target->sgid;
ch->path.dgid = target->orig_dgid;
ch->path.pkey = target->pkey;
@@ -371,7 +377,6 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
struct srp_fr_desc *d;
struct ib_mr *mr;
int i, ret = -EINVAL;
- enum ib_mr_type mr_type;
if (pool_size <= 0)
goto err;
@@ -385,13 +390,9 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
spin_lock_init(&pool->lock);
INIT_LIST_HEAD(&pool->free_list);
- if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
- mr_type = IB_MR_TYPE_SG_GAPS;
- else
- mr_type = IB_MR_TYPE_MEM_REG;
-
for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
- mr = ib_alloc_mr(pd, mr_type, max_page_list_len);
+ mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
+ max_page_list_len);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
if (ret == -ENOMEM)
@@ -470,9 +471,13 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
* completion handler can access the queue pair while it is
* being destroyed.
*/
-static void srp_destroy_qp(struct ib_qp *qp)
+static void srp_destroy_qp(struct srp_rdma_ch *ch, struct ib_qp *qp)
{
- ib_drain_rq(qp);
+ spin_lock_irq(&ch->lock);
+ ib_process_cq_direct(ch->send_cq, -1);
+ spin_unlock_irq(&ch->lock);
+
+ ib_drain_qp(qp);
ib_destroy_qp(qp);
}
@@ -546,7 +551,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
}
if (ch->qp)
- srp_destroy_qp(ch->qp);
+ srp_destroy_qp(ch, ch->qp);
if (ch->recv_cq)
ib_free_cq(ch->recv_cq);
if (ch->send_cq)
@@ -570,7 +575,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
return 0;
err_qp:
- srp_destroy_qp(qp);
+ ib_destroy_qp(qp);
err_send_cq:
ib_free_cq(send_cq);
@@ -613,7 +618,7 @@ static void srp_free_ch_ib(struct srp_target_port *target,
ib_destroy_fmr_pool(ch->fmr_pool);
}
- srp_destroy_qp(ch->qp);
+ srp_destroy_qp(ch, ch->qp);
ib_free_cq(ch->send_cq);
ib_free_cq(ch->recv_cq);
@@ -643,7 +648,7 @@ static void srp_free_ch_ib(struct srp_target_port *target,
}
static void srp_path_rec_completion(int status,
- struct ib_sa_path_rec *pathrec,
+ struct sa_path_rec *pathrec,
void *ch_ptr)
{
struct srp_rdma_ch *ch = ch_ptr;
@@ -1804,6 +1809,8 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
struct srp_iu *iu;
+ lockdep_assert_held(&ch->lock);
+
ib_process_cq_direct(ch->send_cq, -1);
if (list_empty(&ch->free_tx))
@@ -1824,6 +1831,11 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
return iu;
}
+/*
+ * Note: if this function is called from inside ib_drain_sq() then it will
+ * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE
+ * with status IB_WC_SUCCESS then that's a bug.
+ */
static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
@@ -1834,6 +1846,8 @@ static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
return;
}
+ lockdep_assert_held(&ch->lock);
+
list_add(&iu->list, &ch->free_tx);
}
@@ -1889,17 +1903,24 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
spin_lock_irqsave(&ch->lock, flags);
ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
+ if (rsp->tag == ch->tsk_mgmt_tag) {
+ ch->tsk_mgmt_status = -1;
+ if (be32_to_cpu(rsp->resp_data_len) >= 4)
+ ch->tsk_mgmt_status = rsp->data[3];
+ complete(&ch->tsk_mgmt_done);
+ } else {
+ shost_printk(KERN_ERR, target->scsi_host,
+ "Received tsk mgmt response too late for tag %#llx\n",
+ rsp->tag);
+ }
spin_unlock_irqrestore(&ch->lock, flags);
-
- ch->tsk_mgmt_status = -1;
- if (be32_to_cpu(rsp->resp_data_len) >= 4)
- ch->tsk_mgmt_status = rsp->data[3];
- complete(&ch->tsk_mgmt_done);
} else {
scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
- if (scmnd) {
+ if (scmnd && scmnd->host_scribble) {
req = (void *)scmnd->host_scribble;
scmnd = srp_claim_req(ch, req, NULL, scmnd);
+ } else {
+ scmnd = NULL;
}
if (!scmnd) {
shost_printk(KERN_ERR, target->scsi_host,
@@ -2383,12 +2404,12 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
switch (event->param.rej_rcvd.reason) {
case IB_CM_REJ_PORT_CM_REDIRECT:
cpi = event->param.rej_rcvd.ari;
- ch->path.dlid = cpi->redirect_lid;
+ sa_path_set_dlid(&ch->path, htonl(ntohs(cpi->redirect_lid)));
ch->path.pkey = cpi->redirect_pkey;
cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
- ch->status = ch->path.dlid ?
+ ch->status = sa_path_get_dlid(&ch->path) ?
SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
break;
@@ -2531,19 +2552,18 @@ srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
}
static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
- u8 func)
+ u8 func, u8 *status)
{
struct srp_target_port *target = ch->target;
struct srp_rport *rport = target->rport;
struct ib_device *dev = target->srp_host->srp_dev->dev;
struct srp_iu *iu;
struct srp_tsk_mgmt *tsk_mgmt;
+ int res;
if (!ch->connected || target->qp_in_error)
return -1;
- init_completion(&ch->tsk_mgmt_done);
-
/*
* Lock the rport mutex to avoid that srp_create_ch_ib() is
* invoked while a task management function is being sent.
@@ -2566,10 +2586,16 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
tsk_mgmt->opcode = SRP_TSK_MGMT;
int_to_scsilun(lun, &tsk_mgmt->lun);
- tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT;
tsk_mgmt->tsk_mgmt_func = func;
tsk_mgmt->task_tag = req_tag;
+ spin_lock_irq(&ch->lock);
+ ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
+ tsk_mgmt->tag = ch->tsk_mgmt_tag;
+ spin_unlock_irq(&ch->lock);
+
+ init_completion(&ch->tsk_mgmt_done);
+
ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
DMA_TO_DEVICE);
if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
@@ -2578,13 +2604,15 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
return -1;
}
+ res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
+ msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
+ if (res > 0 && status)
+ *status = ch->tsk_mgmt_status;
mutex_unlock(&rport->mutex);
- if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
- msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
- return -1;
+ WARN_ON_ONCE(res < 0);
- return 0;
+ return res > 0 ? 0 : -1;
}
static int srp_abort(struct scsi_cmnd *scmnd)
@@ -2610,7 +2638,7 @@ static int srp_abort(struct scsi_cmnd *scmnd)
shost_printk(KERN_ERR, target->scsi_host,
"Sending SRP abort for tag %#x\n", tag);
if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
- SRP_TSK_ABORT_TASK) == 0)
+ SRP_TSK_ABORT_TASK, NULL) == 0)
ret = SUCCESS;
else if (target->rport->state == SRP_RPORT_LOST)
ret = FAST_IO_FAIL;
@@ -2628,14 +2656,15 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
struct srp_target_port *target = host_to_target(scmnd->device->host);
struct srp_rdma_ch *ch;
int i;
+ u8 status;
shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
ch = &target->ch[0];
if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
- SRP_TSK_LUN_RESET))
+ SRP_TSK_LUN_RESET, &status))
return FAILED;
- if (ch->tsk_mgmt_status)
+ if (status)
return FAILED;
for (i = 0; i < target->ch_count; i++) {
@@ -2664,9 +2693,8 @@ static int srp_slave_alloc(struct scsi_device *sdev)
struct Scsi_Host *shost = sdev->host;
struct srp_target_port *target = host_to_target(shost);
struct srp_device *srp_dev = target->srp_host->srp_dev;
- struct ib_device *ibdev = srp_dev->dev;
- if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
+ if (true)
blk_queue_virt_boundary(sdev->request_queue,
~srp_dev->mr_page_mask);
@@ -2869,6 +2897,7 @@ static struct scsi_host_template srp_template = {
.info = srp_target_info,
.queuecommand = srp_queuecommand,
.change_queue_depth = srp_change_queue_depth,
+ .eh_timed_out = srp_timed_out,
.eh_abort_handler = srp_abort,
.eh_device_reset_handler = srp_reset_device,
.eh_host_reset_handler = srp_reset_host,
@@ -2909,7 +2938,7 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
sprintf(target->target_name, "SRP.T10:%016llX",
be64_to_cpu(target->id_ext));
- if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
+ if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent))
return -ENODEV;
memcpy(ids.port_id, &target->id_ext, 8);
@@ -3421,11 +3450,12 @@ static ssize_t srp_create_target(struct device *dev,
ret = srp_connect_ch(ch, multich);
if (ret) {
shost_printk(KERN_ERR, target->scsi_host,
- PFX "Connection %d/%d failed\n",
+ PFX "Connection %d/%d to %pI6 failed\n",
ch_start + cpu_idx,
- target->ch_count);
+ target->ch_count,
+ ch->target->orig_dgid.raw);
if (node_idx == 0 && cpu_idx == 0) {
- goto err_disconnect;
+ goto free_ch;
} else {
srp_free_ch_ib(target, ch);
srp_free_req_data(target, ch);
@@ -3472,6 +3502,7 @@ put:
err_disconnect:
srp_disconnect_target(target);
+free_ch:
for (i = 0; i < target->ch_count; i++) {
ch = &target->ch[i];
srp_free_ch_ib(target, ch);
@@ -3520,7 +3551,7 @@ static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
host->port = port;
host->dev.class = &srp_class;
- host->dev.parent = device->dev->dma_device;
+ host->dev.parent = device->dev->dev.parent;
dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
if (device_register(&host->dev))
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 21c69695f9d4..ab9077b81d5a 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -152,7 +152,7 @@ struct srp_rdma_ch {
struct completion done;
int status;
- struct ib_sa_path_rec path;
+ struct sa_path_rec path;
struct ib_sa_query *path_query;
int path_query_id;
@@ -163,6 +163,7 @@ struct srp_rdma_ch {
int max_ti_iu_len;
int comp_vector;
+ u64 tsk_mgmt_tag;
struct completion tsk_mgmt_done;
u8 tsk_mgmt_status;
bool connected;
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index d21ba9d857c3..402275be0931 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -417,7 +417,7 @@ static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_wc)
{
- ib_destroy_ah(mad_wc->send_buf->ah);
+ rdma_destroy_ah(mad_wc->send_buf->ah);
ib_free_send_mad(mad_wc->send_buf);
}
@@ -481,7 +481,7 @@ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
ib_free_send_mad(rsp);
err_rsp:
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
err:
ib_free_recv_mad(mad_wc);
}
@@ -500,6 +500,7 @@ static int srpt_refresh_port(struct srpt_port *sport)
struct ib_mad_reg_req reg_req;
struct ib_port_modify port_modify;
struct ib_port_attr port_attr;
+ __be16 *guid;
int ret;
memset(&port_modify, 0, sizeof(port_modify));
@@ -522,10 +523,17 @@ static int srpt_refresh_port(struct srpt_port *sport)
if (ret)
goto err_query_port;
+ sport->port_guid_wwn.priv = sport;
+ guid = (__be16 *)&sport->gid.global.interface_id;
snprintf(sport->port_guid, sizeof(sport->port_guid),
- "0x%016llx%016llx",
- be64_to_cpu(sport->gid.global.subnet_prefix),
- be64_to_cpu(sport->gid.global.interface_id));
+ "%04x:%04x:%04x:%04x",
+ be16_to_cpu(guid[0]), be16_to_cpu(guid[1]),
+ be16_to_cpu(guid[2]), be16_to_cpu(guid[3]));
+ sport->port_gid_wwn.priv = sport;
+ snprintf(sport->port_gid, sizeof(sport->port_gid),
+ "0x%016llx%016llx",
+ be64_to_cpu(sport->gid.global.subnet_prefix),
+ be64_to_cpu(sport->gid.global.interface_id));
if (!sport->mad_agent) {
memset(&reg_req, 0, sizeof(reg_req));
@@ -1149,8 +1157,8 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
}
spin_unlock_irqrestore(&ioctx->spinlock, flags);
- pr_debug("Aborting cmd with state %d and tag %lld\n", state,
- ioctx->cmd.tag);
+ pr_debug("Aborting cmd with state %d -> %d and tag %lld\n", state,
+ ioctx->state, ioctx->cmd.tag);
switch (state) {
case SRPT_STATE_NEW:
@@ -1838,6 +1846,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
struct srp_login_rej *rej;
struct ib_cm_rep_param *rep_param;
struct srpt_rdma_ch *ch, *tmp_ch;
+ __be16 *guid;
u32 it_iu_len;
int i, ret = 0;
@@ -1983,26 +1992,30 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
goto destroy_ib;
}
- /*
- * Use the initator port identifier as the session name, when
- * checking against se_node_acl->initiatorname[] this can be
- * with or without preceeding '0x'.
- */
+ guid = (__be16 *)&param->primary_path->sgid.global.interface_id;
+ snprintf(ch->ini_guid, sizeof(ch->ini_guid), "%04x:%04x:%04x:%04x",
+ be16_to_cpu(guid[0]), be16_to_cpu(guid[1]),
+ be16_to_cpu(guid[2]), be16_to_cpu(guid[3]));
snprintf(ch->sess_name, sizeof(ch->sess_name), "0x%016llx%016llx",
be64_to_cpu(*(__be64 *)ch->i_port_id),
be64_to_cpu(*(__be64 *)(ch->i_port_id + 8)));
pr_debug("registering session %s\n", ch->sess_name);
- ch->sess = target_alloc_session(&sport->port_tpg_1, 0, 0,
+ if (sport->port_guid_tpg.se_tpg_wwn)
+ ch->sess = target_alloc_session(&sport->port_guid_tpg, 0, 0,
+ TARGET_PROT_NORMAL,
+ ch->ini_guid, ch, NULL);
+ if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess))
+ ch->sess = target_alloc_session(&sport->port_gid_tpg, 0, 0,
TARGET_PROT_NORMAL, ch->sess_name, ch,
NULL);
/* Retry without leading "0x" */
- if (IS_ERR(ch->sess))
- ch->sess = target_alloc_session(&sport->port_tpg_1, 0, 0,
+ if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess))
+ ch->sess = target_alloc_session(&sport->port_gid_tpg, 0, 0,
TARGET_PROT_NORMAL,
ch->sess_name + 2, ch, NULL);
- if (IS_ERR(ch->sess)) {
+ if (IS_ERR_OR_NULL(ch->sess)) {
pr_info("Rejected login because no ACL has been configured yet for initiator %s.\n",
ch->sess_name);
rej->reason = cpu_to_be32((PTR_ERR(ch->sess) == -ENOMEM) ?
@@ -2289,12 +2302,8 @@ static void srpt_queue_response(struct se_cmd *cmd)
}
spin_unlock_irqrestore(&ioctx->spinlock, flags);
- if (unlikely(transport_check_aborted_status(&ioctx->cmd, false)
- || WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT))) {
- atomic_inc(&ch->req_lim_delta);
- srpt_abort_cmd(ioctx);
+ if (unlikely(WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT)))
return;
- }
/* For read commands, transfer the data to the initiator. */
if (ioctx->cmd.data_direction == DMA_FROM_DEVICE &&
@@ -2420,7 +2429,7 @@ static int srpt_release_sdev(struct srpt_device *sdev)
return 0;
}
-static struct srpt_port *__srpt_lookup_port(const char *name)
+static struct se_wwn *__srpt_lookup_wwn(const char *name)
{
struct ib_device *dev;
struct srpt_device *sdev;
@@ -2435,23 +2444,25 @@ static struct srpt_port *__srpt_lookup_port(const char *name)
for (i = 0; i < dev->phys_port_cnt; i++) {
sport = &sdev->port[i];
- if (!strcmp(sport->port_guid, name))
- return sport;
+ if (strcmp(sport->port_guid, name) == 0)
+ return &sport->port_guid_wwn;
+ if (strcmp(sport->port_gid, name) == 0)
+ return &sport->port_gid_wwn;
}
}
return NULL;
}
-static struct srpt_port *srpt_lookup_port(const char *name)
+static struct se_wwn *srpt_lookup_wwn(const char *name)
{
- struct srpt_port *sport;
+ struct se_wwn *wwn;
spin_lock(&srpt_dev_lock);
- sport = __srpt_lookup_port(name);
+ wwn = __srpt_lookup_wwn(name);
spin_unlock(&srpt_dev_lock);
- return sport;
+ return wwn;
}
/**
@@ -2464,8 +2475,7 @@ static void srpt_add_one(struct ib_device *device)
struct ib_srq_init_attr srq_attr;
int i;
- pr_debug("device = %p, device->dma_ops = %p\n", device,
- device->dma_ops);
+ pr_debug("device = %p\n", device);
sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
if (!sdev)
@@ -2643,11 +2653,19 @@ static char *srpt_get_fabric_name(void)
return "srpt";
}
+static struct srpt_port *srpt_tpg_to_sport(struct se_portal_group *tpg)
+{
+ return tpg->se_tpg_wwn->priv;
+}
+
static char *srpt_get_fabric_wwn(struct se_portal_group *tpg)
{
- struct srpt_port *sport = container_of(tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(tpg);
- return sport->port_guid;
+ WARN_ON_ONCE(tpg != &sport->port_guid_tpg &&
+ tpg != &sport->port_gid_tpg);
+ return tpg == &sport->port_guid_tpg ? sport->port_guid :
+ sport->port_gid;
}
static u16 srpt_get_tag(struct se_portal_group *tpg)
@@ -2667,7 +2685,8 @@ static void srpt_release_cmd(struct se_cmd *se_cmd)
struct srpt_rdma_ch *ch = ioctx->ch;
unsigned long flags;
- WARN_ON(ioctx->state != SRPT_STATE_DONE);
+ WARN_ON_ONCE(ioctx->state != SRPT_STATE_DONE &&
+ !(ioctx->cmd.transport_state & CMD_T_ABORTED));
if (ioctx->n_rw_ctx) {
srpt_free_rw_ctxs(ch, ioctx);
@@ -2737,6 +2756,19 @@ static int srpt_get_tcm_cmd_state(struct se_cmd *se_cmd)
return srpt_get_cmd_state(ioctx);
}
+static int srpt_parse_guid(u64 *guid, const char *name)
+{
+ u16 w[4];
+ int ret = -EINVAL;
+
+ if (sscanf(name, "%hx:%hx:%hx:%hx", &w[0], &w[1], &w[2], &w[3]) != 4)
+ goto out;
+ *guid = get_unaligned_be64(w);
+ ret = 0;
+out:
+ return ret;
+}
+
/**
* srpt_parse_i_port_id() - Parse an initiator port ID.
* @name: ASCII representation of a 128-bit initiator port ID.
@@ -2772,20 +2804,23 @@ out:
*/
static int srpt_init_nodeacl(struct se_node_acl *se_nacl, const char *name)
{
+ u64 guid;
u8 i_port_id[16];
+ int ret;
- if (srpt_parse_i_port_id(i_port_id, name) < 0) {
+ ret = srpt_parse_guid(&guid, name);
+ if (ret < 0)
+ ret = srpt_parse_i_port_id(i_port_id, name);
+ if (ret < 0)
pr_err("invalid initiator port ID %s\n", name);
- return -EINVAL;
- }
- return 0;
+ return ret;
}
static ssize_t srpt_tpg_attrib_srp_max_rdma_size_show(struct config_item *item,
char *page)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
return sprintf(page, "%u\n", sport->port_attrib.srp_max_rdma_size);
}
@@ -2794,7 +2829,7 @@ static ssize_t srpt_tpg_attrib_srp_max_rdma_size_store(struct config_item *item,
const char *page, size_t count)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
unsigned long val;
int ret;
@@ -2822,7 +2857,7 @@ static ssize_t srpt_tpg_attrib_srp_max_rsp_size_show(struct config_item *item,
char *page)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
return sprintf(page, "%u\n", sport->port_attrib.srp_max_rsp_size);
}
@@ -2831,7 +2866,7 @@ static ssize_t srpt_tpg_attrib_srp_max_rsp_size_store(struct config_item *item,
const char *page, size_t count)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
unsigned long val;
int ret;
@@ -2859,7 +2894,7 @@ static ssize_t srpt_tpg_attrib_srp_sq_size_show(struct config_item *item,
char *page)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
return sprintf(page, "%u\n", sport->port_attrib.srp_sq_size);
}
@@ -2868,7 +2903,7 @@ static ssize_t srpt_tpg_attrib_srp_sq_size_store(struct config_item *item,
const char *page, size_t count)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
unsigned long val;
int ret;
@@ -2906,7 +2941,7 @@ static struct configfs_attribute *srpt_tpg_attrib_attrs[] = {
static ssize_t srpt_tpg_enable_show(struct config_item *item, char *page)
{
struct se_portal_group *se_tpg = to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
return snprintf(page, PAGE_SIZE, "%d\n", (sport->enabled) ? 1: 0);
}
@@ -2915,7 +2950,7 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item,
const char *page, size_t count)
{
struct se_portal_group *se_tpg = to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
struct srpt_device *sdev = sport->sdev;
struct srpt_rdma_ch *ch;
unsigned long tmp;
@@ -2967,15 +3002,19 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn,
struct config_group *group,
const char *name)
{
- struct srpt_port *sport = container_of(wwn, struct srpt_port, port_wwn);
+ struct srpt_port *sport = wwn->priv;
+ static struct se_portal_group *tpg;
int res;
- /* Initialize sport->port_wwn and sport->port_tpg_1 */
- res = core_tpg_register(&sport->port_wwn, &sport->port_tpg_1, SCSI_PROTOCOL_SRP);
+ WARN_ON_ONCE(wwn != &sport->port_guid_wwn &&
+ wwn != &sport->port_gid_wwn);
+ tpg = wwn == &sport->port_guid_wwn ? &sport->port_guid_tpg :
+ &sport->port_gid_tpg;
+ res = core_tpg_register(wwn, tpg, SCSI_PROTOCOL_SRP);
if (res)
return ERR_PTR(res);
- return &sport->port_tpg_1;
+ return tpg;
}
/**
@@ -2984,11 +3023,10 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn,
*/
static void srpt_drop_tpg(struct se_portal_group *tpg)
{
- struct srpt_port *sport = container_of(tpg,
- struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(tpg);
sport->enabled = false;
- core_tpg_deregister(&sport->port_tpg_1);
+ core_tpg_deregister(tpg);
}
/**
@@ -2999,19 +3037,7 @@ static struct se_wwn *srpt_make_tport(struct target_fabric_configfs *tf,
struct config_group *group,
const char *name)
{
- struct srpt_port *sport;
- int ret;
-
- sport = srpt_lookup_port(name);
- pr_debug("make_tport(%s)\n", name);
- ret = -EINVAL;
- if (!sport)
- goto err;
-
- return &sport->port_wwn;
-
-err:
- return ERR_PTR(ret);
+ return srpt_lookup_wwn(name) ? : ERR_PTR(-EINVAL);
}
/**
@@ -3020,9 +3046,6 @@ err:
*/
static void srpt_drop_tport(struct se_wwn *wwn)
{
- struct srpt_port *sport = container_of(wwn, struct srpt_port, port_wwn);
-
- pr_debug("drop_tport(%s\n", config_item_name(&sport->port_wwn.wwn_group.cg_item));
}
static ssize_t srpt_wwn_version_show(struct config_item *item, char *buf)
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index 581878782854..cc1183851af5 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -258,6 +258,7 @@ enum rdma_ch_state {
* against concurrent modification by the cm_id spinlock.
* @sess: Session information associated with this SRP channel.
* @sess_name: Session name.
+ * @ini_guid: Initiator port GUID.
* @release_work: Allows scheduling of srpt_release_channel().
* @release_done: Enables waiting for srpt_release_channel() completion.
*/
@@ -284,6 +285,7 @@ struct srpt_rdma_ch {
struct list_head cmd_wait_list;
struct se_session *sess;
u8 sess_name[36];
+ u8 ini_guid[24];
struct work_struct release_work;
struct completion *release_done;
};
@@ -306,28 +308,34 @@ struct srpt_port_attrib {
* @mad_agent: per-port management datagram processing information.
* @enabled: Whether or not this target port is enabled.
* @port_guid: ASCII representation of Port GUID
+ * @port_gid: ASCII representation of Port GID
* @port: one-based port number.
* @sm_lid: cached value of the port's sm_lid.
* @lid: cached value of the port's lid.
* @gid: cached value of the port's gid.
* @port_acl_lock spinlock for port_acl_list:
* @work: work structure for refreshing the aforementioned cached values.
- * @port_tpg_1 Target portal group = 1 data.
- * @port_wwn: Target core WWN data.
+ * @port_guid_tpg: TPG associated with target port GUID.
+ * @port_guid_wwn: WWN associated with target port GUID.
+ * @port_gid_tpg: TPG associated with target port GID.
+ * @port_gid_wwn: WWN associated with target port GID.
* @port_acl_list: Head of the list with all node ACLs for this port.
*/
struct srpt_port {
struct srpt_device *sdev;
struct ib_mad_agent *mad_agent;
bool enabled;
- u8 port_guid[64];
+ u8 port_guid[24];
+ u8 port_gid[64];
u8 port;
u16 sm_lid;
u16 lid;
union ib_gid gid;
struct work_struct work;
- struct se_portal_group port_tpg_1;
- struct se_wwn port_wwn;
+ struct se_portal_group port_guid_tpg;
+ struct se_wwn port_guid_wwn;
+ struct se_portal_group port_gid_tpg;
+ struct se_wwn port_gid_wwn;
struct srpt_port_attrib port_attrib;
};