diff options
Diffstat (limited to 'drivers/infiniband/hw/mlx5/main.c')
-rw-r--r-- | drivers/infiniband/hw/mlx5/main.c | 447 |
1 files changed, 420 insertions, 27 deletions
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b0ec175cc6ba..ec737e2287fe 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -40,6 +40,8 @@ #include <linux/io-mapping.h> #include <linux/sched.h> #include <rdma/ib_user_verbs.h> +#include <rdma/ib_addr.h> +#include <rdma/ib_cache.h> #include <linux/mlx5/vport.h> #include <rdma/ib_smi.h> #include <rdma/ib_umem.h> @@ -66,12 +68,14 @@ static char mlx5_version[] = DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v" DRIVER_VERSION " (" DRIVER_RELDATE ")\n"; +enum { + MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3, +}; + static enum rdma_link_layer -mlx5_ib_port_link_layer(struct ib_device *device) +mlx5_port_type_cap_to_rdma_ll(int port_type_cap) { - struct mlx5_ib_dev *dev = to_mdev(device); - - switch (MLX5_CAP_GEN(dev->mdev, port_type)) { + switch (port_type_cap) { case MLX5_CAP_PORT_TYPE_IB: return IB_LINK_LAYER_INFINIBAND; case MLX5_CAP_PORT_TYPE_ETH: @@ -81,6 +85,202 @@ mlx5_ib_port_link_layer(struct ib_device *device) } } +static enum rdma_link_layer +mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type); + + return mlx5_port_type_cap_to_rdma_ll(port_type_cap); +} + +static int mlx5_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev, + roce.nb); + + if ((event != NETDEV_UNREGISTER) && (event != NETDEV_REGISTER)) + return NOTIFY_DONE; + + write_lock(&ibdev->roce.netdev_lock); + if (ndev->dev.parent == &ibdev->mdev->pdev->dev) + ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ? NULL : ndev; + write_unlock(&ibdev->roce.netdev_lock); + + return NOTIFY_DONE; +} + +static struct net_device *mlx5_ib_get_netdev(struct ib_device *device, + u8 port_num) +{ + struct mlx5_ib_dev *ibdev = to_mdev(device); + struct net_device *ndev; + + /* Ensure ndev does not disappear before we invoke dev_hold() + */ + read_lock(&ibdev->roce.netdev_lock); + ndev = ibdev->roce.netdev; + if (ndev) + dev_hold(ndev); + read_unlock(&ibdev->roce.netdev_lock); + + return ndev; +} + +static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, + struct ib_port_attr *props) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + struct net_device *ndev; + enum ib_mtu ndev_ib_mtu; + u16 qkey_viol_cntr; + + memset(props, 0, sizeof(*props)); + + props->port_cap_flags |= IB_PORT_CM_SUP; + props->port_cap_flags |= IB_PORT_IP_BASED_GIDS; + + props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev, + roce_address_table_size); + props->max_mtu = IB_MTU_4096; + props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg); + props->pkey_tbl_len = 1; + props->state = IB_PORT_DOWN; + props->phys_state = 3; + + mlx5_query_nic_vport_qkey_viol_cntr(dev->mdev, &qkey_viol_cntr); + props->qkey_viol_cntr = qkey_viol_cntr; + + ndev = mlx5_ib_get_netdev(device, port_num); + if (!ndev) + return 0; + + if (netif_running(ndev) && netif_carrier_ok(ndev)) { + props->state = IB_PORT_ACTIVE; + props->phys_state = 5; + } + + ndev_ib_mtu = iboe_get_mtu(ndev->mtu); + + dev_put(ndev); + + props->active_mtu = min(props->max_mtu, ndev_ib_mtu); + + props->active_width = IB_WIDTH_4X; /* TODO */ + props->active_speed = IB_SPEED_QDR; /* TODO */ + + return 0; +} + +static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid, + const struct ib_gid_attr *attr, + void *mlx5_addr) +{ +#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v) + char *mlx5_addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr, + source_l3_address); + void *mlx5_addr_mac = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr, + source_mac_47_32); + + if (!gid) + return; + + ether_addr_copy(mlx5_addr_mac, attr->ndev->dev_addr); + + if (is_vlan_dev(attr->ndev)) { + MLX5_SET_RA(mlx5_addr, vlan_valid, 1); + MLX5_SET_RA(mlx5_addr, vlan_id, vlan_dev_vlan_id(attr->ndev)); + } + + switch (attr->gid_type) { + case IB_GID_TYPE_IB: + MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1); + break; + case IB_GID_TYPE_ROCE_UDP_ENCAP: + MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2); + break; + + default: + WARN_ON(true); + } + + if (attr->gid_type != IB_GID_TYPE_IB) { + if (ipv6_addr_v4mapped((void *)gid)) + MLX5_SET_RA(mlx5_addr, roce_l3_type, + MLX5_ROCE_L3_TYPE_IPV4); + else + MLX5_SET_RA(mlx5_addr, roce_l3_type, + MLX5_ROCE_L3_TYPE_IPV6); + } + + if ((attr->gid_type == IB_GID_TYPE_IB) || + !ipv6_addr_v4mapped((void *)gid)) + memcpy(mlx5_addr_l3_addr, gid, sizeof(*gid)); + else + memcpy(&mlx5_addr_l3_addr[12], &gid->raw[12], 4); +} + +static int set_roce_addr(struct ib_device *device, u8 port_num, + unsigned int index, + const union ib_gid *gid, + const struct ib_gid_attr *attr) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + u32 in[MLX5_ST_SZ_DW(set_roce_address_in)]; + u32 out[MLX5_ST_SZ_DW(set_roce_address_out)]; + void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address); + enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num); + + if (ll != IB_LINK_LAYER_ETHERNET) + return -EINVAL; + + memset(in, 0, sizeof(in)); + + ib_gid_to_mlx5_roce_addr(gid, attr, in_addr); + + MLX5_SET(set_roce_address_in, in, roce_address_index, index); + MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num, + unsigned int index, const union ib_gid *gid, + const struct ib_gid_attr *attr, + __always_unused void **context) +{ + return set_roce_addr(device, port_num, index, gid, attr); +} + +static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num, + unsigned int index, __always_unused void **context) +{ + return set_roce_addr(device, port_num, index, NULL, NULL); +} + +__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, + int index) +{ + struct ib_gid_attr attr; + union ib_gid gid; + + if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr)) + return 0; + + if (!attr.ndev) + return 0; + + dev_put(attr.ndev); + + if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) + return 0; + + return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port)); +} + static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) { return !dev->mdev->issi; @@ -97,13 +297,35 @@ static int mlx5_get_vport_access_method(struct ib_device *ibdev) if (mlx5_use_mad_ifc(to_mdev(ibdev))) return MLX5_VPORT_ACCESS_METHOD_MAD; - if (mlx5_ib_port_link_layer(ibdev) == + if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) return MLX5_VPORT_ACCESS_METHOD_NIC; return MLX5_VPORT_ACCESS_METHOD_HCA; } +static void get_atomic_caps(struct mlx5_ib_dev *dev, + struct ib_device_attr *props) +{ + u8 tmp; + u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations); + u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp); + u8 atomic_req_8B_endianness_mode = + MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianess_mode); + + /* Check if HW supports 8 bytes standard atomic operations and capable + * of host endianness respond + */ + tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD; + if (((atomic_operations & tmp) == tmp) && + (atomic_size_qp & MLX5_ATOMIC_SIZE_QP_8BYTES) && + (atomic_req_8B_endianness_mode)) { + props->atomic_cap = IB_ATOMIC_HCA; + } else { + props->atomic_cap = IB_ATOMIC_NONE; + } +} + static int mlx5_query_system_image_guid(struct ib_device *ibdev, __be64 *sys_image_guid) { @@ -119,13 +341,21 @@ static int mlx5_query_system_image_guid(struct ib_device *ibdev, case MLX5_VPORT_ACCESS_METHOD_HCA: err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp); - if (!err) - *sys_image_guid = cpu_to_be64(tmp); - return err; + break; + + case MLX5_VPORT_ACCESS_METHOD_NIC: + err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp); + break; default: return -EINVAL; } + + if (!err) + *sys_image_guid = cpu_to_be64(tmp); + + return err; + } static int mlx5_query_max_pkeys(struct ib_device *ibdev, @@ -179,13 +409,20 @@ static int mlx5_query_node_guid(struct mlx5_ib_dev *dev, case MLX5_VPORT_ACCESS_METHOD_HCA: err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp); - if (!err) - *node_guid = cpu_to_be64(tmp); - return err; + break; + + case MLX5_VPORT_ACCESS_METHOD_NIC: + err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp); + break; default: return -EINVAL; } + + if (!err) + *node_guid = cpu_to_be64(tmp); + + return err; } struct mlx5_reg_node_desc { @@ -263,6 +500,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_GEN(mdev, block_lb_mc)) props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; + if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && + (MLX5_CAP_ETH(dev->mdev, csum_cap))) + props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM; + props->vendor_part_id = mdev->pdev->device; props->hw_ver = mdev->pdev->revision; @@ -278,7 +519,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->max_sge = min(max_rq_sg, max_sq_sg); props->max_sge_rd = props->max_sge; props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); - props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_eq_sz)) - 1; + props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1; props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd); props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp); @@ -289,13 +530,15 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; props->max_srq_sge = max_rq_sg - 1; props->max_fast_reg_page_list_len = (unsigned int)-1; - props->atomic_cap = IB_ATOMIC_NONE; + get_atomic_caps(dev, props); props->masked_atomic_cap = IB_ATOMIC_NONE; props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg); props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */ + props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz); + props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING if (MLX5_CAP_GEN(mdev, pg)) @@ -303,6 +546,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->odp_caps = dev->odp_caps; #endif + if (MLX5_CAP_GEN(mdev, cd)) + props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL; + return 0; } @@ -483,6 +729,9 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, case MLX5_VPORT_ACCESS_METHOD_HCA: return mlx5_query_hca_port(ibdev, port, props); + case MLX5_VPORT_ACCESS_METHOD_NIC: + return mlx5_query_port_roce(ibdev, port, props); + default: return -EINVAL; } @@ -583,8 +832,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - struct mlx5_ib_alloc_ucontext_req_v2 req; - struct mlx5_ib_alloc_ucontext_resp resp; + struct mlx5_ib_alloc_ucontext_req_v2 req = {}; + struct mlx5_ib_alloc_ucontext_resp resp = {}; struct mlx5_ib_ucontext *context; struct mlx5_uuar_info *uuari; struct mlx5_uar *uars; @@ -599,20 +848,22 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, if (!dev->ib_active) return ERR_PTR(-EAGAIN); - memset(&req, 0, sizeof(req)); + if (udata->inlen < sizeof(struct ib_uverbs_cmd_hdr)) + return ERR_PTR(-EINVAL); + reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr); if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req)) ver = 0; - else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2)) + else if (reqlen >= sizeof(struct mlx5_ib_alloc_ucontext_req_v2)) ver = 2; else return ERR_PTR(-EINVAL); - err = ib_copy_from_udata(&req, udata, reqlen); + err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req))); if (err) return ERR_PTR(err); - if (req.flags || req.reserved) + if (req.flags) return ERR_PTR(-EINVAL); if (req.total_num_uuars > MLX5_MAX_UUARS) @@ -621,6 +872,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, if (req.total_num_uuars == 0) return ERR_PTR(-EINVAL); + if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2) + return ERR_PTR(-EOPNOTSUPP); + + if (reqlen > sizeof(req) && + !ib_is_udata_cleared(udata, sizeof(req), + reqlen - sizeof(req))) + return ERR_PTR(-EOPNOTSUPP); + req.total_num_uuars = ALIGN(req.total_num_uuars, MLX5_NON_FP_BF_REGS_PER_PAGE); if (req.num_low_latency_uuars > req.total_num_uuars - 1) @@ -636,6 +895,11 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); + resp.cqe_version = min_t(__u8, + (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version), + req.max_cqe_version); + resp.response_length = min(offsetof(typeof(resp), response_length) + + sizeof(resp.response_length), udata->outlen); context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) @@ -681,22 +945,49 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range; #endif + if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) { + err = mlx5_core_alloc_transport_domain(dev->mdev, + &context->tdn); + if (err) + goto out_uars; + } + INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); resp.tot_uuars = req.total_num_uuars; resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports); - err = ib_copy_to_udata(udata, &resp, - sizeof(resp) - sizeof(resp.reserved)); + + if (field_avail(typeof(resp), cqe_version, udata->outlen)) + resp.response_length += sizeof(resp.cqe_version); + + if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) { + resp.comp_mask |= + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET; + resp.hca_core_clock_offset = + offsetof(struct mlx5_init_seg, internal_timer_h) % + PAGE_SIZE; + resp.response_length += sizeof(resp.hca_core_clock_offset) + + sizeof(resp.reserved2) + + sizeof(resp.reserved3); + } + + err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) - goto out_uars; + goto out_td; uuari->ver = ver; uuari->num_low_latency_uuars = req.num_low_latency_uuars; uuari->uars = uars; uuari->num_uars = num_uars; + context->cqe_version = resp.cqe_version; + return &context->ibucontext; +out_td: + if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) + mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn); + out_uars: for (i--; i >= 0; i--) mlx5_cmd_free_uar(dev->mdev, uars[i].index); @@ -721,6 +1012,9 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) struct mlx5_uuar_info *uuari = &context->uuari; int i; + if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) + mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn); + for (i = 0; i < uuari->num_uars; i++) { if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index)) mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index); @@ -790,6 +1084,30 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES: return -ENOSYS; + case MLX5_IB_MMAP_CORE_CLOCK: + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + + if (vma->vm_flags & (VM_WRITE | VM_EXEC)) + return -EPERM; + + /* Don't expose to user-space information it shouldn't have */ + if (PAGE_SIZE > 4096) + return -EOPNOTSUPP; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + pfn = (dev->mdev->iseg_base + + offsetof(struct mlx5_init_seg, internal_timer_h)) >> + PAGE_SHIFT; + if (io_remap_pfn_range(vma, vma->vm_start, pfn, + PAGE_SIZE, vma->vm_page_prot)) + return -EAGAIN; + + mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n", + vma->vm_start, + (unsigned long long)pfn << PAGE_SHIFT); + break; + default: return -EINVAL; } @@ -1758,6 +2076,32 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr) mlx5_ib_dealloc_pd(devr->p0); } +static u32 get_core_cap_flags(struct ib_device *ibdev) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1); + u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type); + u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version); + u32 ret = 0; + + if (ll == IB_LINK_LAYER_INFINIBAND) + return RDMA_CORE_PORT_IBA_IB; + + if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP)) + return 0; + + if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP)) + return 0; + + if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP) + ret |= RDMA_CORE_PORT_IBA_ROCE; + + if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP) + ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; + + return ret; +} + static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable) { @@ -1770,20 +2114,50 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; - immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; + immutable->core_cap_flags = get_core_cap_flags(ibdev); immutable->max_mad_size = IB_MGMT_MAD_SIZE; return 0; } +static int mlx5_enable_roce(struct mlx5_ib_dev *dev) +{ + int err; + + dev->roce.nb.notifier_call = mlx5_netdev_event; + err = register_netdevice_notifier(&dev->roce.nb); + if (err) + return err; + + err = mlx5_nic_vport_enable_roce(dev->mdev); + if (err) + goto err_unregister_netdevice_notifier; + + return 0; + +err_unregister_netdevice_notifier: + unregister_netdevice_notifier(&dev->roce.nb); + return err; +} + +static void mlx5_disable_roce(struct mlx5_ib_dev *dev) +{ + mlx5_nic_vport_disable_roce(dev->mdev); + unregister_netdevice_notifier(&dev->roce.nb); +} + static void *mlx5_ib_add(struct mlx5_core_dev *mdev) { struct mlx5_ib_dev *dev; + enum rdma_link_layer ll; + int port_type_cap; int err; int i; - /* don't create IB instance over Eth ports, no RoCE yet! */ - if (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) + port_type_cap = MLX5_CAP_GEN(mdev, port_type); + ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); + + if ((ll == IB_LINK_LAYER_ETHERNET) && !MLX5_CAP_GEN(mdev, roce)) return NULL; printk_once(KERN_INFO "%s", mlx5_version); @@ -1794,6 +2168,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->mdev = mdev; + rwlock_init(&dev->roce.netdev_lock); err = get_port_caps(dev); if (err) goto err_dealloc; @@ -1843,7 +2218,12 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.query_device = mlx5_ib_query_device; dev->ib_dev.query_port = mlx5_ib_query_port; + dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer; + if (ll == IB_LINK_LAYER_ETHERNET) + dev->ib_dev.get_netdev = mlx5_ib_get_netdev; dev->ib_dev.query_gid = mlx5_ib_query_gid; + dev->ib_dev.add_gid = mlx5_ib_add_gid; + dev->ib_dev.del_gid = mlx5_ib_del_gid; dev->ib_dev.query_pkey = mlx5_ib_query_pkey; dev->ib_dev.modify_device = mlx5_ib_modify_device; dev->ib_dev.modify_port = mlx5_ib_modify_port; @@ -1893,7 +2273,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); } - if (mlx5_ib_port_link_layer(&dev->ib_dev) == + if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) { dev->ib_dev.create_flow = mlx5_ib_create_flow; dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow; @@ -1908,9 +2288,15 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) mutex_init(&dev->flow_db.lock); mutex_init(&dev->cap_mask_mutex); + if (ll == IB_LINK_LAYER_ETHERNET) { + err = mlx5_enable_roce(dev); + if (err) + goto err_dealloc; + } + err = create_dev_resources(&dev->devr); if (err) - goto err_dealloc; + goto err_disable_roce; err = mlx5_ib_odp_init_one(dev); if (err) @@ -1947,6 +2333,10 @@ err_odp: err_rsrc: destroy_dev_resources(&dev->devr); +err_disable_roce: + if (ll == IB_LINK_LAYER_ETHERNET) + mlx5_disable_roce(dev); + err_dealloc: ib_dealloc_device((struct ib_device *)dev); @@ -1956,11 +2346,14 @@ err_dealloc: static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) { struct mlx5_ib_dev *dev = context; + enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1); ib_unregister_device(&dev->ib_dev); destroy_umrc_res(dev); mlx5_ib_odp_remove_one(dev); destroy_dev_resources(&dev->devr); + if (ll == IB_LINK_LAYER_ETHERNET) + mlx5_disable_roce(dev); ib_dealloc_device(&dev->ib_dev); } |