Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r-- | drivers/infiniband/hw/mlx5/cmd.c      | 130
-rw-r--r-- | drivers/infiniband/hw/mlx5/cmd.h      |   4
-rw-r--r-- | drivers/infiniband/hw/mlx5/devx.c     |  38
-rw-r--r-- | drivers/infiniband/hw/mlx5/flow.c     |  34
-rw-r--r-- | drivers/infiniband/hw/mlx5/main.c     | 258
-rw-r--r-- | drivers/infiniband/hw/mlx5/mem.c      |  12
-rw-r--r-- | drivers/infiniband/hw/mlx5/mlx5_ib.h  |  24
-rw-r--r-- | drivers/infiniband/hw/mlx5/mr.c       |  89
-rw-r--r-- | drivers/infiniband/hw/mlx5/odp.c      | 220
-rw-r--r-- | drivers/infiniband/hw/mlx5/qp.c       |  62
-rw-r--r-- | drivers/infiniband/hw/mlx5/srq_cmd.c  |   6
11 files changed, 369 insertions(+), 508 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index 6c8645033102..4937947400cd 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -186,136 +186,6 @@ int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
 	return err;
 }
 
-int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
-			  u16 uid, phys_addr_t *addr, u32 *obj_id)
-{
-	struct mlx5_core_dev *dev = dm->dev;
-	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
-	u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {};
-	unsigned long *block_map;
-	u64 icm_start_addr;
-	u32 log_icm_size;
-	u32 num_blocks;
-	u32 max_blocks;
-	u64 block_idx;
-	void *sw_icm;
-	int ret;
-
-	MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
-		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
-	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
-	MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
-
-	switch (type) {
-	case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
-		icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
-						steering_sw_icm_start_address);
-		log_icm_size = MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size);
-		block_map = dm->steering_sw_icm_alloc_blocks;
-		break;
-	case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
-		icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
-					header_modify_sw_icm_start_address);
-		log_icm_size = MLX5_CAP_DEV_MEM(dev,
-						log_header_modify_sw_icm_size);
-		block_map = dm->header_modify_sw_icm_alloc_blocks;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	num_blocks = (length + MLX5_SW_ICM_BLOCK_SIZE(dev) - 1) >>
-		     MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
-	max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
-	spin_lock(&dm->lock);
-	block_idx = bitmap_find_next_zero_area(block_map,
-					       max_blocks,
-					       0,
-					       num_blocks, 0);
-
-	if (block_idx < max_blocks)
-		bitmap_set(block_map,
-			   block_idx, num_blocks);
-
-	spin_unlock(&dm->lock);
-
-	if (block_idx >= max_blocks)
-		return -ENOMEM;
-
-	sw_icm = MLX5_ADDR_OF(create_sw_icm_in, in, sw_icm);
-	icm_start_addr += block_idx << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
-	MLX5_SET64(sw_icm, sw_icm, sw_icm_start_addr,
-		   icm_start_addr);
-	MLX5_SET(sw_icm, sw_icm, log_sw_icm_size, ilog2(length));
-
-	ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-	if (ret) {
-		spin_lock(&dm->lock);
-		bitmap_clear(block_map,
-			     block_idx, num_blocks);
-		spin_unlock(&dm->lock);
-
-		return ret;
-	}
-
-	*addr = icm_start_addr;
-	*obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
-
-	return 0;
-}
-
-int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
-			    u16 uid, phys_addr_t addr, u32 obj_id)
-{
-	struct mlx5_core_dev *dev = dm->dev;
-	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
-	u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
-	unsigned long *block_map;
-	u32 num_blocks;
-	u64 start_idx;
-	int err;
-
-	num_blocks = (length + MLX5_SW_ICM_BLOCK_SIZE(dev) - 1) >>
-		     MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
-
-	switch (type) {
-	case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
-		start_idx =
-			(addr - MLX5_CAP64_DEV_MEM(
-					dev, steering_sw_icm_start_address)) >>
-			MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
-		block_map = dm->steering_sw_icm_alloc_blocks;
-		break;
-	case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
-		start_idx =
-			(addr -
-			 MLX5_CAP64_DEV_MEM(
-				 dev, header_modify_sw_icm_start_address)) >>
-			MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
-		block_map = dm->header_modify_sw_icm_alloc_blocks;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
-		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
-	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
-	MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id);
-	MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
-
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-	if (err)
-		return err;
-
-	spin_lock(&dm->lock);
-	bitmap_clear(block_map,
-		     start_idx, num_blocks);
-	spin_unlock(&dm->lock);
-
-	return 0;
-}
-
 int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out)
 {
 	u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 0572dcba6eae..169cab4915e3 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -65,8 +65,4 @@ int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id,
 			     u16 uid);
 int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
 		     u16 opmod, u8 port);
-int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
-			  u16 uid, phys_addr_t *addr, u32 *obj_id);
-int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
-			    u16 uid, phys_addr_t addr, u32 obj_id);
 #endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index ec4370f99381..59022b744144 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -233,6 +233,8 @@ static bool is_legacy_obj_event_num(u16 event_num)
 	case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
 	case MLX5_EVENT_TYPE_DCT_DRAINED:
 	case MLX5_EVENT_TYPE_COMP:
+	case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
+	case MLX5_EVENT_TYPE_XRQ_ERROR:
 		return true;
 	default:
 		return false;
@@ -315,8 +317,10 @@ static u16 get_event_obj_type(unsigned long event_type, struct mlx5_eqe *eqe)
 	case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
 		return eqe->data.qp_srq.type;
 	case MLX5_EVENT_TYPE_CQ_ERROR:
+	case MLX5_EVENT_TYPE_XRQ_ERROR:
 		return 0;
 	case MLX5_EVENT_TYPE_DCT_DRAINED:
+	case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
 		return MLX5_EVENT_QUEUE_TYPE_DCT;
 	default:
 		return MLX5_GET(affiliated_event_header, &eqe->data, obj_type);
@@ -542,6 +546,8 @@ static u64 devx_get_obj_id(const void *in)
 		break;
 	case MLX5_CMD_OP_ARM_XRQ:
 	case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
+	case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
+	case MLX5_CMD_OP_MODIFY_XRQ:
 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
 					MLX5_GET(arm_xrq_in, in, xrqn));
 		break;
@@ -776,6 +782,14 @@ static bool devx_is_obj_create_cmd(const void *in, u16 *opcode)
 			return true;
 		return false;
 	}
+	case MLX5_CMD_OP_CREATE_PSV:
+	{
+		u8 num_psv = MLX5_GET(create_psv_in, in, num_psv);
+
+		if (num_psv == 1)
+			return true;
+		return false;
+	}
 	default:
 		return false;
 	}
@@ -810,6 +824,8 @@ static bool devx_is_obj_modify_cmd(const void *in)
 	case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
 	case MLX5_CMD_OP_ARM_XRQ:
 	case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
+	case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
+	case MLX5_CMD_OP_MODIFY_XRQ:
 		return true;
 	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
 	{
@@ -922,6 +938,7 @@ static bool devx_is_general_cmd(void *in, struct mlx5_ib_dev *dev)
 	case MLX5_CMD_OP_QUERY_CONG_STATUS:
 	case MLX5_CMD_OP_QUERY_CONG_PARAMS:
 	case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
+	case MLX5_CMD_OP_QUERY_LAG:
 		return true;
 	default:
 		return false;
@@ -1215,6 +1232,12 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
 	case MLX5_CMD_OP_ALLOC_XRCD:
 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
 		break;
+	case MLX5_CMD_OP_CREATE_PSV:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+			 MLX5_CMD_OP_DESTROY_PSV);
+		MLX5_SET(destroy_psv_in, din, psvn,
+			 MLX5_GET(create_psv_out, out, psv0_index));
+		break;
 	default:
 		/* The entry must match to one of the devx_is_obj_create_cmd */
 		WARN_ON(true);
@@ -2026,7 +2049,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
 		event_sub->eventfd =
 			eventfd_ctx_fdget(redirect_fd);
 
-		if (IS_ERR(event_sub)) {
+		if (IS_ERR(event_sub->eventfd)) {
 			err = PTR_ERR(event_sub->eventfd);
 			event_sub->eventfd = NULL;
 			goto err;
@@ -2285,7 +2308,11 @@ static u32 devx_get_obj_id_from_event(unsigned long event_type, void *data)
 	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
 		obj_id = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
 		break;
+	case MLX5_EVENT_TYPE_XRQ_ERROR:
+		obj_id = be32_to_cpu(eqe->data.xrq_err.type_xrqn) & 0xffffff;
+		break;
 	case MLX5_EVENT_TYPE_DCT_DRAINED:
+	case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
 		obj_id = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
 		break;
 	case MLX5_EVENT_TYPE_CQ_ERROR:
@@ -2644,12 +2671,13 @@ static int devx_async_event_close(struct inode *inode, struct file *filp)
 	struct devx_async_event_file *ev_file = filp->private_data;
 	struct devx_event_subscription *event_sub, *event_sub_tmp;
 	struct devx_async_event_data *entry, *tmp;
+	struct mlx5_ib_dev *dev = ev_file->dev;
 
-	mutex_lock(&ev_file->dev->devx_event_table.event_xa_lock);
+	mutex_lock(&dev->devx_event_table.event_xa_lock);
 	/* delete the subscriptions which are related to this FD */
 	list_for_each_entry_safe(event_sub, event_sub_tmp,
 				 &ev_file->subscribed_events_list, file_list) {
-		devx_cleanup_subscription(ev_file->dev, event_sub);
+		devx_cleanup_subscription(dev, event_sub);
 		if (event_sub->eventfd)
 			eventfd_ctx_put(event_sub->eventfd);
 
@@ -2658,7 +2686,7 @@ static int devx_async_event_close(struct inode *inode, struct file *filp)
 		kfree_rcu(event_sub, rcu);
 	}
 
-	mutex_unlock(&ev_file->dev->devx_event_table.event_xa_lock);
+	mutex_unlock(&dev->devx_event_table.event_xa_lock);
 
 	/* free the pending events allocation */
 	if (!ev_file->omit_data) {
@@ -2670,7 +2698,7 @@ static int devx_async_event_close(struct inode *inode, struct file *filp)
 	}
 
 	uverbs_close_fd(filp);
-	put_device(&ev_file->dev->ib_dev.dev);
+	put_device(&dev->ib_dev.dev);
 	return 0;
 }
diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
index b8841355fcd5..b198ff10cde9 100644
--- a/drivers/infiniband/hw/mlx5/flow.c
+++ b/drivers/infiniband/hw/mlx5/flow.c
@@ -32,6 +32,9 @@ mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
 	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB:
 		*namespace = MLX5_FLOW_NAMESPACE_FDB;
 		break;
+	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX:
+		*namespace = MLX5_FLOW_NAMESPACE_RDMA_RX;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -101,6 +104,11 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
 	if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !dest_devx)
 		return -EINVAL;
 
+	/* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
+	if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
+	    ((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
+		return -EINVAL;
+
 	if (dest_devx) {
 		devx_obj = uverbs_attr_get_obj(
 			attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
@@ -112,8 +120,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
 		 */
 		if (!mlx5_ib_devx_is_flow_dest(devx_obj, &dest_id, &dest_type))
 			return -EINVAL;
-		/* Allow only flow table as dest when inserting to FDB */
-		if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB &&
+		/* Allow only flow table as dest when inserting to FDB or RDMA_RX */
+		if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB ||
+		     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
 		    dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
 			return -EINVAL;
 	} else if (dest_qp) {
@@ -322,11 +331,11 @@ void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
 	switch (maction->flow_action_raw.sub_type) {
 	case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
 		mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
-					   maction->flow_action_raw.action_id);
+					   maction->flow_action_raw.modify_hdr);
 		break;
 	case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
 		mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
-					     maction->flow_action_raw.action_id);
+					     maction->flow_action_raw.pkt_reformat);
 		break;
 	case MLX5_IB_FLOW_ACTION_DECAP:
 		break;
@@ -352,10 +361,11 @@ mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
 	if (!maction)
 		return ERR_PTR(-ENOMEM);
 
-	ret = mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in,
-				       &maction->flow_action_raw.action_id);
+	maction->flow_action_raw.modify_hdr =
+		mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in);
 
-	if (ret) {
+	if (IS_ERR(maction->flow_action_raw.modify_hdr)) {
+		ret = PTR_ERR(maction->flow_action_raw.modify_hdr);
 		kfree(maction);
 		return ERR_PTR(ret);
 	}
@@ -479,11 +489,13 @@ static int mlx5_ib_flow_action_create_packet_reformat_ctx(
 	if (ret)
 		return ret;
 
-	ret = mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len,
-					 in, namespace,
-					 &maction->flow_action_raw.action_id);
-	if (ret)
+	maction->flow_action_raw.pkt_reformat =
+		mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len,
+					   in, namespace);
+	if (IS_ERR(maction->flow_action_raw.pkt_reformat)) {
+		ret = PTR_ERR(maction->flow_action_raw.pkt_reformat);
 		return ret;
+	}
 
 	maction->flow_action_raw.sub_type =
 		MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index c2a5780cb394..831539419c30 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -535,7 +535,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
 	props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
 	props->pkey_tbl_len = 1;
 	props->state = IB_PORT_DOWN;
-	props->phys_state = 3;
+	props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
 
 	mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr);
 	props->qkey_viol_cntr = qkey_viol_cntr;
@@ -561,7 +561,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
 
 	if (netif_running(ndev) && netif_carrier_ok(ndev)) {
 		props->state = IB_PORT_ACTIVE;
-		props->phys_state = 5;
+		props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
 	}
 
 	ndev_ib_mtu = iboe_get_mtu(ndev->mtu);
@@ -1023,7 +1023,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 		props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
 
 	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
-		if (MLX5_CAP_GEN(mdev, pg))
+		if (dev->odp_caps.general_caps & IB_ODP_SUPPORT)
 			props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
 		props->odp_caps = dev->odp_caps;
 	}
@@ -1867,10 +1867,6 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
 	if (err)
 		goto out_sys_pages;
 
-	if (ibdev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)
-		context->ibucontext.invalidate_range =
-			&mlx5_ib_invalidate_range;
-
 	if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
 		err = mlx5_ib_devx_create(dev, true);
 		if (err < 0)
@@ -1999,11 +1995,6 @@ static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
 	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
 	struct mlx5_bfreg_info *bfregi;
 
-	/* All umem's must be destroyed before destroying the ucontext. */
-	mutex_lock(&ibcontext->per_mm_list_lock);
-	WARN_ON(!list_empty(&ibcontext->per_mm_list));
-	mutex_unlock(&ibcontext->per_mm_list_lock);
-
 	bfregi = &context->bfregi;
 	mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
 
@@ -2280,6 +2271,7 @@ static inline int check_dm_type_support(struct mlx5_ib_dev *dev,
 			return -EOPNOTSUPP;
 		break;
 	case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+	case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
 		if (!capable(CAP_SYS_RAWIO) ||
 		    !capable(CAP_NET_RAW))
 			return -EPERM;
@@ -2344,20 +2336,20 @@ static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
 				  struct uverbs_attr_bundle *attrs,
 				  int type)
 {
-	struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm;
+	struct mlx5_core_dev *dev = to_mdev(ctx->device)->mdev;
 	u64 act_size;
 	int err;
 
 	/* Allocation size must a multiple of the basic block size
 	 * and a power of 2.
 	 */
-	act_size = round_up(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dm_db->dev));
+	act_size = round_up(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dev));
 	act_size = roundup_pow_of_two(act_size);
 
 	dm->size = act_size;
-	err = mlx5_cmd_alloc_sw_icm(dm_db, type, act_size,
-				    to_mucontext(ctx)->devx_uid, &dm->dev_addr,
-				    &dm->icm_dm.obj_id);
+	err = mlx5_dm_sw_icm_alloc(dev, type, act_size,
+				   to_mucontext(ctx)->devx_uid, &dm->dev_addr,
+				   &dm->icm_dm.obj_id);
 	if (err)
 		return err;
 
@@ -2365,9 +2357,9 @@ static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
 			     MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
 			     &dm->dev_addr, sizeof(dm->dev_addr));
 	if (err)
-		mlx5_cmd_dealloc_sw_icm(dm_db, type, dm->size,
-					to_mucontext(ctx)->devx_uid,
-					dm->dev_addr, dm->icm_dm.obj_id);
+		mlx5_dm_sw_icm_dealloc(dev, type, dm->size,
+				       to_mucontext(ctx)->devx_uid, dm->dev_addr,
+				       dm->icm_dm.obj_id);
 
 	return err;
 }
@@ -2407,8 +2399,14 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
 				    attrs);
 		break;
 	case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+		err = handle_alloc_dm_sw_icm(context, dm,
+					     attr, attrs,
+					     MLX5_SW_ICM_TYPE_STEERING);
+		break;
 	case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
-		err = handle_alloc_dm_sw_icm(context, dm, attr, attrs, type);
+		err = handle_alloc_dm_sw_icm(context, dm,
+					     attr, attrs,
+					     MLX5_SW_ICM_TYPE_HEADER_MODIFY);
 		break;
 	default:
 		err = -EOPNOTSUPP;
@@ -2428,6 +2426,7 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
 {
 	struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context(
 		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+	struct mlx5_core_dev *dev = to_mdev(ibdm->device)->mdev;
 	struct mlx5_dm *dm_db = &to_mdev(ibdm->device)->dm;
 	struct mlx5_ib_dm *dm = to_mdm(ibdm);
 	u32 page_idx;
@@ -2439,19 +2438,23 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
 		if (ret)
 			return ret;
 
-		page_idx = (dm->dev_addr -
-			    pci_resource_start(dm_db->dev->pdev, 0) -
-			    MLX5_CAP64_DEV_MEM(dm_db->dev,
-					       memic_bar_start_addr)) >>
-			   PAGE_SHIFT;
+		page_idx = (dm->dev_addr - pci_resource_start(dev->pdev, 0) -
+			    MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr)) >>
+			   PAGE_SHIFT;
 		bitmap_clear(ctx->dm_pages, page_idx,
 			     DIV_ROUND_UP(dm->size, PAGE_SIZE));
 		break;
 	case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+		ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_STEERING,
+					     dm->size, ctx->devx_uid, dm->dev_addr,
+					     dm->icm_dm.obj_id);
+		if (ret)
+			return ret;
+		break;
 	case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
-		ret = mlx5_cmd_dealloc_sw_icm(dm_db, dm->type, dm->size,
-					      ctx->devx_uid, dm->dev_addr,
-					      dm->icm_dm.obj_id);
+		ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_HEADER_MODIFY,
+					     dm->size, ctx->devx_uid, dm->dev_addr,
+					     dm->icm_dm.obj_id);
 		if (ret)
 			return ret;
 		break;
@@ -2646,7 +2649,8 @@ int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
 		if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
 			return -EINVAL;
 		action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
-		action->modify_id = maction->flow_action_raw.action_id;
+		action->modify_hdr =
+			maction->flow_action_raw.modify_hdr;
 		return 0;
 	}
 	if (maction->flow_action_raw.sub_type ==
@@ -2663,8 +2667,8 @@ int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
 			return -EINVAL;
 		action->action |=
 			MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
-		action->reformat_id =
-			maction->flow_action_raw.action_id;
+		action->pkt_reformat =
+			maction->flow_action_raw.pkt_reformat;
 		return 0;
 	}
 	/* fall through */
@@ -3967,6 +3971,11 @@ _get_flow_table(struct mlx5_ib_dev *dev,
 			esw_encap)
 			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
 		priority = FDB_BYPASS_PATH;
+	} else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) {
+		max_table_size =
+			BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
+						       log_max_ft_size));
+		priority = fs_matcher->priority;
 	}
 
 	max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
@@ -3981,6 +3990,8 @@ _get_flow_table(struct mlx5_ib_dev *dev,
 		prio = &dev->flow_db->egress_prios[priority];
 	else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB)
 		prio = &dev->flow_db->fdb;
+	else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX)
+		prio = &dev->flow_db->rdma_rx[priority];
 
 	if (!prio)
 		return ERR_PTR(-EINVAL);
@@ -5322,11 +5333,21 @@ static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
 	INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
 };
 
+static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev)
+{
+	return MLX5_ESWITCH_MANAGER(mdev) &&
+	       mlx5_ib_eswitch_mode(mdev->priv.eswitch) ==
+	       MLX5_ESWITCH_OFFLOADS;
+}
+
 static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
 {
+	int num_cnt_ports;
 	int i;
 
-	for (i = 0; i < dev->num_ports; i++) {
+	num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
+
+	for (i = 0; i < num_cnt_ports; i++) {
 		if (dev->port[i].cnts.set_id_valid)
 			mlx5_core_dealloc_q_counter(dev->mdev,
 						    dev->port[i].cnts.set_id);
@@ -5428,13 +5449,15 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
 
 static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
 {
+	int num_cnt_ports;
 	int err = 0;
 	int i;
 	bool is_shared;
 
 	is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
+	num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
 
-	for (i = 0; i < dev->num_ports; i++) {
+	for (i = 0; i < num_cnt_ports; i++) {
 		err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
 		if (err)
 			goto err_alloc;
@@ -5454,7 +5477,6 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
 		}
 		dev->port[i].cnts.set_id_valid = true;
 	}
-
 	return 0;
 
 err_alloc:
@@ -5462,25 +5484,50 @@ err_alloc:
 	return err;
 }
 
+static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
+						   u8 port_num)
+{
+	return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts :
+						   &dev->port[port_num].cnts;
+}
+
+/**
+ * mlx5_ib_get_counters_id - Returns counters id to use for device+port
+ * @dev:	Pointer to mlx5 IB device
+ * @port_num:	Zero based port number
+ *
+ * mlx5_ib_get_counters_id() Returns counters set id to use for given
+ * device port combination in switchdev and non switchdev mode of the
+ * parent device.
+ */
+u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num)
+{
+	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
+
+	return cnts->set_id;
+}
+
 static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
 						    u8 port_num)
 {
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
-	struct mlx5_ib_port *port = &dev->port[port_num - 1];
+	const struct mlx5_ib_counters *cnts;
+	bool is_switchdev = is_mdev_switchdev_mode(dev->mdev);
 
-	/* We support only per port stats */
-	if (port_num == 0)
+	if ((is_switchdev && port_num) || (!is_switchdev && !port_num))
 		return NULL;
 
-	return rdma_alloc_hw_stats_struct(port->cnts.names,
-					  port->cnts.num_q_counters +
-					  port->cnts.num_cong_counters +
-					  port->cnts.num_ext_ppcnt_counters,
+	cnts = get_counters(dev, port_num - 1);
+
+	return rdma_alloc_hw_stats_struct(cnts->names,
+					  cnts->num_q_counters +
+					  cnts->num_cong_counters +
+					  cnts->num_ext_ppcnt_counters,
 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
 
 static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
-				    struct mlx5_ib_port *port,
+				    const struct mlx5_ib_counters *cnts,
 				    struct rdma_hw_stats *stats,
 				    u16 set_id)
 {
@@ -5497,8 +5544,8 @@ static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
 	if (ret)
 		goto free;
 
-	for (i = 0; i < port->cnts.num_q_counters; i++) {
-		val = *(__be32 *)(out + port->cnts.offsets[i]);
+	for (i = 0; i < cnts->num_q_counters; i++) {
+		val = *(__be32 *)(out + cnts->offsets[i]);
 		stats->value[i] = (u64)be32_to_cpu(val);
 	}
 
@@ -5508,10 +5555,10 @@ free:
 }
 
 static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
-					    struct mlx5_ib_port *port,
-					    struct rdma_hw_stats *stats)
+					    const struct mlx5_ib_counters *cnts,
+					    struct rdma_hw_stats *stats)
 {
-	int offset = port->cnts.num_q_counters + port->cnts.num_cong_counters;
+	int offset = cnts->num_q_counters + cnts->num_cong_counters;
 	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
 	int ret, i;
 	void *out;
@@ -5524,12 +5571,10 @@ static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
 	if (ret)
 		goto free;
 
-	for (i = 0; i < port->cnts.num_ext_ppcnt_counters; i++) {
+	for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
 		stats->value[i + offset] =
 			be64_to_cpup((__be64 *)(out +
-				     port->cnts.offsets[i + offset]));
-	}
-
+				     cnts->offsets[i + offset]));
 free:
 	kvfree(out);
 	return ret;
@@ -5540,7 +5585,7 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
 				u8 port_num, int index)
 {
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
-	struct mlx5_ib_port *port = &dev->port[port_num - 1];
+	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
 	struct mlx5_core_dev *mdev;
 	int ret, num_counters;
 	u8 mdev_port_num;
@@ -5548,18 +5593,17 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
 	if (!stats)
 		return -EINVAL;
 
-	num_counters = port->cnts.num_q_counters +
-		       port->cnts.num_cong_counters +
-		       port->cnts.num_ext_ppcnt_counters;
+	num_counters = cnts->num_q_counters +
+		       cnts->num_cong_counters +
+		       cnts->num_ext_ppcnt_counters;
 
 	/* q_counters are per IB device, query the master mdev */
-	ret = mlx5_ib_query_q_counters(dev->mdev, port, stats,
-				       port->cnts.set_id);
+	ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id);
 	if (ret)
 		return ret;
 
 	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
-		ret = mlx5_ib_query_ext_ppcnt_counters(dev, port, stats);
+		ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
 		if (ret)
 			return ret;
 	}
@@ -5576,10 +5620,10 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
 		}
 		ret = mlx5_lag_query_cong_counters(dev->mdev,
 						   stats->value +
-						   port->cnts.num_q_counters,
-						   port->cnts.num_cong_counters,
-						   port->cnts.offsets +
-						   port->cnts.num_q_counters);
+						   cnts->num_q_counters,
+						   cnts->num_cong_counters,
+						   cnts->offsets +
+						   cnts->num_q_counters);
 
 		mlx5_ib_put_native_port_mdev(dev, port_num);
 		if (ret)
@@ -5594,20 +5638,22 @@ static struct rdma_hw_stats *
 mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
 {
 	struct mlx5_ib_dev *dev = to_mdev(counter->device);
-	struct mlx5_ib_port *port = &dev->port[counter->port - 1];
+	const struct mlx5_ib_counters *cnts =
+		get_counters(dev, counter->port - 1);
 
 	/* Q counters are in the beginning of all counters */
-	return rdma_alloc_hw_stats_struct(port->cnts.names,
-					  port->cnts.num_q_counters,
+	return rdma_alloc_hw_stats_struct(cnts->names,
+					  cnts->num_q_counters,
 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
 
 static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
 {
 	struct mlx5_ib_dev *dev = to_mdev(counter->device);
-	struct mlx5_ib_port *port = &dev->port[counter->port - 1];
+	const struct mlx5_ib_counters *cnts =
+		get_counters(dev, counter->port - 1);
 
-	return mlx5_ib_query_q_counters(dev->mdev, port,
+	return mlx5_ib_query_q_counters(dev->mdev, cnts,
 					counter->stats, counter->id);
 }
 
@@ -5784,7 +5830,6 @@ static void init_delay_drop(struct mlx5_ib_dev *dev)
 		mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n");
 }
 
-/* The mlx5_ib_multiport_mutex should be held when calling this function */
 static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
 				      struct mlx5_ib_multiport_info *mpi)
 {
@@ -5794,6 +5839,8 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
 	int err;
 	int i;
 
+	lockdep_assert_held(&mlx5_ib_multiport_mutex);
+
 	mlx5_ib_cleanup_cong_debugfs(ibdev, port_num);
 
 	spin_lock(&port->mp.mpi_lock);
@@ -5802,13 +5849,12 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
 		return;
 	}
 
-	if (mpi->mdev_events.notifier_call)
-		mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events);
-	mpi->mdev_events.notifier_call = NULL;
-
 	mpi->ibdev = NULL;
 
 	spin_unlock(&port->mp.mpi_lock);
+	if (mpi->mdev_events.notifier_call)
+		mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events);
+	mpi->mdev_events.notifier_call = NULL;
 	mlx5_remove_netdev_notifier(ibdev, port_num);
 	spin_lock(&port->mp.mpi_lock);
 
@@ -5844,13 +5890,14 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
 	ibdev->port[port_num].roce.last_port_state = IB_PORT_DOWN;
 }
 
-/* The mlx5_ib_multiport_mutex should be held when calling this function */
 static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
 				    struct mlx5_ib_multiport_info *mpi)
 {
 	u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1;
 	int err;
 
+	lockdep_assert_held(&mlx5_ib_multiport_mutex);
+
 	spin_lock(&ibdev->port[port_num].mp.mpi_lock);
 	if (ibdev->port[port_num].mp.mpi) {
 		mlx5_ib_dbg(ibdev, "port %d already affiliated.\n",
@@ -6097,8 +6144,6 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
 
 static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
 {
-	struct mlx5_core_dev *mdev = dev->mdev;
-
 	mlx5_ib_cleanup_multiport_master(dev);
 	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
 		srcu_barrier(&dev->mr_srcu);
@@ -6106,29 +6151,11 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
 	}
 
 	WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES));
-
-	WARN_ON(dev->dm.steering_sw_icm_alloc_blocks &&
-		!bitmap_empty(
-			dev->dm.steering_sw_icm_alloc_blocks,
-			BIT(MLX5_CAP_DEV_MEM(mdev, log_steering_sw_icm_size) -
-			    MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev))));
-
-	kfree(dev->dm.steering_sw_icm_alloc_blocks);
-
-	WARN_ON(dev->dm.header_modify_sw_icm_alloc_blocks &&
-		!bitmap_empty(dev->dm.header_modify_sw_icm_alloc_blocks,
-			      BIT(MLX5_CAP_DEV_MEM(
-					  mdev, log_header_modify_sw_icm_size) -
-				  MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev))));
-
-	kfree(dev->dm.header_modify_sw_icm_alloc_blocks);
 }
 
 static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_core_dev *mdev = dev->mdev;
-	u64 header_modify_icm_blocks = 0;
-	u64 steering_icm_blocks = 0;
 	int err;
 	int i;
 
@@ -6140,6 +6167,8 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
 		dev->port[i].roce.last_port_state = IB_PORT_DOWN;
 	}
 
+	mlx5_ib_internal_fill_odp_caps(dev);
+
 	err = mlx5_ib_init_multiport_master(dev);
 	if (err)
 		return err;
@@ -6173,51 +6202,17 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
 	INIT_LIST_HEAD(&dev->qp_list);
 	spin_lock_init(&dev->reset_flow_resource_lock);
 
-	if (MLX5_CAP_GEN_64(mdev, general_obj_types) &
-	    MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM) {
-		if (MLX5_CAP64_DEV_MEM(mdev, steering_sw_icm_start_address)) {
-			steering_icm_blocks =
-				BIT(MLX5_CAP_DEV_MEM(mdev,
-						     log_steering_sw_icm_size) -
-				    MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev));
-
-			dev->dm.steering_sw_icm_alloc_blocks =
-				kcalloc(BITS_TO_LONGS(steering_icm_blocks),
-					sizeof(unsigned long), GFP_KERNEL);
-			if (!dev->dm.steering_sw_icm_alloc_blocks)
-				goto err_mp;
-		}
-
-		if (MLX5_CAP64_DEV_MEM(mdev,
-				       header_modify_sw_icm_start_address)) {
-			header_modify_icm_blocks = BIT(
-				MLX5_CAP_DEV_MEM(
-					mdev, log_header_modify_sw_icm_size) -
-				MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev));
-
-			dev->dm.header_modify_sw_icm_alloc_blocks =
-				kcalloc(BITS_TO_LONGS(header_modify_icm_blocks),
-					sizeof(unsigned long), GFP_KERNEL);
-			if (!dev->dm.header_modify_sw_icm_alloc_blocks)
-				goto err_dm;
-		}
-	}
-
 	spin_lock_init(&dev->dm.lock);
 	dev->dm.dev = mdev;
 
 	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
 		err = init_srcu_struct(&dev->mr_srcu);
 		if (err)
-			goto err_dm;
+			goto err_mp;
 	}
 
 	return 0;
 
-err_dm:
-	kfree(dev->dm.steering_sw_icm_alloc_blocks);
-	kfree(dev->dm.header_modify_sw_icm_alloc_blocks);
-
 err_mp:
 	mlx5_ib_cleanup_multiport_master(dev);
 
@@ -6564,8 +6559,6 @@ static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
 
 static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
 {
-	mlx5_ib_internal_fill_odp_caps(dev);
-
 	return mlx5_ib_odp_init_one(dev);
 }
 
@@ -6933,7 +6926,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 	dev->port = kcalloc(num_ports, sizeof(*dev->port),
 			    GFP_KERNEL);
 	if (!dev->port) {
-		ib_dealloc_device((struct ib_device *)dev);
+		ib_dealloc_device(&dev->ib_dev);
 		return NULL;
 	}
 
@@ -6960,6 +6953,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
 			mlx5_ib_unbind_slave_port(mpi->ibdev, mpi);
 		list_del(&mpi->list);
 		mutex_unlock(&mlx5_ib_multiport_mutex);
+		kfree(mpi);
 		return;
 	}
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index fe1a76d8531c..b5aece786b36 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -56,18 +56,6 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
 	struct scatterlist *sg;
 	int entry;
 
-	if (umem->is_odp) {
-		unsigned int page_shift = to_ib_umem_odp(umem)->page_shift;
-
-		*ncont = ib_umem_page_count(umem);
-		*count = *ncont << (page_shift - PAGE_SHIFT);
-		*shift = page_shift;
-		if (order)
-			*order = ilog2(roundup_pow_of_two(*ncont));
-
-		return;
-	}
-
 	addr = addr >> PAGE_SHIFT;
 	tmp = (unsigned long)addr;
 	m = find_first_bit(&tmp, BITS_PER_LONG);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index c482f19958b3..2ceaef3ea3fb 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -200,6 +200,7 @@ struct mlx5_ib_flow_db {
 	struct mlx5_ib_flow_prio	sniffer[MLX5_IB_NUM_SNIFFER_FTS];
 	struct mlx5_ib_flow_prio	egress[MLX5_IB_NUM_EGRESS_FTS];
 	struct mlx5_ib_flow_prio	fdb;
+	struct mlx5_ib_flow_prio	rdma_rx[MLX5_IB_NUM_FLOW_FT];
 	struct mlx5_flow_table		*lag_demux_ft;
 	/* Protect flow steering bypass flow tables
 	 * when add/del flow rules.
@@ -481,6 +482,7 @@ struct mlx5_umr_wr {
 	u64				length;
 	int				access_flags;
 	u32				mkey;
+	u8				ignore_free_state:1;
 };
 
 static inline const struct mlx5_umr_wr *umr_wr(const struct ib_send_wr *wr)
@@ -867,7 +869,10 @@ struct mlx5_ib_flow_action {
 		struct {
 			struct mlx5_ib_dev *dev;
 			u32 sub_type;
-			u32 action_id;
+			union {
+				struct mlx5_modify_hdr *modify_hdr;
+				struct mlx5_pkt_reformat *pkt_reformat;
+			};
 		} flow_action_raw;
 	};
 };
@@ -880,8 +885,6 @@ struct mlx5_dm {
 	 */
 	spinlock_t lock;
 	DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES);
-	unsigned long *steering_sw_icm_alloc_blocks;
-	unsigned long *header_modify_sw_icm_alloc_blocks;
 };
 
 struct mlx5_read_counters_attr {
@@ -1474,4 +1477,19 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
 			bool dyn_bfreg);
 
 int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter);
+u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num);
+
+static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev,
+				       bool do_modify_atomic)
+{
+	if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
+		return false;
+
+	if (do_modify_atomic &&
+	    MLX5_CAP_GEN(dev->mdev, atomic) &&
+	    MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
+		return false;
+
+	return true;
+}
 #endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 20ece6e0b2fc..1eff031ef048 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -51,22 +51,12 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
 static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
 static int mr_cache_max_order(struct mlx5_ib_dev *dev);
 static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
-static bool umr_can_modify_entity_size(struct mlx5_ib_dev *dev)
-{
-	return !MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled);
-}
 
 static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
 {
 	return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
 }
 
-static bool use_umr(struct mlx5_ib_dev *dev, int order)
-{
-	return order <= mr_cache_max_order(dev) &&
-		umr_can_modify_entity_size(dev);
-}
-
 static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
 	int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
@@ -545,13 +535,16 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 		return;
 
 	c = order2idx(dev, mr->order);
-	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
-		mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
-		return;
-	}
+	WARN_ON(c < 0 || c >= MAX_MR_CACHE_ENTRIES);
 
-	if (unreg_umr(dev, mr))
+	if (unreg_umr(dev, mr)) {
+		mr->allocated_from_cache = false;
+		destroy_mkey(dev, mr);
+		ent = &cache->ent[c];
+		if (ent->cur < ent->limit)
+			queue_work(cache->wq, &ent->work);
 		return;
+	}
 
 	ent = &cache->ent[c];
 	spin_lock_irq(&ent->lock);
@@ -791,19 +784,37 @@ static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata,
 		       int *ncont, int *order)
 {
 	struct ib_umem *u;
-	int err;
 
 	*umem = NULL;
 
-	u = ib_umem_get(udata, start, length, access_flags, 0);
-	err = PTR_ERR_OR_ZERO(u);
-	if (err) {
-		mlx5_ib_dbg(dev, "umem get failed (%d)\n", err);
-		return err;
+	if (access_flags & IB_ACCESS_ON_DEMAND) {
+		struct ib_umem_odp *odp;
+
+		odp = ib_umem_odp_get(udata, start, length, access_flags);
+		if (IS_ERR(odp)) {
+			mlx5_ib_dbg(dev, "umem get failed (%ld)\n",
+				    PTR_ERR(odp));
+			return PTR_ERR(odp);
+		}
+
+		u = &odp->umem;
+
+		*page_shift = odp->page_shift;
+		*ncont = ib_umem_odp_num_pages(odp);
+		*npages = *ncont << (*page_shift - PAGE_SHIFT);
+		if (order)
+			*order = ilog2(roundup_pow_of_two(*ncont));
+	} else {
+		u = ib_umem_get(udata, start, length, access_flags, 0);
+		if (IS_ERR(u)) {
+			mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u));
+			return PTR_ERR(u);
+		}
+
+		mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
+				   page_shift, ncont, order);
 	}
 
-	mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
-			   page_shift, ncont, order);
 	if (!*npages) {
 		mlx5_ib_warn(dev, "avoid zero region\n");
 		ib_umem_release(u);
@@ -1268,7 +1279,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 	struct mlx5_ib_mr *mr = NULL;
-	bool populate_mtts = false;
+	bool use_umr;
 	struct ib_umem *umem;
 	int page_shift;
 	int npages;
@@ -1300,29 +1311,28 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	if (err < 0)
 		return ERR_PTR(err);
 
-	if (use_umr(dev, order)) {
+	use_umr = mlx5_ib_can_use_umr(dev, true);
+
+	if (order <= mr_cache_max_order(dev) && use_umr) {
 		mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont,
 					 page_shift, order, access_flags);
 		if (PTR_ERR(mr) == -EAGAIN) {
 			mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
 			mr = NULL;
 		}
-		populate_mtts = false;
 	} else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) {
 		if (access_flags & IB_ACCESS_ON_DEMAND) {
 			err = -EINVAL;
 			pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");
 			goto error;
 		}
-		populate_mtts = true;
+		use_umr = false;
 	}
 
 	if (!mr) {
-		if (!umr_can_modify_entity_size(dev))
-			populate_mtts = true;
 		mutex_lock(&dev->slow_path_mutex);
 		mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
-				page_shift, access_flags, populate_mtts);
+				page_shift, access_flags, !use_umr);
 		mutex_unlock(&dev->slow_path_mutex);
 	}
 
@@ -1338,7 +1348,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
 	update_odp_mr(mr);
 
-	if (!populate_mtts) {
+	if (use_umr) {
 		int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
 
 		if (access_flags & IB_ACCESS_ON_DEMAND)
@@ -1373,9 +1383,11 @@ static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 		return 0;
 
 	umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
-			      MLX5_IB_SEND_UMR_FAIL_IF_FREE;
+			      MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
 	umrwr.wr.opcode = MLX5_IB_WR_UMR;
+	umrwr.pd = dev->umrc.pd;
 	umrwr.mkey = mr->mmkey.key;
+	umrwr.ignore_free_state = 1;
 
 	return mlx5_ib_post_send_wait(dev, &umrwr);
 }
@@ -1452,7 +1464,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 		goto err;
 	}
 
-	if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) {
+	if (!mlx5_ib_can_use_umr(dev, true) ||
+	    (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len))) {
 		/*
 		 * UMR can't be used - MKey needs to be replaced.
 		 */
@@ -1577,10 +1590,10 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 		mr->sig = NULL;
 	}
 
-	mlx5_free_priv_descs(mr);
-
-	if (!allocated_from_cache)
+	if (!allocated_from_cache) {
 		destroy_mkey(dev, mr);
+		mlx5_free_priv_descs(mr);
+	}
 }
 
 static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
@@ -1604,7 +1617,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 		/* Wait for all running page-fault handlers to finish. */
 		synchronize_srcu(&dev->mr_srcu);
 		/* Destroy all page mappings */
-		if (umem_odp->page_list)
+		if (!umem_odp->is_implicit_odp)
 			mlx5_ib_invalidate_range(umem_odp,
 						 ib_umem_start(umem_odp),
 						 ib_umem_end(umem_odp));
@@ -1615,7 +1628,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 		 * so that there will not be any invalidations in
 		 * flight, looking at the *mr struct.
 		 */
-		ib_umem_release(umem);
+		ib_umem_odp_release(umem_odp);
 		atomic_sub(npages, &dev->mdev->priv.reg_pages);
 
 		/* Avoid double-freeing the umem. */
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 5b642d81e617..2e9b43061797 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -184,7 +184,7 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
 	for (i = 0; i < nentries; i++, pklm++) {
 		pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
 		va = (offset + i) * MLX5_IMR_MTT_SIZE;
-		if (odp && odp->umem.address == va) {
+		if (odp && ib_umem_start(odp) == va) {
 			struct mlx5_ib_mr *mtt = odp->private;
 
 			pklm->key = cpu_to_be32(mtt->ibmr.lkey);
@@ -206,7 +206,7 @@ static void mr_leaf_free_action(struct work_struct *work)
 	mr->parent = NULL;
 	synchronize_srcu(&mr->dev->mr_srcu);
 
-	ib_umem_release(&odp->umem);
+	ib_umem_odp_release(odp);
 	if (imr->live)
 		mlx5_ib_update_xlt(imr, idx, 1, 0,
 				   MLX5_IB_UPD_XLT_INDIRECT |
@@ -246,7 +246,7 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
 	 * overwrite the same MTTs.  Concurent invalidations might race us,
 	 * but they will write 0s as well, so no difference in the end result.
 	 */
-
+	mutex_lock(&umem_odp->umem_mutex);
 	for (addr = start; addr < end; addr += BIT(umem_odp->page_shift)) {
 		idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
 		/*
@@ -278,6 +278,7 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
 				   idx - blk_start_idx + 1, 0,
 				   MLX5_IB_UPD_XLT_ZAP |
 				   MLX5_IB_UPD_XLT_ATOMIC);
+	mutex_unlock(&umem_odp->umem_mutex);
 
 	/*
 	 * We are now sure that the device will not access the
 	 * memory. We can safely unmap it, and mark it as dirty if
@@ -300,7 +301,8 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
 
 	memset(caps, 0, sizeof(*caps));
 
-	if (!MLX5_CAP_GEN(dev->mdev, pg))
+	if (!MLX5_CAP_GEN(dev->mdev, pg) ||
+	    !mlx5_ib_can_use_umr(dev, true))
 		return;
 
 	caps->general_caps = IB_ODP_SUPPORT;
@@ -354,7 +356,8 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
 
 	if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) &&
 	    MLX5_CAP_GEN(dev->mdev, null_mkey) &&
-	    MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
+	    MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) &&
+	    !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled))
 		caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT;
 
 	return;
@@ -383,7 +386,7 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
 }
 
 static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
-					    struct ib_umem *umem,
+					    struct ib_umem_odp *umem_odp,
 					    bool ksm, int access_flags)
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
@@ -401,7 +404,7 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
 	mr->dev = dev;
 	mr->access_flags = access_flags;
 	mr->mmkey.iova = 0;
-	mr->umem = umem;
+	mr->umem = &umem_odp->umem;
 
 	if (ksm) {
 		err = mlx5_ib_update_xlt(mr, 0,
@@ -461,18 +464,17 @@ next_mr:
 		if (nentries)
 			nentries++;
 	} else {
-		odp = ib_alloc_odp_umem(odp_mr, addr,
-					MLX5_IMR_MTT_SIZE);
+		odp = ib_umem_odp_alloc_child(odp_mr, addr, MLX5_IMR_MTT_SIZE);
 		if (IS_ERR(odp)) {
 			mutex_unlock(&odp_mr->umem_mutex);
 			return ERR_CAST(odp);
 		}
 
-		mtt = implicit_mr_alloc(mr->ibmr.pd, &odp->umem, 0,
+		mtt = implicit_mr_alloc(mr->ibmr.pd, odp, 0,
 					mr->access_flags);
 		if (IS_ERR(mtt)) {
 			mutex_unlock(&odp_mr->umem_mutex);
-			ib_umem_release(&odp->umem);
+			ib_umem_odp_release(odp);
 			return ERR_CAST(mtt);
 		}
 
@@ -494,7 +496,7 @@ next_mr:
 	addr += MLX5_IMR_MTT_SIZE;
 	if (unlikely(addr < io_virt + bcnt)) {
 		odp = odp_next(odp);
-		if (odp && odp->umem.address != addr)
+		if (odp && ib_umem_start(odp) != addr)
 			odp = NULL;
 		goto next_mr;
 	}
@@ -518,19 +520,19 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
 					     int access_flags)
 {
 	struct mlx5_ib_mr *imr;
-	struct ib_umem *umem;
+	struct ib_umem_odp *umem_odp;
 
-	umem = ib_umem_get(udata, 0, 0, access_flags, 0);
-	if (IS_ERR(umem))
-		return ERR_CAST(umem);
+	umem_odp = ib_umem_odp_alloc_implicit(udata, access_flags);
+	if (IS_ERR(umem_odp))
+		return ERR_CAST(umem_odp);
 
-	imr = implicit_mr_alloc(&pd->ibpd, umem, 1, access_flags);
+	imr = implicit_mr_alloc(&pd->ibpd, umem_odp, 1, access_flags);
 	if (IS_ERR(imr)) {
-		ib_umem_release(umem);
+		ib_umem_odp_release(umem_odp);
 		return ERR_CAST(imr);
 	}
 
-	imr->umem = umem;
+	imr->umem = &umem_odp->umem;
 	init_waitqueue_head(&imr->q_leaf_free);
 	atomic_set(&imr->num_leaf_free, 0);
 	atomic_set(&imr->num_pending_prefetch, 0);
@@ -538,34 +540,31 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
 	return imr;
 }
 
-static int mr_leaf_free(struct ib_umem_odp *umem_odp, u64 start, u64 end,
-			void *cookie)
+void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
 {
-	struct mlx5_ib_mr *mr = umem_odp->private, *imr = cookie;
-
-	if (mr->parent != imr)
-		return 0;
-
-	ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
-				    ib_umem_end(umem_odp));
+	struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr);
+	struct rb_node *node;
 
-	if (umem_odp->dying)
-		return 0;
+	down_read(&per_mm->umem_rwsem);
+	for (node = rb_first_cached(&per_mm->umem_tree); node;
+	     node = rb_next(node)) {
+		struct ib_umem_odp *umem_odp =
+			rb_entry(node, struct ib_umem_odp, interval_tree.rb);
+		struct mlx5_ib_mr *mr = umem_odp->private;
 
-	WRITE_ONCE(umem_odp->dying, 1);
-	atomic_inc(&imr->num_leaf_free);
-	schedule_work(&umem_odp->work);
+		if (mr->parent != imr)
+			continue;
 
-	return 0;
-}
+		ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
+					    ib_umem_end(umem_odp));
 
-void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
-{
-	struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr);
+		if (umem_odp->dying)
+			continue;
 
-	down_read(&per_mm->umem_rwsem);
-	rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, 0, ULLONG_MAX,
-				      mr_leaf_free, true, imr);
+		WRITE_ONCE(umem_odp->dying, 1);
+		atomic_inc(&imr->num_leaf_free);
+		schedule_work(&umem_odp->work);
+	}
 	up_read(&per_mm->umem_rwsem);
 
 	wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
@@ -578,7 +577,6 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
 			u32 flags)
 {
 	int npages = 0, current_seq, page_shift, ret, np;
-	bool implicit = false;
 	struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
 	bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
 	bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH;
@@ -587,13 +585,12 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
 	struct ib_umem_odp *odp;
 	size_t size;
 
-	if (!odp_mr->page_list) {
+	if (odp_mr->is_implicit_odp) {
 		odp = implicit_mr_get_data(mr, io_virt, bcnt);
 
 		if (IS_ERR(odp))
 			return PTR_ERR(odp);
 		mr = odp->private;
-		implicit = true;
 	} else {
 		odp = odp_mr;
 	}
@@ -606,7 +603,7 @@ next_mr:
 	start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
 	access_mask = ODP_READ_ALLOWED_BIT;
 
-	if (prefetch && !downgrade && !mr->umem->writable) {
+	if (prefetch && !downgrade && !odp->umem.writable) {
 		/* prefetch with write-access must
 		 * be supported by the MR
 		 */
@@ -614,7 +611,7 @@ next_mr:
 		goto out;
 	}
 
-	if (mr->umem->writable && !downgrade)
+	if (odp->umem.writable && !downgrade)
 		access_mask |= ODP_WRITE_ALLOWED_BIT;
 
 	current_seq = READ_ONCE(odp->notifiers_seq);
@@ -624,8 +621,8 @@ next_mr:
 	 */
 	smp_rmb();
 
-	ret = ib_umem_odp_map_dma_pages(to_ib_umem_odp(mr->umem), io_virt, size,
-					access_mask, current_seq);
+	ret = ib_umem_odp_map_dma_pages(odp, io_virt, size, access_mask,
+					current_seq);
 
 	if (ret < 0)
 		goto out;
@@ -633,8 +630,7 @@ next_mr:
 	np = ret;
 
 	mutex_lock(&odp->umem_mutex);
-	if (!ib_umem_mmu_notifier_retry(to_ib_umem_odp(mr->umem),
-					current_seq)) {
+	if (!ib_umem_mmu_notifier_retry(odp, current_seq)) {
 		/*
 		 * No need to check whether the MTTs really belong to
 		 * this MR, since ib_umem_odp_map_dma_pages already
@@ -667,7 +663,7 @@ next_mr:
 
 		io_virt += size;
 		next = odp_next(odp);
-		if (unlikely(!next || next->umem.address != io_virt)) {
+		if (unlikely(!next || ib_umem_start(next) != io_virt)) {
 			mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
 				    io_virt, next);
 			return -EAGAIN;
@@ -681,19 +677,15 @@ next_mr:
 
 out:
 	if (ret == -EAGAIN) {
-		if (implicit || !odp->dying) {
-			unsigned long timeout =
-				msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
-
-			if (!wait_for_completion_timeout(
-					&odp->notifier_completion,
-					timeout)) {
-				mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n",
-					     current_seq, odp->notifiers_seq, odp->notifiers_count);
-			}
-		} else {
-			/* The MR is being killed, kill the QP as well. */
-			ret = -EFAULT;
+		unsigned long timeout = msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
+
+		if (!wait_for_completion_timeout(&odp->notifier_completion,
+						 timeout)) {
+			mlx5_ib_warn(
+				dev,
+				"timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n",
+				current_seq, odp->notifiers_seq,
+				odp->notifiers_count);
 		}
 	}
 
@@ -990,17 +982,6 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev,
 	return ret < 0 ? ret : npages;
 }
 
-static const u32 mlx5_ib_odp_opcode_cap[] = {
-	[MLX5_OPCODE_SEND]	       = IB_ODP_SUPPORT_SEND,
-	[MLX5_OPCODE_SEND_IMM]	       = IB_ODP_SUPPORT_SEND,
-	[MLX5_OPCODE_SEND_INVAL]       = IB_ODP_SUPPORT_SEND,
-	[MLX5_OPCODE_RDMA_WRITE]       = IB_ODP_SUPPORT_WRITE,
-	[MLX5_OPCODE_RDMA_WRITE_IMM]   = IB_ODP_SUPPORT_WRITE,
-	[MLX5_OPCODE_RDMA_READ]	       = IB_ODP_SUPPORT_READ,
-	[MLX5_OPCODE_ATOMIC_CS]	       = IB_ODP_SUPPORT_ATOMIC,
-	[MLX5_OPCODE_ATOMIC_FA]	       = IB_ODP_SUPPORT_ATOMIC,
-};
-
 /*
  * Parse initiator WQE. Advances the wqe pointer to point at the
  * scatter-gather list, and set wqe_end to the end of the WQE.
@@ -1011,12 +992,8 @@ static int mlx5_ib_mr_initiator_pfault_handler(
 {
 	struct mlx5_wqe_ctrl_seg *ctrl = *wqe;
 	u16 wqe_index = pfault->wqe.wqe_index;
-	u32 transport_caps;
 	struct mlx5_base_av *av;
 	unsigned ds, opcode;
-#if defined(DEBUG)
-	u32 ctrl_wqe_index, ctrl_qpn;
-#endif
 	u32 qpn = qp->trans_qp.base.mqp.qpn;
 
 	ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
@@ -1032,58 +1009,17 @@ static int mlx5_ib_mr_initiator_pfault_handler(
 		return -EFAULT;
 	}
 
-#if defined(DEBUG)
-	ctrl_wqe_index = (be32_to_cpu(ctrl->opmod_idx_opcode) &
-			MLX5_WQE_CTRL_WQE_INDEX_MASK) >>
-			MLX5_WQE_CTRL_WQE_INDEX_SHIFT;
-	if (wqe_index != ctrl_wqe_index) {
-		mlx5_ib_err(dev, "Got WQE with invalid wqe_index. wqe_index=0x%x, qpn=0x%x ctrl->wqe_index=0x%x\n",
-			    wqe_index, qpn,
-			    ctrl_wqe_index);
-		return -EFAULT;
-	}
-
-	ctrl_qpn = (be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_QPN_MASK) >>
-		MLX5_WQE_CTRL_QPN_SHIFT;
-	if (qpn != ctrl_qpn) {
-		mlx5_ib_err(dev, "Got WQE with incorrect QP number. wqe_index=0x%x, qpn=0x%x ctrl->qpn=0x%x\n",
-			    wqe_index, qpn,
-			    ctrl_qpn);
-		return -EFAULT;
-	}
-#endif /* DEBUG */
-
 	*wqe_end = *wqe + ds * MLX5_WQE_DS_UNITS;
 	*wqe += sizeof(*ctrl);
 
 	opcode = be32_to_cpu(ctrl->opmod_idx_opcode) &
 		 MLX5_WQE_CTRL_OPCODE_MASK;
 
-	switch (qp->ibqp.qp_type) {
-	case IB_QPT_XRC_INI:
+	if (qp->ibqp.qp_type == IB_QPT_XRC_INI)
 		*wqe += sizeof(struct mlx5_wqe_xrc_seg);
-		transport_caps = dev->odp_caps.per_transport_caps.xrc_odp_caps;
-		break;
-	case IB_QPT_RC:
-		transport_caps = dev->odp_caps.per_transport_caps.rc_odp_caps;
-		break;
-	case IB_QPT_UD:
-		transport_caps = dev->odp_caps.per_transport_caps.ud_odp_caps;
-		break;
-	default:
-		mlx5_ib_err(dev, "ODP fault on QP of an unsupported transport 0x%x\n",
-			    qp->ibqp.qp_type);
-		return -EFAULT;
-	}
-
-	if (unlikely(opcode >= ARRAY_SIZE(mlx5_ib_odp_opcode_cap) ||
-		     !(transport_caps & mlx5_ib_odp_opcode_cap[opcode]))) {
-		mlx5_ib_err(dev, "ODP fault on QP of an unsupported opcode 0x%x\n",
-			    opcode);
-		return -EFAULT;
-	}
-
-	if (qp->ibqp.qp_type == IB_QPT_UD) {
+	if (qp->ibqp.qp_type == IB_QPT_UD ||
+	    qp->qp_sub_type == MLX5_IB_QPT_DCI) {
 		av = *wqe;
 		if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV))
 			*wqe += sizeof(struct mlx5_av);
@@ -1146,19 +1082,6 @@ static int mlx5_ib_mr_responder_pfault_handler_rq(struct mlx5_ib_dev *dev,
 		return -EFAULT;
 	}
 
-	switch (qp->ibqp.qp_type) {
-	case IB_QPT_RC:
-		if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
-		      IB_ODP_SUPPORT_RECV))
-			goto invalid_transport_or_opcode;
-		break;
-	default:
-invalid_transport_or_opcode:
-		mlx5_ib_err(dev, "ODP fault on QP of an unsupported transport. transport: 0x%x\n",
-			    qp->ibqp.qp_type);
-		return -EFAULT;
-	}
-
 	*wqe_end = wqe + wqe_size;
 
 	return 0;
@@ -1208,7 +1131,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
 {
 	bool sq = pfault->type & MLX5_PFAULT_REQUESTOR;
 	u16 wqe_index = pfault->wqe.wqe_index;
-	void *wqe = NULL, *wqe_end = NULL;
+	void *wqe, *wqe_start = NULL, *wqe_end = NULL;
 	u32 bytes_mapped, total_wqe_bytes;
 	struct mlx5_core_rsc_common *res;
 	int resume_with_error = 1;
@@ -1229,12 +1152,13 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
 		goto resolve_page_fault;
 	}
 
-	wqe = (void *)__get_free_page(GFP_KERNEL);
-	if (!wqe) {
+	wqe_start = (void *)__get_free_page(GFP_KERNEL);
+	if (!wqe_start) {
 		mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n");
 		goto resolve_page_fault;
 	}
 
+	wqe = wqe_start;
 	qp = (res->res == MLX5_RES_QP) ? res_to_qp(res) : NULL;
 	if (qp && sq) {
 		ret = mlx5_ib_read_user_wqe_sq(qp, wqe_index, wqe, PAGE_SIZE,
@@ -1289,7 +1213,7 @@ resolve_page_fault:
 		    pfault->wqe.wq_num, resume_with_error,
 		    pfault->type);
 	mlx5_core_res_put(res);
-	free_page((unsigned long)wqe);
+	free_page((unsigned long)wqe_start);
 }
 
 static int pages_in_range(u64 address, u32 length)
@@ -1621,14 +1545,17 @@ void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
 
 static const struct ib_device_ops mlx5_ib_dev_odp_ops = {
 	.advise_mr = mlx5_ib_advise_mr,
+	.invalidate_range = mlx5_ib_invalidate_range,
 };
 
 int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
 {
 	int ret = 0;
 
-	if (dev->odp_caps.general_caps & IB_ODP_SUPPORT)
-		ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_odp_ops);
+	if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT))
+		return ret;
+
+	ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_odp_ops);
 
 	if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) {
 		ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey);
@@ -1638,9 +1565,6 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
 		}
 	}
 
-	if (!MLX5_CAP_GEN(dev->mdev, pg))
-		return ret;
-
 	ret = mlx5_ib_create_pf_eq(dev, &dev->odp_pf_eq);
 
 	return ret;
@@ -1648,7 +1572,7 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
 
 void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *dev)
 {
-	if (!MLX5_CAP_GEN(dev->mdev, pg))
+	if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT))
 		return;
 
 	mlx5_ib_destroy_pf_eq(dev, &dev->odp_pf_eq);
@@ -1771,7 +1695,7 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *work)
 
 	num_pending_prefetch_dec(to_mdev(w->pd->device), w->sg_list,
 				 w->num_sge, 0);
-	kfree(w);
+	kvfree(w);
 }
 
 int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
@@ -1813,7 +1737,7 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
 	if (valid_req)
 		queue_work(system_unbound_wq, &work->work);
 	else
-		kfree(work);
+		kvfree(work);
 
 	srcu_read_unlock(&dev->mr_srcu, srcu_key);
 
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 2a97619ed603..8937d72ddcf6 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1713,7 +1713,6 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 		}
 
 		MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
-		MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
 		memcpy(rss_key, ucmd.rx_hash_key, len);
 		break;
 	}
@@ -3387,19 +3386,16 @@ static int __mlx5_ib_qp_set_counter(struct ib_qp *qp,
 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
 	struct mlx5_ib_qp *mqp = to_mqp(qp);
 	struct mlx5_qp_context context = {};
-	struct mlx5_ib_port *mibport = NULL;
 	struct mlx5_ib_qp_base *base;
 	u32 set_id;
 
 	if (!MLX5_CAP_GEN(dev->mdev, rts2rts_qp_counters_set_id))
 		return 0;
 
-	if (counter) {
+	if (counter)
 		set_id = counter->id;
-	} else {
-		mibport = &dev->port[mqp->port - 1];
-		set_id = mibport->cnts.set_id;
-	}
+	else
+		set_id = mlx5_ib_get_counters_id(dev, mqp->port - 1);
 
 	base = &mqp->trans_qp.base;
 	context.qp_counter_set_usr_page &= cpu_to_be32(0xffffff);
@@ -3460,7 +3456,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 	struct mlx5_ib_cq *send_cq, *recv_cq;
 	struct mlx5_qp_context *context;
 	struct mlx5_ib_pd *pd;
-	struct mlx5_ib_port *mibport = NULL;
 	enum mlx5_qp_state mlx5_cur, mlx5_new;
 	enum mlx5_qp_optpar optpar;
 	u32 set_id = 0;
@@ -3625,11 +3620,10 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 		if (qp->flags & MLX5_IB_QP_UNDERLAY)
 			port_num = 0;
 
-		mibport = &dev->port[port_num];
 		if (ibqp->counter)
 			set_id = ibqp->counter->id;
 		else
-			set_id = mibport->cnts.set_id;
+			set_id = mlx5_ib_get_counters_id(dev, port_num);
 		context->qp_counter_set_usr_page |=
 			cpu_to_be32(set_id << 24);
 	}
@@ -3818,6 +3812,8 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 
 	dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry);
 	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+		u16 set_id;
+
 		required |= IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT;
 		if (!is_valid_mask(attr_mask, required, 0))
 			return -EINVAL;
@@ -3844,7 +3840,9 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		}
 		MLX5_SET(dctc, dctc, pkey_index, attr->pkey_index);
 		MLX5_SET(dctc, dctc, port, attr->port_num);
-		MLX5_SET(dctc, dctc, counter_set_id, dev->port[attr->port_num - 1].cnts.set_id);
+
+		set_id = mlx5_ib_get_counters_id(dev, attr->port_num - 1);
+		MLX5_SET(dctc, dctc, counter_set_id, set_id);
 	} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
 		struct mlx5_ib_modify_qp_resp resp = {};
@@ -4163,7 +4161,7 @@ static u64 get_xlt_octo(u64 bytes)
 	       MLX5_IB_UMR_OCTOWORD;
 }
 
-static __be64 frwr_mkey_mask(void)
+static __be64 frwr_mkey_mask(bool atomic)
 {
 	u64 result;
 
@@ -4176,10 +4174,12 @@ static __be64 frwr_mkey_mask(void)
 		 MLX5_MKEY_MASK_LW		|
 		 MLX5_MKEY_MASK_RR		|
 		 MLX5_MKEY_MASK_RW		|
-		 MLX5_MKEY_MASK_A		|
 		 MLX5_MKEY_MASK_SMALL_FENCE	|
 		 MLX5_MKEY_MASK_FREE;
 
+	if (atomic)
+		result |= MLX5_MKEY_MASK_A;
+
 	return cpu_to_be64(result);
 }
 
@@ -4205,7 +4205,7 @@ static __be64 sig_mkey_mask(void)
 }
 
 static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
-			    struct mlx5_ib_mr *mr, u8 flags)
+			    struct mlx5_ib_mr *mr, u8 flags, bool atomic)
 {
 	int size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size;
 
@@ -4213,7 +4213,7 @@ static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
 
 	umr->flags = flags;
 	umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
-	umr->mkey_mask = frwr_mkey_mask();
+	umr->mkey_mask = frwr_mkey_mask(atomic);
 }
 
 static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
@@ -4295,10 +4295,14 @@ static int set_reg_umr_segment(struct mlx5_ib_dev *dev,
 
 	memset(umr, 0, sizeof(*umr));
 
-	if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE)
-		umr->flags = MLX5_UMR_CHECK_FREE; /* fail if free */
-	else
-		umr->flags = MLX5_UMR_CHECK_NOT_FREE; /* fail if not free */
+	if (!umrwr->ignore_free_state) {
+		if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE)
+			/* fail if free */
+			umr->flags = MLX5_UMR_CHECK_FREE;
+		else
+			/* fail if not free */
+			umr->flags = MLX5_UMR_CHECK_NOT_FREE;
+	}
 
 	umr->xlt_octowords = cpu_to_be16(get_xlt_octo(umrwr->xlt_size));
 	if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) {
@@ -4808,10 +4812,22 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
 {
 	struct mlx5_ib_mr *mr = to_mmr(wr->mr);
 	struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
+	struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device);
 	int mr_list_size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size;
 	bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD;
+	bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC;
 	u8 flags = 0;
 
+	if (!mlx5_ib_can_use_umr(dev, atomic)) {
+		mlx5_ib_warn(to_mdev(qp->ibqp.device),
+			     "Fast update of %s for MR is disabled\n",
+			     (MLX5_CAP_GEN(dev->mdev,
+					   umr_modify_entity_size_disabled)) ?
+				     "entity size" :
+				     "atomic access");
+		return -EINVAL;
+	}
+
 	if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
 		mlx5_ib_warn(to_mdev(qp->ibqp.device),
 			     "Invalid IB_SEND_INLINE send flag\n");
@@ -4823,7 +4839,7 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
 	if (umr_inline)
 		flags |= MLX5_UMR_INLINE;
 
-	set_reg_umr_seg(*seg, mr, flags);
+	set_reg_umr_seg(*seg, mr, flags, atomic);
 	*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
 	*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
 	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
@@ -6328,11 +6344,13 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
 	}
 
 	if (curr_wq_state == IB_WQS_RESET && wq_state == IB_WQS_RDY) {
+		u16 set_id;
+
+		set_id = mlx5_ib_get_counters_id(dev, 0);
 		if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
 			MLX5_SET64(modify_rq_in, in, modify_bitmask,
 				   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
-			MLX5_SET(rqc, rqc, counter_set_id,
-				 dev->port->cnts.set_id);
+			MLX5_SET(rqc, rqc, counter_set_id, set_id);
 		} else
 			dev_info_once(
 				&dev->ib_dev.dev,
diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c
index b0d0687c7a68..8fc3630a9d4c 100644
--- a/drivers/infiniband/hw/mlx5/srq_cmd.c
+++ b/drivers/infiniband/hw/mlx5/srq_cmd.c
@@ -86,7 +86,7 @@ struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn)
 	xa_lock(&table->array);
 	srq = xa_load(&table->array, srqn);
 	if (srq)
-		atomic_inc(&srq->common.refcount);
+		refcount_inc(&srq->common.refcount);
 	xa_unlock(&table->array);
 
 	return srq;
@@ -592,7 +592,7 @@ int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
 	if (err)
 		return err;
 
-	atomic_set(&srq->common.refcount, 1);
+	refcount_set(&srq->common.refcount, 1);
 	init_completion(&srq->common.free);
 
 	err = xa_err(xa_store_irq(&table->array, srq->srqn, srq, GFP_KERNEL));
@@ -675,7 +675,7 @@ static int srq_event_notifier(struct notifier_block *nb,
 	xa_lock(&table->array);
 	srq = xa_load(&table->array, srqn);
 	if (srq)
-		atomic_inc(&srq->common.refcount);
+		refcount_inc(&srq->common.refcount);
 	xa_unlock(&table->array);
 
 	if (!srq)