Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r--  drivers/infiniband/hw/mlx5/Kconfig     |   1
-rw-r--r--  drivers/infiniband/hw/mlx5/cong.c      |  15
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c        |  15
-rw-r--r--  drivers/infiniband/hw/mlx5/devx.c      | 493
-rw-r--r--  drivers/infiniband/hw/mlx5/doorbell.c  |   6
-rw-r--r--  drivers/infiniband/hw/mlx5/ib_rep.c    |   6
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c      | 256
-rw-r--r--  drivers/infiniband/hw/mlx5/mem.c       |   5
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h   |  41
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c        | 126
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c       | 316
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c        | 312
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.c       |  11
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.h       |   2
-rw-r--r--  drivers/infiniband/hw/mlx5/srq_cmd.c   |  16
15 files changed, 1185 insertions, 436 deletions
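Most of the devx.c growth below adds an asynchronous command-completion file descriptor (MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC plus read/poll file operations): userspace submits an object query with a wr_id against that fd and later reads back a header followed by the raw command output. The following is a rough, hedged sketch of a userspace-side consumer, for orientation only; the header layout mirrors struct mlx5_ib_uapi_devx_async_cmd_hdr as used in this patch, and how the fd and the async query were obtained is assumed to have happened elsewhere.

/*
 * Hedged sketch: drain one completion from the DEVX async command fd added
 * in devx.c below.  devx_async_cmd_hdr mirrors mlx5_ib_uapi_devx_async_cmd_hdr
 * (wr_id followed by the raw command output); cmd_out_len must match the
 * out_len requested when the async query was submitted, since the kernel
 * side returns -ENOSPC for a smaller read buffer.
 */
#include <poll.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

struct devx_async_cmd_hdr {		/* mirrors mlx5_ib_uapi_devx_async_cmd_hdr */
	uint64_t wr_id;
	uint8_t  out_data[];
};

static int drain_one_async_cmd(int fd, size_t cmd_out_len)
{
	size_t bufsz = sizeof(struct devx_async_cmd_hdr) + cmd_out_len;
	struct devx_async_cmd_hdr *hdr = malloc(bufsz);
	struct pollfd pfd = { .fd = fd, .events = POLLIN };
	ssize_t n;

	if (!hdr)
		return -1;

	/* The fd reports EPOLLIN once a completion is queued. */
	if (poll(&pfd, 1, -1) < 0)
		goto err;

	/* One read returns the header plus the command output. */
	n = read(fd, hdr, bufsz);
	if (n < (ssize_t)sizeof(*hdr))
		goto err;

	printf("completion for wr_id %llu, %zd output bytes\n",
	       (unsigned long long)hdr->wr_id, n - (ssize_t)sizeof(*hdr));
	free(hdr);
	return 0;
err:
	free(hdr);
	return -1;
}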
diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig index 0440966bc6ec..8d651c05de62 100644 --- a/drivers/infiniband/hw/mlx5/Kconfig +++ b/drivers/infiniband/hw/mlx5/Kconfig @@ -1,7 +1,6 @@ config MLX5_INFINIBAND tristate "Mellanox 5th generation network adapters (ConnectX series) support" depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE - depends on INFINIBAND_USER_ACCESS || INFINIBAND_USER_ACCESS=n ---help--- This driver provides low-level InfiniBand support for Mellanox Connect-IB PCI Express host channel adapters (HCAs). diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c index 7e4e358a4fd8..8ba439fabf7f 100644 --- a/drivers/infiniband/hw/mlx5/cong.c +++ b/drivers/infiniband/hw/mlx5/cong.c @@ -389,19 +389,19 @@ void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num) dev->port[port_num].dbg_cc_params = NULL; } -int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num) +void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num) { struct mlx5_ib_dbg_cc_params *dbg_cc_params; struct mlx5_core_dev *mdev; int i; if (!mlx5_debugfs_root) - goto out; + return; /* Takes a 1-based port number */ mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL); if (!mdev) - goto out; + return; if (!MLX5_CAP_GEN(mdev, cc_query_allowed) || !MLX5_CAP_GEN(mdev, cc_modify_allowed)) @@ -415,8 +415,6 @@ int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num) dbg_cc_params->root = debugfs_create_dir("cc_params", mdev->priv.dbg_root); - if (!dbg_cc_params->root) - goto err; for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) { dbg_cc_params->params[i].offset = i; @@ -427,14 +425,11 @@ int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num) 0600, dbg_cc_params->root, &dbg_cc_params->params[i], &dbg_cc_fops); - if (!dbg_cc_params->params[i].dentry) - goto err; } put_mdev: mlx5_ib_put_native_port_mdev(dev, port_num + 1); -out: - return 0; + return; err: mlx5_ib_warn(dev, "cong debugfs failure\n"); @@ -445,5 +440,5 @@ err: * We don't want to fail driver if debugfs failed to initialize, * so we are not forwarding error to the user. 
*/ - return 0; + return; } diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 90f1b0bae5b5..18704e503508 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -187,8 +187,8 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, wqe_ctr = be16_to_cpu(cqe->wqe_counter); wc->wr_id = srq->wrid[wqe_ctr]; mlx5_ib_free_srq_wqe(srq, wqe_ctr); - if (msrq && atomic_dec_and_test(&msrq->refcount)) - complete(&msrq->free); + if (msrq) + mlx5_core_res_put(&msrq->common); } } else { wq = &qp->rq; @@ -707,15 +707,15 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, *cqe_size = ucmd.cqe_size; - cq->buf.umem = ib_umem_get(context, ucmd.buf_addr, - entries * ucmd.cqe_size, - IB_ACCESS_LOCAL_WRITE, 1); + cq->buf.umem = + ib_umem_get(udata, ucmd.buf_addr, entries * ucmd.cqe_size, + IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(cq->buf.umem)) { err = PTR_ERR(cq->buf.umem); return err; } - err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr, + err = mlx5_ib_db_map_user(to_mucontext(context), udata, ucmd.db_addr, &cq->db); if (err) goto err_umem; @@ -1111,7 +1111,6 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, struct ib_umem *umem; int err; int npages; - struct ib_ucontext *context = cq->buf.umem->context; err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); if (err) @@ -1124,7 +1123,7 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, if (ucmd.cqe_size && SIZE_MAX / ucmd.cqe_size <= entries - 1) return -EINVAL; - umem = ib_umem_get(context, ucmd.buf_addr, + umem = ib_umem_get(udata, ucmd.buf_addr, (size_t)ucmd.cqe_size * entries, IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(umem)) { diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 5a588f3cfb1b..9e08df7914aa 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -8,6 +8,7 @@ #include <rdma/uverbs_types.h> #include <rdma/uverbs_ioctl.h> #include <rdma/mlx5_user_ioctl_cmds.h> +#include <rdma/mlx5_user_ioctl_verbs.h> #include <rdma/ib_umem.h> #include <rdma/uverbs_std_types.h> #include <linux/mlx5/driver.h> @@ -17,12 +18,32 @@ #define UVERBS_MODULE_NAME mlx5_ib #include <rdma/uverbs_named_ioctl.h> +enum devx_obj_flags { + DEVX_OBJ_FLAGS_INDIRECT_MKEY = 1 << 0, + DEVX_OBJ_FLAGS_DCT = 1 << 1, +}; + +struct devx_async_data { + struct mlx5_ib_dev *mdev; + struct list_head list; + struct ib_uobject *fd_uobj; + struct mlx5_async_work cb_work; + u16 cmd_out_len; + /* must be last field in this structure */ + struct mlx5_ib_uapi_devx_async_cmd_hdr hdr; +}; + #define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in) struct devx_obj { struct mlx5_core_dev *mdev; u64 obj_id; u32 dinlen; /* destroy inbox length */ u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW]; + u32 flags; + union { + struct mlx5_ib_devx_mr devx_mr; + struct mlx5_core_dct core_dct; + }; }; struct devx_umem { @@ -330,7 +351,6 @@ static u64 devx_get_obj_id(const void *in) obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ, MLX5_GET(arm_rq_in, in, srq_number)); break; - case MLX5_CMD_OP_DRAIN_DCT: case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT, MLX5_GET(drain_dct_in, in, dctn)); @@ -601,7 +621,6 @@ static bool devx_is_obj_modify_cmd(const void *in) case MLX5_CMD_OP_2RST_QP: case MLX5_CMD_OP_ARM_XRC_SRQ: case MLX5_CMD_OP_ARM_RQ: - case MLX5_CMD_OP_DRAIN_DCT: case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: case MLX5_CMD_OP_ARM_XRQ: case 
MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY: @@ -1011,6 +1030,92 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, } } +static int devx_handle_mkey_indirect(struct devx_obj *obj, + struct mlx5_ib_dev *dev, + void *in, void *out) +{ + struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table; + struct mlx5_ib_devx_mr *devx_mr = &obj->devx_mr; + unsigned long flags; + struct mlx5_core_mkey *mkey; + void *mkc; + u8 key; + int err; + + mkey = &devx_mr->mmkey; + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + key = MLX5_GET(mkc, mkc, mkey_7_0); + mkey->key = mlx5_idx_to_mkey( + MLX5_GET(create_mkey_out, out, mkey_index)) | key; + mkey->type = MLX5_MKEY_INDIRECT_DEVX; + mkey->iova = MLX5_GET64(mkc, mkc, start_addr); + mkey->size = MLX5_GET64(mkc, mkc, len); + mkey->pd = MLX5_GET(mkc, mkc, pd); + devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size); + + write_lock_irqsave(&table->lock, flags); + err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), + mkey); + write_unlock_irqrestore(&table->lock, flags); + return err; +} + +static int devx_handle_mkey_create(struct mlx5_ib_dev *dev, + struct devx_obj *obj, + void *in, int in_len) +{ + int min_len = MLX5_BYTE_OFF(create_mkey_in, memory_key_mkey_entry) + + MLX5_FLD_SZ_BYTES(create_mkey_in, + memory_key_mkey_entry); + void *mkc; + u8 access_mode; + + if (in_len < min_len) + return -EINVAL; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + access_mode = MLX5_GET(mkc, mkc, access_mode_1_0); + access_mode |= MLX5_GET(mkc, mkc, access_mode_4_2) << 2; + + if (access_mode == MLX5_MKC_ACCESS_MODE_KLMS || + access_mode == MLX5_MKC_ACCESS_MODE_KSM) { + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + obj->flags |= DEVX_OBJ_FLAGS_INDIRECT_MKEY; + return 0; + } + + MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1); + return 0; +} + +static void devx_free_indirect_mkey(struct rcu_head *rcu) +{ + kfree(container_of(rcu, struct devx_obj, devx_mr.rcu)); +} + +/* This function to delete from the radix tree needs to be called before + * destroying the underlying mkey. Otherwise a race might occur in case that + * other thread will get the same mkey before this one will be deleted, + * in that case it will fail via inserting to the tree its own data. + * + * Note: + * An error in the destroy is not expected unless there is some other indirect + * mkey which points to this one. In a kernel cleanup flow it will be just + * destroyed in the iterative destruction call. In a user flow, in case + * the application didn't close in the expected order it's its own problem, + * the mkey won't be part of the tree, in both cases the kernel is safe. 
+ */ +static void devx_cleanup_mkey(struct devx_obj *obj) +{ + struct mlx5_mkey_table *table = &obj->mdev->priv.mkey_table; + unsigned long flags; + + write_lock_irqsave(&table->lock, flags); + radix_tree_delete(&table->tree, mlx5_base_mkey(obj->devx_mr.mmkey.key)); + write_unlock_irqrestore(&table->lock, flags); +} + static int devx_obj_cleanup(struct ib_uobject *uobject, enum rdma_remove_reason why) { @@ -1018,10 +1123,25 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, struct devx_obj *obj = uobject->object; int ret; - ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) + devx_cleanup_mkey(obj); + + if (obj->flags & DEVX_OBJ_FLAGS_DCT) + ret = mlx5_core_destroy_dct(obj->mdev, &obj->core_dct); + else + ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, + sizeof(out)); if (ib_is_destroy_retryable(ret, why, uobject)) return ret; + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + struct mlx5_ib_dev *dev = to_mdev(uobject->context->device); + + call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu, + devx_free_indirect_mkey); + return ret; + } + kfree(obj); return ret; } @@ -1032,10 +1152,13 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN); int cmd_out_len = uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT); + int cmd_in_len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN); void *cmd_out; struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE); - struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); + struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context( + &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device); u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; struct devx_obj *obj; @@ -1060,11 +1183,25 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( return -ENOMEM; MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); - devx_set_umem_valid(cmd_in); + if (opcode == MLX5_CMD_OP_CREATE_MKEY) { + err = devx_handle_mkey_create(dev, obj, cmd_in, cmd_in_len); + if (err) + goto obj_free; + } else { + devx_set_umem_valid(cmd_in); + } + + if (opcode == MLX5_CMD_OP_CREATE_DCT) { + obj->flags |= DEVX_OBJ_FLAGS_DCT; + err = mlx5_core_create_dct(dev->mdev, &obj->core_dct, + cmd_in, cmd_in_len, + cmd_out, cmd_out_len); + } else { + err = mlx5_cmd_exec(dev->mdev, cmd_in, + cmd_in_len, + cmd_out, cmd_out_len); + } - err = mlx5_cmd_exec(dev->mdev, cmd_in, - uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN), - cmd_out, cmd_out_len); if (err) goto obj_free; @@ -1074,15 +1211,28 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( &obj_id); WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32)); + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out); + if (err) + goto obj_destroy; + } + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len); if (err) - goto obj_destroy; + goto err_copy; obj->obj_id = get_enc_obj_id(opcode, obj_id); return 0; +err_copy: + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) + devx_cleanup_mkey(obj); obj_destroy: - mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); + if (obj->flags & DEVX_OBJ_FLAGS_DCT) + mlx5_core_destroy_dct(obj->mdev, &obj->core_dct); + else + mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, 
out, + sizeof(out)); obj_free: kfree(obj); return err; @@ -1096,8 +1246,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT); struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE); - struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); - struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); + struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context( + &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); + struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device); void *cmd_out; int err; int uid; @@ -1137,11 +1288,12 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT); struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE); - struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); + struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context( + &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); void *cmd_out; int err; int uid; - struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); + struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device); uid = devx_get_uid(c, cmd_in); if (uid < 0) @@ -1168,6 +1320,154 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( cmd_out, cmd_out_len); } +struct devx_async_event_queue { + spinlock_t lock; + wait_queue_head_t poll_wait; + struct list_head event_list; + atomic_t bytes_in_use; + u8 is_destroyed:1; +}; + +struct devx_async_cmd_event_file { + struct ib_uobject uobj; + struct devx_async_event_queue ev_queue; + struct mlx5_async_ctx async_ctx; +}; + +static void devx_init_event_queue(struct devx_async_event_queue *ev_queue) +{ + spin_lock_init(&ev_queue->lock); + INIT_LIST_HEAD(&ev_queue->event_list); + init_waitqueue_head(&ev_queue->poll_wait); + atomic_set(&ev_queue->bytes_in_use, 0); + ev_queue->is_destroyed = 0; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)( + struct uverbs_attr_bundle *attrs) +{ + struct devx_async_cmd_event_file *ev_file; + + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE); + struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); + + ev_file = container_of(uobj, struct devx_async_cmd_event_file, + uobj); + devx_init_event_queue(&ev_file->ev_queue); + mlx5_cmd_init_async_ctx(mdev->mdev, &ev_file->async_ctx); + return 0; +} + +static void devx_query_callback(int status, struct mlx5_async_work *context) +{ + struct devx_async_data *async_data = + container_of(context, struct devx_async_data, cb_work); + struct ib_uobject *fd_uobj = async_data->fd_uobj; + struct devx_async_cmd_event_file *ev_file; + struct devx_async_event_queue *ev_queue; + unsigned long flags; + + ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file, + uobj); + ev_queue = &ev_file->ev_queue; + + spin_lock_irqsave(&ev_queue->lock, flags); + list_add_tail(&async_data->list, &ev_queue->event_list); + spin_unlock_irqrestore(&ev_queue->lock, flags); + + wake_up_interruptible(&ev_queue->poll_wait); + fput(fd_uobj->object); +} + +#define MAX_ASYNC_BYTES_IN_USE (1024 * 1024) /* 1MB */ + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)( + struct uverbs_attr_bundle *attrs) +{ + void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN); + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_HANDLE); + u16 cmd_out_len; + struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context( + 
&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); + struct ib_uobject *fd_uobj; + int err; + int uid; + struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device); + struct devx_async_cmd_event_file *ev_file; + struct devx_async_data *async_data; + + uid = devx_get_uid(c, cmd_in); + if (uid < 0) + return uid; + + if (!devx_is_obj_query_cmd(cmd_in)) + return -EINVAL; + + err = uverbs_get_const(&cmd_out_len, attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN); + if (err) + return err; + + if (!devx_is_valid_obj_id(uobj, cmd_in)) + return -EINVAL; + + fd_uobj = uverbs_attr_get_uobject(attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD); + if (IS_ERR(fd_uobj)) + return PTR_ERR(fd_uobj); + + ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file, + uobj); + + if (atomic_add_return(cmd_out_len, &ev_file->ev_queue.bytes_in_use) > + MAX_ASYNC_BYTES_IN_USE) { + atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use); + return -EAGAIN; + } + + async_data = kvzalloc(struct_size(async_data, hdr.out_data, + cmd_out_len), GFP_KERNEL); + if (!async_data) { + err = -ENOMEM; + goto sub_bytes; + } + + err = uverbs_copy_from(&async_data->hdr.wr_id, attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID); + if (err) + goto free_async; + + async_data->cmd_out_len = cmd_out_len; + async_data->mdev = mdev; + async_data->fd_uobj = fd_uobj; + + get_file(fd_uobj->object); + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); + err = mlx5_cmd_exec_cb(&ev_file->async_ctx, cmd_in, + uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN), + async_data->hdr.out_data, + async_data->cmd_out_len, + devx_query_callback, &async_data->cb_work); + + if (err) + goto cb_err; + + return 0; + +cb_err: + fput(fd_uobj->object); +free_async: + kvfree(async_data); +sub_bytes: + atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use); + return err; +} + static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, struct uverbs_attr_bundle *attrs, struct devx_umem *obj) @@ -1195,7 +1495,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, if (err) return err; - obj->umem = ib_umem_get(ucontext, addr, size, access, 0); + obj->umem = ib_umem_get(&attrs->driver_udata, addr, size, access, 0); if (IS_ERR(obj->umem)) return PTR_ERR(obj->umem); @@ -1252,7 +1552,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE); u32 obj_id; - struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); + struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context( + &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device); int err; @@ -1313,6 +1614,123 @@ static int devx_umem_cleanup(struct ib_uobject *uobject, return 0; } +static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf, + size_t count, loff_t *pos) +{ + struct devx_async_cmd_event_file *comp_ev_file = filp->private_data; + struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue; + struct devx_async_data *event; + int ret = 0; + size_t eventsz; + + spin_lock_irq(&ev_queue->lock); + + while (list_empty(&ev_queue->event_list)) { + spin_unlock_irq(&ev_queue->lock); + + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + if (wait_event_interruptible( + ev_queue->poll_wait, + (!list_empty(&ev_queue->event_list) || + ev_queue->is_destroyed))) { + return -ERESTARTSYS; + } + + if (list_empty(&ev_queue->event_list) && + ev_queue->is_destroyed) + 
return -EIO; + + spin_lock_irq(&ev_queue->lock); + } + + event = list_entry(ev_queue->event_list.next, + struct devx_async_data, list); + eventsz = event->cmd_out_len + + sizeof(struct mlx5_ib_uapi_devx_async_cmd_hdr); + + if (eventsz > count) { + spin_unlock_irq(&ev_queue->lock); + return -ENOSPC; + } + + list_del(ev_queue->event_list.next); + spin_unlock_irq(&ev_queue->lock); + + if (copy_to_user(buf, &event->hdr, eventsz)) + ret = -EFAULT; + else + ret = eventsz; + + atomic_sub(event->cmd_out_len, &ev_queue->bytes_in_use); + kvfree(event); + return ret; +} + +static int devx_async_cmd_event_close(struct inode *inode, struct file *filp) +{ + struct ib_uobject *uobj = filp->private_data; + struct devx_async_cmd_event_file *comp_ev_file = container_of( + uobj, struct devx_async_cmd_event_file, uobj); + struct devx_async_data *entry, *tmp; + + spin_lock_irq(&comp_ev_file->ev_queue.lock); + list_for_each_entry_safe(entry, tmp, + &comp_ev_file->ev_queue.event_list, list) + kvfree(entry); + spin_unlock_irq(&comp_ev_file->ev_queue.lock); + + uverbs_close_fd(filp); + return 0; +} + +static __poll_t devx_async_cmd_event_poll(struct file *filp, + struct poll_table_struct *wait) +{ + struct devx_async_cmd_event_file *comp_ev_file = filp->private_data; + struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue; + __poll_t pollflags = 0; + + poll_wait(filp, &ev_queue->poll_wait, wait); + + spin_lock_irq(&ev_queue->lock); + if (ev_queue->is_destroyed) + pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; + else if (!list_empty(&ev_queue->event_list)) + pollflags = EPOLLIN | EPOLLRDNORM; + spin_unlock_irq(&ev_queue->lock); + + return pollflags; +} + +const struct file_operations devx_async_cmd_event_fops = { + .owner = THIS_MODULE, + .read = devx_async_cmd_event_read, + .poll = devx_async_cmd_event_poll, + .release = devx_async_cmd_event_close, + .llseek = no_llseek, +}; + +static int devx_hot_unplug_async_cmd_event_file(struct ib_uobject *uobj, + enum rdma_remove_reason why) +{ + struct devx_async_cmd_event_file *comp_ev_file = + container_of(uobj, struct devx_async_cmd_event_file, + uobj); + struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue; + + spin_lock_irq(&ev_queue->lock); + ev_queue->is_destroyed = 1; + spin_unlock_irq(&ev_queue->lock); + + if (why == RDMA_REMOVE_DRIVER_REMOVE) + wake_up_interruptible(&ev_queue->poll_wait); + + mlx5_cmd_cleanup_async_ctx(&comp_ev_file->async_ctx); + return 0; +}; + DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_UMEM_REG, UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE, @@ -1423,6 +1841,27 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), UA_MANDATORY)); +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE, + UVERBS_IDR_ANY_OBJECT, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN, + u16, UA_MANDATORY), + UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD, + MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY)); + DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX, &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR), @@ -1433,13 +1872,30 @@ 
DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ, &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY), - &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY)); + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)); DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM, UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC, + UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE, + MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD, + UVERBS_ACCESS_NEW, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD, + UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_cmd_event_file), + devx_hot_unplug_async_cmd_event_file, + &devx_async_cmd_event_fops, "[devx_async_cmd]", + O_RDONLY), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)); + static bool devx_is_supported(struct ib_device *device) { struct mlx5_ib_dev *dev = to_mdev(device); @@ -1457,5 +1913,8 @@ const struct uapi_definition mlx5_ib_devx_defs[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED( MLX5_IB_OBJECT_DEVX_UMEM, UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD, + UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)), {}, }; diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c index a0e4e6ddb71a..8f4e5f22b84c 100644 --- a/drivers/infiniband/hw/mlx5/doorbell.c +++ b/drivers/infiniband/hw/mlx5/doorbell.c @@ -43,7 +43,8 @@ struct mlx5_ib_user_db_page { int refcnt; }; -int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, +int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, + struct ib_udata *udata, unsigned long virt, struct mlx5_db *db) { struct mlx5_ib_user_db_page *page; @@ -63,8 +64,7 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, page->user_virt = (virt & PAGE_MASK); page->refcnt = 0; - page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK, - PAGE_SIZE, 0, 0); + page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0); if (IS_ERR(page->umem)) { err = PTR_ERR(page->umem); kfree(page); diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 4700cffb5a00..b8639ac71336 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -57,7 +57,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) else profile = &vf_rep_profile; - ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev)); + ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev); if (!ibdev) return -ENOMEM; @@ -65,8 +65,10 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) ibdev->mdev = dev; ibdev->num_ports = max(MLX5_CAP_GEN(dev, num_ports), MLX5_CAP_GEN(dev, num_vhca_ports)); - if (!__mlx5_ib_add(ibdev, profile)) + if (!__mlx5_ib_add(ibdev, profile)) { + ib_dealloc_device(&ibdev->ib_dev); return -EINVAL; + } rep->rep_if[REP_IB].priv = ibdev; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 581ae11e2fc9..531ff20b32ad 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -415,10 +415,17 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed, *active_speed = IB_SPEED_EDR; break; 
case MLX5E_PROT_MASK(MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2): + *active_width = IB_WIDTH_2X; + *active_speed = IB_SPEED_EDR; + break; case MLX5E_PROT_MASK(MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR): *active_width = IB_WIDTH_1X; *active_speed = IB_SPEED_HDR; break; + case MLX5E_PROT_MASK(MLX5E_CAUI_4_100GBASE_CR4_KR4): + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_EDR; + break; case MLX5E_PROT_MASK(MLX5E_100GAUI_2_100GBASE_CR2_KR2): *active_width = IB_WIDTH_2X; *active_speed = IB_SPEED_HDR; @@ -535,24 +542,51 @@ out: return err; } +struct mlx5_ib_vlan_info { + u16 vlan_id; + bool vlan; +}; + +static int get_lower_dev_vlan(struct net_device *lower_dev, void *data) +{ + struct mlx5_ib_vlan_info *vlan_info = data; + + if (is_vlan_dev(lower_dev)) { + vlan_info->vlan = true; + vlan_info->vlan_id = vlan_dev_vlan_id(lower_dev); + } + /* We are interested only in first level vlan device, so + * always return 1 to stop iterating over next level devices. + */ + return 1; +} + static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, unsigned int index, const union ib_gid *gid, const struct ib_gid_attr *attr) { enum ib_gid_type gid_type = IB_GID_TYPE_IB; + struct mlx5_ib_vlan_info vlan_info = { }; u8 roce_version = 0; u8 roce_l3_type = 0; - bool vlan = false; u8 mac[ETH_ALEN]; - u16 vlan_id = 0; if (gid) { gid_type = attr->gid_type; ether_addr_copy(mac, attr->ndev->dev_addr); if (is_vlan_dev(attr->ndev)) { - vlan = true; - vlan_id = vlan_dev_vlan_id(attr->ndev); + vlan_info.vlan = true; + vlan_info.vlan_id = vlan_dev_vlan_id(attr->ndev); + } else { + /* If the netdev is upper device and if it's lower + * lower device is vlan device, consider vlan id of + * the lower vlan device for this gid entry. + */ + rcu_read_lock(); + netdev_walk_all_lower_dev_rcu(attr->ndev, + get_lower_dev_vlan, &vlan_info); + rcu_read_unlock(); } } @@ -573,8 +607,9 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, } return mlx5_core_roce_gid_set(dev->mdev, index, roce_version, - roce_l3_type, gid->raw, mac, vlan, - vlan_id, port_num); + roce_l3_type, gid->raw, mac, + vlan_info.vlan, vlan_info.vlan_id, + port_num); } static int mlx5_ib_add_gid(const struct ib_gid_attr *attr, @@ -982,11 +1017,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz); props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (MLX5_CAP_GEN(mdev, pg)) - props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING; - props->odp_caps = dev->odp_caps; -#endif + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { + if (MLX5_CAP_GEN(mdev, pg)) + props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING; + props->odp_caps = dev->odp_caps; + } if (MLX5_CAP_GEN(mdev, cd)) props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL; @@ -1717,14 +1752,15 @@ static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn, mlx5_ib_disable_lb(dev, true, false); } -static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata) +static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, + struct ib_udata *udata) { + struct ib_device *ibdev = uctx->device; struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_alloc_ucontext_req_v2 req = {}; struct mlx5_ib_alloc_ucontext_resp resp = {}; struct mlx5_core_dev *mdev = dev->mdev; - struct mlx5_ib_ucontext *context; + struct mlx5_ib_ucontext *context = to_mucontext(uctx); struct mlx5_bfreg_info *bfregi; int ver; int err; @@ -1734,29 +1770,29 @@ static 
struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, bool lib_uar_4k; if (!dev->ib_active) - return ERR_PTR(-EAGAIN); + return -EAGAIN; if (udata->inlen == sizeof(struct mlx5_ib_alloc_ucontext_req)) ver = 0; else if (udata->inlen >= min_req_v2) ver = 2; else - return ERR_PTR(-EINVAL); + return -EINVAL; err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req))); if (err) - return ERR_PTR(err); + return err; if (req.flags & ~MLX5_IB_ALLOC_UCTX_DEVX) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; req.total_num_bfregs = ALIGN(req.total_num_bfregs, MLX5_NON_FP_BFREGS_PER_UAR); if (req.num_low_latency_bfregs > req.total_num_bfregs - 1) - return ERR_PTR(-EINVAL); + return -EINVAL; resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf)) @@ -1789,10 +1825,6 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */ } - context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) - return ERR_PTR(-ENOMEM); - lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR; bfregi = &context->bfregi; @@ -1822,9 +1854,9 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, if (err) goto out_sys_pages; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range; -#endif + if (ibdev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING) + context->ibucontext.invalidate_range = + &mlx5_ib_invalidate_range; if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { err = mlx5_ib_devx_create(dev, true); @@ -1927,7 +1959,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, 1, &dev->roce[port].tx_port_affinity)); } - return &context->ibucontext; + return 0; out_mdev: mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid); @@ -1945,23 +1977,19 @@ out_count: kfree(bfregi->count); out_ctx: - kfree(context); - - return ERR_PTR(err); + return err; } -static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) +static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) { struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); struct mlx5_bfreg_info *bfregi; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING /* All umem's must be destroyed before destroying the ucontext. 
*/ mutex_lock(&ibcontext->per_mm_list_lock); WARN_ON(!list_empty(&ibcontext->per_mm_list)); mutex_unlock(&ibcontext->per_mm_list_lock); -#endif bfregi = &context->bfregi; mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid); @@ -1972,9 +2000,6 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) deallocate_uars(dev, context); kfree(bfregi->sys_pages); kfree(bfregi->count); - kfree(context); - - return 0; } static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, @@ -2313,30 +2338,24 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm) return 0; } -static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) +static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, + struct ib_udata *udata) { + struct mlx5_ib_pd *pd = to_mpd(ibpd); + struct ib_device *ibdev = ibpd->device; struct mlx5_ib_alloc_pd_resp resp; - struct mlx5_ib_pd *pd; int err; u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {}; u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {}; u16 uid = 0; - pd = kmalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) - return ERR_PTR(-ENOMEM); - uid = context ? to_mucontext(context)->devx_uid : 0; MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD); MLX5_SET(alloc_pd_in, in, uid, uid); err = mlx5_cmd_exec(to_mdev(ibdev)->mdev, in, sizeof(in), out, sizeof(out)); - if (err) { - kfree(pd); - return ERR_PTR(err); - } + if (err) + return err; pd->pdn = MLX5_GET(alloc_pd_out, out, pd); pd->uid = uid; @@ -2344,23 +2363,19 @@ static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, resp.pdn = pd->pdn; if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { mlx5_cmd_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn, uid); - kfree(pd); - return ERR_PTR(-EFAULT); + return -EFAULT; } } - return &pd->ibpd; + return 0; } -static int mlx5_ib_dealloc_pd(struct ib_pd *pd) +static void mlx5_ib_dealloc_pd(struct ib_pd *pd) { struct mlx5_ib_dev *mdev = to_mdev(pd->device); struct mlx5_ib_pd *mpd = to_mpd(pd); mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid); - kfree(mpd); - - return 0; } enum { @@ -2394,10 +2409,29 @@ static u8 get_match_criteria_enable(u32 *match_criteria) return match_criteria_enable; } -static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) +static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) { - MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask); - MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); + u8 entry_mask; + u8 entry_val; + int err = 0; + + if (!mask) + goto out; + + entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c, + ip_protocol); + entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v, + ip_protocol); + if (!entry_mask) { + MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask); + MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); + goto out; + } + /* Don't override existing ip protocol */ + if (mask != entry_mask || val != entry_val) + err = -EINVAL; +out: + return err; } static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val, @@ -2631,8 +2665,10 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, set_tos(headers_c, headers_v, ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos); - set_proto(headers_c, headers_v, - ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto); + if (set_proto(headers_c, headers_v, + ib_spec->ipv4.mask.proto, + ib_spec->ipv4.val.proto)) + return -EINVAL; break; case IB_FLOW_SPEC_IPV6: if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD)) @@ -2671,9 +2707,10 @@ static int 
parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, ib_spec->ipv6.mask.traffic_class, ib_spec->ipv6.val.traffic_class); - set_proto(headers_c, headers_v, - ib_spec->ipv6.mask.next_hdr, - ib_spec->ipv6.val.next_hdr); + if (set_proto(headers_c, headers_v, + ib_spec->ipv6.mask.next_hdr, + ib_spec->ipv6.val.next_hdr)) + return -EINVAL; set_flow_label(misc_params_c, misc_params_v, ntohl(ib_spec->ipv6.mask.flow_label), @@ -2694,10 +2731,8 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, LAST_TCP_UDP_FIELD)) return -EOPNOTSUPP; - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, - 0xff); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, - IPPROTO_TCP); + if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP)) + return -EINVAL; MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport, ntohs(ib_spec->tcp_udp.mask.src_port)); @@ -2714,10 +2749,8 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, LAST_TCP_UDP_FIELD)) return -EOPNOTSUPP; - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, - 0xff); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, - IPPROTO_UDP); + if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP)) + return -EINVAL; MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, ntohs(ib_spec->tcp_udp.mask.src_port)); @@ -2733,6 +2766,9 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, if (ib_spec->gre.mask.c_ks_res0_ver) return -EOPNOTSUPP; + if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE)) + return -EINVAL; + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 0xff); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, @@ -3884,7 +3920,7 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO) return ERR_PTR(-ENOMEM); - dst = kzalloc(sizeof(*dst) * 2, GFP_KERNEL); + dst = kcalloc(2, sizeof(*dst), GFP_KERNEL); if (!dst) return ERR_PTR(-ENOMEM); @@ -4165,7 +4201,7 @@ static ssize_t fw_pages_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = - container_of(device, struct mlx5_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages); } @@ -4175,7 +4211,7 @@ static ssize_t reg_pages_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = - container_of(device, struct mlx5_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); } @@ -4185,7 +4221,8 @@ static ssize_t hca_type_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = - container_of(device, struct mlx5_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); + return sprintf(buf, "MT%d\n", dev->mdev->pdev->device); } static DEVICE_ATTR_RO(hca_type); @@ -4194,7 +4231,8 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = - container_of(device, struct mlx5_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); + return sprintf(buf, "%x\n", dev->mdev->rev_id); } static DEVICE_ATTR_RO(hw_rev); @@ -4203,7 +4241,8 @@ static ssize_t board_id_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = - container_of(device, struct mlx5_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx5_ib_dev, 
ib_dev); + return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN, dev->mdev->board_id); } @@ -4689,23 +4728,28 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) { struct ib_srq_init_attr attr; struct mlx5_ib_dev *dev; + struct ib_device *ibdev; struct ib_cq_init_attr cq_attr = {.cqe = 1}; int port; int ret = 0; dev = container_of(devr, struct mlx5_ib_dev, devr); + ibdev = &dev->ib_dev; mutex_init(&devr->mutex); - devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL); - if (IS_ERR(devr->p0)) { - ret = PTR_ERR(devr->p0); - goto error0; - } - devr->p0->device = &dev->ib_dev; + devr->p0 = rdma_zalloc_drv_obj(ibdev, ib_pd); + if (!devr->p0) + return -ENOMEM; + + devr->p0->device = ibdev; devr->p0->uobject = NULL; atomic_set(&devr->p0->usecnt, 0); + ret = mlx5_ib_alloc_pd(devr->p0, NULL, NULL); + if (ret) + goto error0; + devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL); if (IS_ERR(devr->c0)) { ret = PTR_ERR(devr->c0); @@ -4803,6 +4847,7 @@ error2: error1: mlx5_ib_dealloc_pd(devr->p0); error0: + kfree(devr->p0); return ret; } @@ -4818,6 +4863,7 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr) mlx5_ib_dealloc_xrcd(devr->x1); mlx5_ib_destroy_cq(devr->c0); mlx5_ib_dealloc_pd(devr->p0); + kfree(devr->p0); /* Make sure no change P_Key work items are still executing */ for (port = 0; port < dev->num_ports; ++port) @@ -5567,9 +5613,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, mpi->mdev_events.notifier_call = mlx5_ib_event_slave_port; mlx5_notifier_register(mpi->mdev, &mpi->mdev_events); - err = mlx5_ib_init_cong_debugfs(ibdev, port_num); - if (err) - goto unbind; + mlx5_ib_init_cong_debugfs(ibdev, port_num); return true; @@ -5781,11 +5825,10 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device, void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - cleanup_srcu_struct(&dev->mr_srcu); - drain_workqueue(dev->advise_mr_wq); - destroy_workqueue(dev->advise_mr_wq); -#endif + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { + srcu_barrier(&dev->mr_srcu); + cleanup_srcu_struct(&dev->mr_srcu); + } kfree(dev->port); } @@ -5838,19 +5881,11 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) spin_lock_init(&dev->memic.memic_lock); dev->memic.dev = mdev; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - dev->advise_mr_wq = alloc_ordered_workqueue("mlx5_ib_advise_mr_wq", 0); - if (!dev->advise_mr_wq) { - err = -ENOMEM; - goto err_mp; - } - - err = init_srcu_struct(&dev->mr_srcu); - if (err) { - destroy_workqueue(dev->advise_mr_wq); - goto err_mp; + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { + err = init_srcu_struct(&dev->mr_srcu); + if (err) + goto err_mp; } -#endif return 0; err_mp: @@ -5947,6 +5982,8 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .req_notify_cq = mlx5_ib_arm_cq, .rereg_user_mr = mlx5_ib_rereg_user_mr, .resize_cq = mlx5_ib_resize_cq, + INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext), }; static const struct ib_device_ops mlx5_ib_dev_flow_ipsec_ops = { @@ -6213,7 +6250,7 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev) return mlx5_ib_odp_init_one(dev); } -void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_odp_cleanup_one(dev); } @@ -6242,8 +6279,9 @@ void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev) static int 
mlx5_ib_stage_cong_debugfs_init(struct mlx5_ib_dev *dev) { - return mlx5_ib_init_cong_debugfs(dev, - mlx5_core_native_port_num(dev->mdev) - 1); + mlx5_ib_init_cong_debugfs(dev, + mlx5_core_native_port_num(dev->mdev) - 1); + return 0; } static void mlx5_ib_stage_cong_debugfs_cleanup(struct mlx5_ib_dev *dev) @@ -6293,7 +6331,7 @@ int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) name = "mlx5_%d"; else name = "mlx5_bond_%d"; - return ib_register_device(&dev->ib_dev, name, NULL); + return ib_register_device(&dev->ib_dev, name); } void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) @@ -6550,7 +6588,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) return mlx5_ib_add_slave_port(mdev); - dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); + dev = ib_alloc_device(mlx5_ib_dev, ib_dev); if (!dev) return NULL; diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 549234988bb4..9f90be296ee0 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -111,7 +111,6 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, *count = i; } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING static u64 umem_dma_to_mtt(dma_addr_t umem_dma) { u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK; @@ -123,7 +122,6 @@ static u64 umem_dma_to_mtt(dma_addr_t umem_dma) return mtt_entry; } -#endif /* * Populate the given array with bus addresses from the umem. @@ -151,7 +149,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, int len; struct scatterlist *sg; int entry; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (umem->is_odp) { WARN_ON(shift != 0); WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)); @@ -164,7 +162,6 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, } return; } -#endif i = 0; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index eedba0d2ec4b..4a617d78eae1 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -36,6 +36,7 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <rdma/ib_verbs.h> +#include <rdma/ib_umem.h> #include <rdma/ib_smi.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/cq.h> @@ -588,14 +589,27 @@ struct mlx5_ib_mr { atomic_t num_leaf_free; wait_queue_head_t q_leaf_free; struct mlx5_async_work cb_work; + atomic_t num_pending_prefetch; }; +static inline bool is_odp_mr(struct mlx5_ib_mr *mr) +{ + return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem && + mr->umem->is_odp; +} + struct mlx5_ib_mw { struct ib_mw ibmw; struct mlx5_core_mkey mmkey; int ndescs; }; +struct mlx5_ib_devx_mr { + struct mlx5_core_mkey mmkey; + int ndescs; + struct rcu_head rcu; +}; + struct mlx5_ib_umr_context { struct ib_cqe cqe; enum ib_wc_status status; @@ -624,7 +638,6 @@ struct mlx5_cache_ent { spinlock_t lock; - struct dentry *dir; char name[4]; u32 order; u32 xlt; @@ -636,11 +649,6 @@ struct mlx5_cache_ent { u32 miss; u32 limit; - struct dentry *fsize; - struct dentry *fcur; - struct dentry *fmiss; - struct dentry *flimit; - struct mlx5_ib_dev *dev; struct work_struct work; struct delayed_work dwork; @@ -912,7 +920,6 @@ struct mlx5_ib_dev { /* Prevents soft lock on massive reg MRs */ struct mutex slow_path_mutex; int fill_delay; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING struct ib_odp_caps odp_caps; u64 odp_max_size; struct 
mlx5_ib_pf_eq odp_pf_eq; @@ -923,8 +930,6 @@ struct mlx5_ib_dev { */ struct srcu_struct mr_srcu; u32 null_mkey; - struct workqueue_struct *advise_mr_wq; -#endif struct mlx5_ib_flow_db *flow_db; /* protect resources needed as part of reset flow */ spinlock_t reset_flow_resource_lock; @@ -1034,7 +1039,8 @@ to_mflow_act(struct ib_flow_action *ibact) return container_of(ibact, struct mlx5_ib_flow_action, ib_action); } -int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, +int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, + struct ib_udata *udata, unsigned long virt, struct mlx5_db *db); void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db); void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); @@ -1069,9 +1075,12 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr); int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); -int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, - void *buffer, u32 length, - struct mlx5_ib_qp_base *base); +int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer, + int buflen, size_t *bc); +int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer, + int buflen, size_t *bc); +int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, + void *buffer, int buflen, size_t *bc); struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, @@ -1097,6 +1106,7 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw); int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int page_shift, int flags); struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, + struct ib_udata *udata, int access_flags); void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, @@ -1214,6 +1224,9 @@ mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, { return -EOPNOTSUPP; } +static inline void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, + unsigned long start, + unsigned long end){}; #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ /* Needed for rep profile */ @@ -1253,7 +1266,7 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, const struct ib_gid_attr *attr); void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); -int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); +void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); /* GSI QP helper functions */ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index bf2b6ea23851..c85f00255884 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -71,10 +71,9 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - /* Wait until all page fault handlers using the mr complete. */ - synchronize_srcu(&dev->mr_srcu); -#endif + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + /* Wait until all page fault handlers using the mr complete. 
*/ + synchronize_srcu(&dev->mr_srcu); return err; } @@ -95,10 +94,9 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING static void update_odp_mr(struct mlx5_ib_mr *mr) { - if (mr->umem->is_odp) { + if (is_odp_mr(mr)) { /* * This barrier prevents the compiler from moving the * setting of umem->odp_data->private to point to our @@ -121,7 +119,6 @@ static void update_odp_mr(struct mlx5_ib_mr *mr) smp_wmb(); } } -#endif static void reg_mr_callback(int status, struct mlx5_async_work *context) { @@ -257,9 +254,8 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - synchronize_srcu(&dev->mr_srcu); -#endif + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + synchronize_srcu(&dev->mr_srcu); list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { list_del(&mr->list); @@ -611,52 +607,27 @@ static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) dev->cache.root = NULL; } -static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) +static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent; + struct dentry *dir; int i; if (!mlx5_debugfs_root || dev->rep) - return 0; + return; cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root); - if (!cache->root) - return -ENOMEM; for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { ent = &cache->ent[i]; sprintf(ent->name, "%d", ent->order); - ent->dir = debugfs_create_dir(ent->name, cache->root); - if (!ent->dir) - goto err; - - ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent, - &size_fops); - if (!ent->fsize) - goto err; - - ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent, - &limit_fops); - if (!ent->flimit) - goto err; - - ent->fcur = debugfs_create_u32("cur", 0400, ent->dir, - &ent->cur); - if (!ent->fcur) - goto err; - - ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir, - &ent->miss); - if (!ent->fmiss) - goto err; + dir = debugfs_create_dir(ent->name, cache->root); + debugfs_create_file("size", 0600, dir, ent, &size_fops); + debugfs_create_file("limit", 0600, dir, ent, &limit_fops); + debugfs_create_u32("cur", 0400, dir, &ent->cur); + debugfs_create_u32("miss", 0600, dir, &ent->miss); } - - return 0; -err: - mlx5_mr_cache_debugfs_cleanup(dev); - - return -ENOMEM; } static void delay_time_func(struct timer_list *t) @@ -670,7 +641,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent; - int err; int i; mutex_init(&dev->slow_path_mutex); @@ -715,14 +685,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) queue_work(cache->wq, &ent->work); } - err = mlx5_mr_cache_debugfs_init(dev); - if (err) - mlx5_ib_warn(dev, "cache debugfs failure\n"); - - /* - * We don't want to fail driver if debugfs failed to initialize, - * so we are not forwarding error to the user. 
- */ + mlx5_mr_cache_debugfs_init(dev); return 0; } @@ -822,18 +785,17 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev) return MLX5_MAX_UMR_SHIFT; } -static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length, - int access_flags, struct ib_umem **umem, - int *npages, int *page_shift, int *ncont, - int *order) +static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, + u64 start, u64 length, int access_flags, + struct ib_umem **umem, int *npages, int *page_shift, + int *ncont, int *order) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); struct ib_umem *u; int err; *umem = NULL; - u = ib_umem_get(pd->uobject->context, start, length, access_flags, 0); + u = ib_umem_get(udata, start, length, access_flags, 0); err = PTR_ERR_OR_ZERO(u); if (err) { mlx5_ib_dbg(dev, "umem get failed (%d)\n", err); @@ -1306,21 +1268,20 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", start, virt_addr, length, access_flags); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (!start && length == U64_MAX) { + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start && + length == U64_MAX) { if (!(access_flags & IB_ACCESS_ON_DEMAND) || !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) return ERR_PTR(-EINVAL); - mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags); + mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags); if (IS_ERR(mr)) return ERR_CAST(mr); return &mr->ibmr; } -#endif - err = mr_umem_get(pd, start, length, access_flags, &umem, &npages, - &page_shift, &ncont, &order); + err = mr_umem_get(dev, udata, start, length, access_flags, &umem, + &npages, &page_shift, &ncont, &order); if (err < 0) return ERR_PTR(err); @@ -1361,9 +1322,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->umem = umem; set_mr_fields(dev, mr, npages, length, access_flags); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING update_odp_mr(mr); -#endif if (!populate_mtts) { int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; @@ -1380,9 +1339,11 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - mr->live = 1; -#endif + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { + mr->live = 1; + atomic_set(&mr->num_pending_prefetch, 0); + } + return &mr->ibmr; error: ib_umem_release(umem); @@ -1470,8 +1431,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, flags |= IB_MR_REREG_TRANS; ib_umem_release(mr->umem); mr->umem = NULL; - err = mr_umem_get(pd, addr, len, access_flags, &mr->umem, - &npages, &page_shift, &ncont, &order); + err = mr_umem_get(dev, udata, addr, len, access_flags, + &mr->umem, &npages, &page_shift, &ncont, + &order); if (err) goto err; } @@ -1497,9 +1459,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, } mr->allocated_from_cache = 0; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - mr->live = 1; -#endif + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + mr->live = 1; } else { /* * Send a UMR WQE @@ -1528,9 +1489,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, set_mr_fields(dev, mr, npages, len, access_flags); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING update_odp_mr(mr); -#endif return 0; err: @@ -1616,12 +1575,19 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) int npages = mr->npages; struct ib_umem *umem = mr->umem; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (umem 
&& umem->is_odp) { + if (is_odp_mr(mr)) { struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem); - /* Prevent new page faults from succeeding */ + /* Prevent new page faults and + * prefetch requests from succeeding + */ mr->live = 0; + + /* dequeue pending prefetch requests for the mr */ + if (atomic_read(&mr->num_pending_prefetch)) + flush_workqueue(system_unbound_wq); + WARN_ON(atomic_read(&mr->num_pending_prefetch)); + /* Wait for all running page-fault handlers to finish. */ synchronize_srcu(&dev->mr_srcu); /* Destroy all page mappings */ @@ -1641,7 +1607,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) /* Avoid double-freeing the umem. */ umem = NULL; } -#endif + clean_mr(dev, mr); /* diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 4ee32964e1dd..c20bfc41ecf1 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -101,9 +101,9 @@ static int check_parent(struct ib_umem_odp *odp, return mr && mr->parent == parent && !odp->dying; } -struct ib_ucontext_per_mm *mr_to_per_mm(struct mlx5_ib_mr *mr) +static struct ib_ucontext_per_mm *mr_to_per_mm(struct mlx5_ib_mr *mr) { - if (WARN_ON(!mr || !mr->umem || !mr->umem->is_odp)) + if (WARN_ON(!mr || !is_odp_mr(mr))) return NULL; return to_ib_umem_odp(mr->umem)->per_mm; @@ -315,6 +315,9 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) if (MLX5_CAP_ODP(dev->mdev, ud_odp_caps.send)) caps->per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SEND; + if (MLX5_CAP_ODP(dev->mdev, ud_odp_caps.srq_receive)) + caps->per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV; + if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.send)) caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SEND; @@ -330,6 +333,27 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.atomic)) caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC; + if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.srq_receive)) + caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV; + + if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.send)) + caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_SEND; + + if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.receive)) + caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_RECV; + + if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.write)) + caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_WRITE; + + if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.read)) + caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_READ; + + if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.atomic)) + caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_ATOMIC; + + if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.srq_receive)) + caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV; + if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) && MLX5_CAP_GEN(dev->mdev, null_mkey) && MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) @@ -439,7 +463,7 @@ next_mr: if (nentries) nentries++; } else { - odp = ib_alloc_odp_umem(odp_mr->per_mm, addr, + odp = ib_alloc_odp_umem(odp_mr, addr, MLX5_IMR_MTT_SIZE); if (IS_ERR(odp)) { mutex_unlock(&odp_mr->umem_mutex); @@ -492,13 +516,13 @@ next_mr: } struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, + struct ib_udata *udata, int access_flags) { - struct ib_ucontext *ctx = pd->ibpd.uobject->context; struct mlx5_ib_mr *imr; struct ib_umem *umem; - umem = ib_umem_get(ctx, 0, 0, IB_ACCESS_ON_DEMAND, 0); + umem = ib_umem_get(udata, 0, 0, access_flags, 0); if (IS_ERR(umem)) return ERR_CAST(umem); 
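The dereg_mr() path above depends on a simple accounting contract with the prefetch code: each queued prefetch bumps mr->num_pending_prefetch, dereg clears mr->live first, and flush_workqueue(system_unbound_wq) drains any already-queued work before the SRCU synchronization runs. The snippet below is a minimal userspace model of that contract, not kernel code; fake_mr, prefetch_worker and the pthread calls are illustrative stand-ins, with pthread_join() playing the role of flush_workqueue().

/*
 * Userspace model of the dereg/prefetch accounting (illustrative only):
 * the worker drops its pending count when done; teardown clears "live"
 * and then waits for already-queued work before proceeding.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

struct fake_mr {
	atomic_bool live;                 /* cleared by the dereg path */
	atomic_int  num_pending_prefetch; /* raised before work is queued */
};

static void *prefetch_worker(void *arg)
{
	struct fake_mr *mr = arg;

	if (atomic_load(&mr->live))
		usleep(1000);	/* stand-in for the actual prefetch */

	/* Drop the reference taken when the work was queued. */
	atomic_fetch_sub(&mr->num_pending_prefetch, 1);
	return NULL;
}

int main(void)
{
	struct fake_mr mr = { .live = true, .num_pending_prefetch = 0 };
	pthread_t worker;

	/* Queue side: account for the work before submitting it. */
	atomic_fetch_add(&mr.num_pending_prefetch, 1);
	pthread_create(&worker, NULL, prefetch_worker, &mr);

	/* Dereg side: stop new work, then drain what is already queued. */
	atomic_store(&mr.live, false);
	pthread_join(worker, NULL);	/* role of flush_workqueue() here */

	printf("pending after drain: %d\n",
	       atomic_load(&mr.num_pending_prefetch));
	return 0;
}

Built with -pthread, the final count is always zero after the join, which mirrors the WARN_ON(atomic_read(&mr->num_pending_prefetch)) check the patch places right after the flush.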
@@ -511,6 +535,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, imr->umem = umem; init_waitqueue_head(&imr->q_leaf_free); atomic_set(&imr->num_leaf_free, 0); + atomic_set(&imr->num_pending_prefetch, 0); return imr; } @@ -685,6 +710,21 @@ struct pf_frame { int depth; }; +static int get_indirect_num_descs(struct mlx5_core_mkey *mmkey) +{ + struct mlx5_ib_mw *mw; + struct mlx5_ib_devx_mr *devx_mr; + + if (mmkey->type == MLX5_MKEY_MW) { + mw = container_of(mmkey, struct mlx5_ib_mw, mmkey); + return mw->ndescs; + } + + devx_mr = container_of(mmkey, struct mlx5_ib_devx_mr, + mmkey); + return devx_mr->ndescs; +} + /* * Handle a single data segment in a page-fault WQE or RDMA region. * @@ -696,7 +736,8 @@ struct pf_frame { * -EFAULT when there's an error mapping the requested pages. The caller will * abort the page fault handling. */ -static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 key, +static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, + struct ib_pd *pd, u32 key, u64 io_virt, size_t bcnt, u32 *bytes_committed, u32 *bytes_mapped, u32 flags) @@ -705,11 +746,11 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 key, bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH; struct pf_frame *head = NULL, *frame; struct mlx5_core_mkey *mmkey; - struct mlx5_ib_mw *mw; struct mlx5_ib_mr *mr; struct mlx5_klm *pklm; u32 *out = NULL; size_t offset; + int ndescs; srcu_key = srcu_read_lock(&dev->mr_srcu); @@ -739,12 +780,18 @@ next_mr: goto srcu_unlock; } - if (prefetch && !mr->umem->is_odp) { - ret = -EINVAL; - goto srcu_unlock; + if (prefetch) { + if (!is_odp_mr(mr) || + mr->ibmr.pd != pd) { + mlx5_ib_dbg(dev, "Invalid prefetch request: %s\n", + is_odp_mr(mr) ? "MR is not ODP" : + "PD is not of the MR"); + ret = -EINVAL; + goto srcu_unlock; + } } - if (!mr->umem->is_odp) { + if (!is_odp_mr(mr)) { mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", key); if (bytes_mapped) @@ -762,7 +809,8 @@ next_mr: break; case MLX5_MKEY_MW: - mw = container_of(mmkey, struct mlx5_ib_mw, mmkey); + case MLX5_MKEY_INDIRECT_DEVX: + ndescs = get_indirect_num_descs(mmkey); if (depth >= MLX5_CAP_GEN(dev->mdev, max_indirection)) { mlx5_ib_dbg(dev, "indirection level exceeded\n"); @@ -771,7 +819,7 @@ next_mr: } outlen = MLX5_ST_SZ_BYTES(query_mkey_out) + - sizeof(*pklm) * (mw->ndescs - 2); + sizeof(*pklm) * (ndescs - 2); if (outlen > cur_outlen) { kfree(out); @@ -786,14 +834,14 @@ next_mr: pklm = (struct mlx5_klm *)MLX5_ADDR_OF(query_mkey_out, out, bsf0_klm0_pas_mtt0_1); - ret = mlx5_core_query_mkey(dev->mdev, &mw->mmkey, out, outlen); + ret = mlx5_core_query_mkey(dev->mdev, mmkey, out, outlen); if (ret) goto srcu_unlock; offset = io_virt - MLX5_GET64(query_mkey_out, out, memory_key_mkey_entry.start_addr); - for (i = 0; bcnt && i < mw->ndescs; i++, pklm++) { + for (i = 0; bcnt && i < ndescs; i++, pklm++) { if (offset >= be32_to_cpu(pklm->bcount)) { offset -= be32_to_cpu(pklm->bcount); continue; @@ -853,7 +901,6 @@ srcu_unlock: /** * Parse a series of data segments for page fault handling. * - * @qp the QP on which the fault occurred. * @pfault contains page fault information. * @wqe points at the first data segment in the WQE. * @wqe_end points after the end of the WQE. 
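get_indirect_num_descs() above uses the usual container_of() dispatch: both mlx5_ib_mw and mlx5_ib_devx_mr embed an mlx5_core_mkey, and mmkey->type selects which wrapper to recover. Below is a small, self-contained userspace illustration of that pattern; the struct and enum names are simplified stand-ins, not the kernel definitions.

/*
 * Userspace illustration (not kernel code) of the container_of dispatch:
 * two wrapper structs embed the same key structure, and a type field
 * selects which wrapper to recover from a pointer to the embedded key.
 */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

enum mkey_type { MKEY_MW, MKEY_INDIRECT_DEVX };

struct core_mkey { enum mkey_type type; };

struct fake_mw      { int ndescs; struct core_mkey mmkey; };
struct fake_devx_mr { int ndescs; struct core_mkey mmkey; };

static int get_indirect_num_descs(struct core_mkey *mmkey)
{
	if (mmkey->type == MKEY_MW)
		return container_of(mmkey, struct fake_mw, mmkey)->ndescs;

	return container_of(mmkey, struct fake_devx_mr, mmkey)->ndescs;
}

int main(void)
{
	struct fake_mw mw = { .ndescs = 4, .mmkey = { .type = MKEY_MW } };
	struct fake_devx_mr dmr = { .ndescs = 7,
				    .mmkey = { .type = MKEY_INDIRECT_DEVX } };

	printf("mw ndescs=%d devx ndescs=%d\n",
	       get_indirect_num_descs(&mw.mmkey),
	       get_indirect_num_descs(&dmr.mmkey));
	return 0;
}

This is why the later mkey query and KLM walk in the page-fault handler can operate on mmkey alone and only needs the descriptor count from whichever wrapper happens to contain it.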
@@ -870,7 +917,7 @@ srcu_unlock: */ static int pagefault_data_segments(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault, - struct mlx5_ib_qp *qp, void *wqe, + void *wqe, void *wqe_end, u32 *bytes_mapped, u32 *total_wqe_bytes, int receive_queue) { @@ -881,10 +928,6 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, size_t bcnt; int inline_segment; - /* Skip SRQ next-WQE segment. */ - if (receive_queue && qp->ibqp.srq) - wqe += sizeof(struct mlx5_wqe_srq_next_seg); - if (bytes_mapped) *bytes_mapped = 0; if (total_wqe_bytes) @@ -928,7 +971,8 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, continue; } - ret = pagefault_single_data_segment(dev, key, io_virt, bcnt, + ret = pagefault_single_data_segment(dev, NULL, key, + io_virt, bcnt, &pfault->bytes_committed, bytes_mapped, 0); if (ret < 0) @@ -1009,6 +1053,10 @@ static int mlx5_ib_mr_initiator_pfault_handler( MLX5_WQE_CTRL_OPCODE_MASK; switch (qp->ibqp.qp_type) { + case IB_QPT_XRC_INI: + *wqe += sizeof(struct mlx5_wqe_xrc_seg); + transport_caps = dev->odp_caps.per_transport_caps.xrc_odp_caps; + break; case IB_QPT_RC: transport_caps = dev->odp_caps.per_transport_caps.rc_odp_caps; break; @@ -1028,7 +1076,7 @@ static int mlx5_ib_mr_initiator_pfault_handler( return -EFAULT; } - if (qp->ibqp.qp_type != IB_QPT_RC) { + if (qp->ibqp.qp_type == IB_QPT_UD) { av = *wqe; if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV)) *wqe += sizeof(struct mlx5_av); @@ -1053,21 +1101,34 @@ static int mlx5_ib_mr_initiator_pfault_handler( } /* - * Parse responder WQE. Advances the wqe pointer to point at the - * scatter-gather list, and set wqe_end to the end of the WQE. + * Parse responder WQE and set wqe_end to the end of the WQE. */ -static int mlx5_ib_mr_responder_pfault_handler( - struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault, - struct mlx5_ib_qp *qp, void **wqe, void **wqe_end, int wqe_length) +static int mlx5_ib_mr_responder_pfault_handler_srq(struct mlx5_ib_dev *dev, + struct mlx5_ib_srq *srq, + void **wqe, void **wqe_end, + int wqe_length) { - struct mlx5_ib_wq *wq = &qp->rq; - int wqe_size = 1 << wq->wqe_shift; + int wqe_size = 1 << srq->msrq.wqe_shift; - if (qp->ibqp.srq) { - mlx5_ib_err(dev, "ODP fault on SRQ is not supported\n"); + if (wqe_size > wqe_length) { + mlx5_ib_err(dev, "Couldn't read all of the receive WQE's content\n"); return -EFAULT; } + *wqe_end = *wqe + wqe_size; + *wqe += sizeof(struct mlx5_wqe_srq_next_seg); + + return 0; +} + +static int mlx5_ib_mr_responder_pfault_handler_rq(struct mlx5_ib_dev *dev, + struct mlx5_ib_qp *qp, + void *wqe, void **wqe_end, + int wqe_length) +{ + struct mlx5_ib_wq *wq = &qp->rq; + int wqe_size = 1 << wq->wqe_shift; + if (qp->wq_sig) { mlx5_ib_err(dev, "ODP fault with WQE signatures is not supported\n"); return -EFAULT; @@ -1091,7 +1152,7 @@ invalid_transport_or_opcode: return -EFAULT; } - *wqe_end = *wqe + wqe_size; + *wqe_end = wqe + wqe_size; return 0; } @@ -1099,22 +1160,25 @@ invalid_transport_or_opcode: static inline struct mlx5_core_rsc_common *odp_get_rsc(struct mlx5_ib_dev *dev, u32 wq_num, int pf_type) { - enum mlx5_res_type res_type; + struct mlx5_core_rsc_common *common = NULL; + struct mlx5_core_srq *srq; switch (pf_type) { case MLX5_WQE_PF_TYPE_RMP: - res_type = MLX5_RES_SRQ; + srq = mlx5_cmd_get_srq(dev, wq_num); + if (srq) + common = &srq->common; break; case MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE: case MLX5_WQE_PF_TYPE_RESP: case MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC: - res_type = MLX5_RES_QP; + common = mlx5_core_res_hold(dev->mdev, wq_num, MLX5_RES_QP); 
break; default: - return NULL; + break; } - return mlx5_core_res_hold(dev->mdev, wq_num, res_type); + return common; } static inline struct mlx5_ib_qp *res_to_qp(struct mlx5_core_rsc_common *res) @@ -1124,6 +1188,14 @@ static inline struct mlx5_ib_qp *res_to_qp(struct mlx5_core_rsc_common *res) return to_mibqp(mqp); } +static inline struct mlx5_ib_srq *res_to_srq(struct mlx5_core_rsc_common *res) +{ + struct mlx5_core_srq *msrq = + container_of(res, struct mlx5_core_srq, common); + + return to_mibsrq(msrq); +} + static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault) { @@ -1134,8 +1206,10 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, int resume_with_error = 1; u16 wqe_index = pfault->wqe.wqe_index; int requestor = pfault->type & MLX5_PFAULT_REQUESTOR; - struct mlx5_core_rsc_common *res; - struct mlx5_ib_qp *qp; + struct mlx5_core_rsc_common *res = NULL; + struct mlx5_ib_qp *qp = NULL; + struct mlx5_ib_srq *srq = NULL; + size_t bytes_copied; res = odp_get_rsc(dev, pfault->wqe.wq_num, pfault->type); if (!res) { @@ -1147,6 +1221,10 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, case MLX5_RES_QP: qp = res_to_qp(res); break; + case MLX5_RES_SRQ: + case MLX5_RES_XSRQ: + srq = res_to_srq(res); + break; default: mlx5_ib_err(dev, "wqe page fault for unsupported type %d\n", pfault->type); goto resolve_page_fault; @@ -1158,9 +1236,23 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, goto resolve_page_fault; } - ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer, - PAGE_SIZE, &qp->trans_qp.base); - if (ret < 0) { + if (qp) { + if (requestor) { + ret = mlx5_ib_read_user_wqe_sq(qp, wqe_index, + buffer, PAGE_SIZE, + &bytes_copied); + } else { + ret = mlx5_ib_read_user_wqe_rq(qp, wqe_index, + buffer, PAGE_SIZE, + &bytes_copied); + } + } else { + ret = mlx5_ib_read_user_wqe_srq(srq, wqe_index, + buffer, PAGE_SIZE, + &bytes_copied); + } + + if (ret) { mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%d, wqe_index=%x, qpn=%x\n", ret, wqe_index, pfault->token); goto resolve_page_fault; @@ -1168,11 +1260,18 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, wqe = buffer; if (requestor) - ret = mlx5_ib_mr_initiator_pfault_handler(dev, pfault, qp, &wqe, - &wqe_end, ret); + ret = mlx5_ib_mr_initiator_pfault_handler(dev, pfault, qp, + &wqe, &wqe_end, + bytes_copied); + else if (qp) + ret = mlx5_ib_mr_responder_pfault_handler_rq(dev, qp, + wqe, &wqe_end, + bytes_copied); else - ret = mlx5_ib_mr_responder_pfault_handler(dev, pfault, qp, &wqe, - &wqe_end, ret); + ret = mlx5_ib_mr_responder_pfault_handler_srq(dev, srq, + &wqe, &wqe_end, + bytes_copied); + if (ret < 0) goto resolve_page_fault; @@ -1181,7 +1280,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, goto resolve_page_fault; } - ret = pagefault_data_segments(dev, pfault, qp, wqe, wqe_end, + ret = pagefault_data_segments(dev, pfault, wqe, wqe_end, &bytes_mapped, &total_wqe_bytes, !requestor); if (ret == -EAGAIN) { @@ -1240,7 +1339,7 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, prefetch_len = min(MAX_PREFETCH_LEN, prefetch_len); } - ret = pagefault_single_data_segment(dev, rkey, address, length, + ret = pagefault_single_data_segment(dev, NULL, rkey, address, length, &pfault->bytes_committed, NULL, 0); if (ret == -EAGAIN) { @@ -1267,7 +1366,7 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, if (prefetch_activated) { u32 bytes_committed = 0; 
- ret = pagefault_single_data_segment(dev, rkey, address, + ret = pagefault_single_data_segment(dev, NULL, rkey, address, prefetch_len, &bytes_committed, NULL, 0); @@ -1564,30 +1663,98 @@ int mlx5_ib_odp_init(void) struct prefetch_mr_work { struct work_struct work; - struct mlx5_ib_dev *dev; + struct ib_pd *pd; u32 pf_flags; u32 num_sge; struct ib_sge sg_list[0]; }; -static int mlx5_ib_prefetch_sg_list(struct mlx5_ib_dev *dev, u32 pf_flags, +static void num_pending_prefetch_dec(struct mlx5_ib_dev *dev, + struct ib_sge *sg_list, u32 num_sge, + u32 from) +{ + u32 i; + int srcu_key; + + srcu_key = srcu_read_lock(&dev->mr_srcu); + + for (i = from; i < num_sge; ++i) { + struct mlx5_core_mkey *mmkey; + struct mlx5_ib_mr *mr; + + mmkey = __mlx5_mr_lookup(dev->mdev, + mlx5_base_mkey(sg_list[i].lkey)); + mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); + atomic_dec(&mr->num_pending_prefetch); + } + + srcu_read_unlock(&dev->mr_srcu, srcu_key); +} + +static bool num_pending_prefetch_inc(struct ib_pd *pd, + struct ib_sge *sg_list, u32 num_sge) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + bool ret = true; + u32 i; + + for (i = 0; i < num_sge; ++i) { + struct mlx5_core_mkey *mmkey; + struct mlx5_ib_mr *mr; + + mmkey = __mlx5_mr_lookup(dev->mdev, + mlx5_base_mkey(sg_list[i].lkey)); + if (!mmkey || mmkey->key != sg_list[i].lkey) { + ret = false; + break; + } + + if (mmkey->type != MLX5_MKEY_MR) { + ret = false; + break; + } + + mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); + + if (mr->ibmr.pd != pd) { + ret = false; + break; + } + + if (!mr->live) { + ret = false; + break; + } + + atomic_inc(&mr->num_pending_prefetch); + } + + if (!ret) + num_pending_prefetch_dec(dev, sg_list, i, 0); + + return ret; +} + +static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, u32 pf_flags, struct ib_sge *sg_list, u32 num_sge) { - int i; + u32 i; + int ret = 0; + struct mlx5_ib_dev *dev = to_mdev(pd->device); for (i = 0; i < num_sge; ++i) { struct ib_sge *sg = &sg_list[i]; int bytes_committed = 0; - int ret; - ret = pagefault_single_data_segment(dev, sg->lkey, sg->addr, + ret = pagefault_single_data_segment(dev, pd, sg->lkey, sg->addr, sg->length, &bytes_committed, NULL, pf_flags); if (ret < 0) - return ret; + break; } - return 0; + + return ret < 0 ? 
ret : 0; } static void mlx5_ib_prefetch_mr_work(struct work_struct *work) @@ -1595,12 +1762,14 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *work) struct prefetch_mr_work *w = container_of(work, struct prefetch_mr_work, work); - if (ib_device_try_get(&w->dev->ib_dev)) { - mlx5_ib_prefetch_sg_list(w->dev, w->pf_flags, w->sg_list, + if (ib_device_try_get(w->pd->device)) { + mlx5_ib_prefetch_sg_list(w->pd, w->pf_flags, w->sg_list, w->num_sge); - ib_device_put(&w->dev->ib_dev); + ib_device_put(w->pd->device); } - put_device(&w->dev->ib_dev.dev); + + num_pending_prefetch_dec(to_mdev(w->pd->device), w->sg_list, + w->num_sge, 0); kfree(w); } @@ -1611,12 +1780,14 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, struct mlx5_ib_dev *dev = to_mdev(pd->device); u32 pf_flags = MLX5_PF_FLAGS_PREFETCH; struct prefetch_mr_work *work; + bool valid_req; + int srcu_key; if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH) pf_flags |= MLX5_PF_FLAGS_DOWNGRADE; if (flags & IB_UVERBS_ADVISE_MR_FLAG_FLUSH) - return mlx5_ib_prefetch_sg_list(dev, pf_flags, sg_list, + return mlx5_ib_prefetch_sg_list(pd, pf_flags, sg_list, num_sge); work = kvzalloc(struct_size(work, sg_list, num_sge), GFP_KERNEL); @@ -1625,12 +1796,25 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, memcpy(work->sg_list, sg_list, num_sge * sizeof(struct ib_sge)); - get_device(&dev->ib_dev.dev); - work->dev = dev; + /* It is guaranteed that the pd when work is executed is the pd when + * work was queued since pd can't be destroyed while it holds MRs and + * destroying a MR leads to flushing the workquque + */ + work->pd = pd; work->pf_flags = pf_flags; work->num_sge = num_sge; INIT_WORK(&work->work, mlx5_ib_prefetch_mr_work); - schedule_work(&work->work); - return 0; + + srcu_key = srcu_read_lock(&dev->mr_srcu); + + valid_req = num_pending_prefetch_inc(pd, sg_list, num_sge); + if (valid_req) + queue_work(system_unbound_wq, &work->work); + else + kfree(work); + + srcu_read_unlock(&dev->mr_srcu, srcu_key); + + return valid_req ? 0 : -EINVAL; } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7db778d96ef5..7cd006da1dae 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -109,75 +109,173 @@ static int is_sqp(enum ib_qp_type qp_type) } /** - * mlx5_ib_read_user_wqe() - Copy a user-space WQE to kernel space. + * mlx5_ib_read_user_wqe_common() - Copy a WQE (or part of) from user WQ + * to kernel buffer * - * @qp: QP to copy from. - * @send: copy from the send queue when non-zero, use the receive queue - * otherwise. - * @wqe_index: index to start copying from. For send work queues, the - * wqe_index is in units of MLX5_SEND_WQE_BB. - * For receive work queue, it is the number of work queue - * element in the queue. - * @buffer: destination buffer. - * @length: maximum number of bytes to copy. + * @umem: User space memory where the WQ is + * @buffer: buffer to copy to + * @buflen: buffer length + * @wqe_index: index of WQE to copy from + * @wq_offset: offset to start of WQ + * @wq_wqe_cnt: number of WQEs in WQ + * @wq_wqe_shift: log2 of WQE size + * @bcnt: number of bytes to copy + * @bytes_copied: number of bytes to copy (return value) * - * Copies at least a single WQE, but may copy more data. + * Copies from start of WQE bcnt or less bytes. + * Does not gurantee to copy the entire WQE. * - * Return: the number of bytes copied, or an error code. + * Return: zero on success, or an error code. 
*/ -int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, - void *buffer, u32 length, - struct mlx5_ib_qp_base *base) +static int mlx5_ib_read_user_wqe_common(struct ib_umem *umem, + void *buffer, + u32 buflen, + int wqe_index, + int wq_offset, + int wq_wqe_cnt, + int wq_wqe_shift, + int bcnt, + size_t *bytes_copied) +{ + size_t offset = wq_offset + ((wqe_index % wq_wqe_cnt) << wq_wqe_shift); + size_t wq_end = wq_offset + (wq_wqe_cnt << wq_wqe_shift); + size_t copy_length; + int ret; + + /* don't copy more than requested, more than buffer length or + * beyond WQ end + */ + copy_length = min_t(u32, buflen, wq_end - offset); + copy_length = min_t(u32, copy_length, bcnt); + + ret = ib_umem_copy_from(buffer, umem, offset, copy_length); + if (ret) + return ret; + + if (!ret && bytes_copied) + *bytes_copied = copy_length; + + return 0; +} + +int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp, + int wqe_index, + void *buffer, + int buflen, + size_t *bc) { - struct ib_device *ibdev = qp->ibqp.device; - struct mlx5_ib_dev *dev = to_mdev(ibdev); - struct mlx5_ib_wq *wq = send ? &qp->sq : &qp->rq; - size_t offset; - size_t wq_end; + struct mlx5_ib_qp_base *base = &qp->trans_qp.base; struct ib_umem *umem = base->ubuffer.umem; - u32 first_copy_length; - int wqe_length; + struct mlx5_ib_wq *wq = &qp->sq; + struct mlx5_wqe_ctrl_seg *ctrl; + size_t bytes_copied; + size_t bytes_copied2; + size_t wqe_length; int ret; + int ds; - if (wq->wqe_cnt == 0) { - mlx5_ib_dbg(dev, "mlx5_ib_read_user_wqe for a QP with wqe_cnt == 0. qp_type: 0x%x\n", - qp->ibqp.qp_type); + if (buflen < sizeof(*ctrl)) return -EINVAL; - } - offset = wq->offset + ((wqe_index % wq->wqe_cnt) << wq->wqe_shift); - wq_end = wq->offset + (wq->wqe_cnt << wq->wqe_shift); + /* at first read as much as possible */ + ret = mlx5_ib_read_user_wqe_common(umem, + buffer, + buflen, + wqe_index, + wq->offset, + wq->wqe_cnt, + wq->wqe_shift, + buflen, + &bytes_copied); + if (ret) + return ret; - if (send && length < sizeof(struct mlx5_wqe_ctrl_seg)) + /* we need at least control segment size to proceed */ + if (bytes_copied < sizeof(*ctrl)) return -EINVAL; - if (offset > umem->length || - (send && offset + sizeof(struct mlx5_wqe_ctrl_seg) > umem->length)) - return -EINVAL; + ctrl = buffer; + ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK; + wqe_length = ds * MLX5_WQE_DS_UNITS; + + /* if we copied enough then we are done */ + if (bytes_copied >= wqe_length) { + *bc = bytes_copied; + return 0; + } + + /* otherwise this a wrapped around wqe + * so read the remaining bytes starting + * from wqe_index 0 + */ + ret = mlx5_ib_read_user_wqe_common(umem, + buffer + bytes_copied, + buflen - bytes_copied, + 0, + wq->offset, + wq->wqe_cnt, + wq->wqe_shift, + wqe_length - bytes_copied, + &bytes_copied2); - first_copy_length = min_t(u32, offset + length, wq_end) - offset; - ret = ib_umem_copy_from(buffer, umem, offset, first_copy_length); if (ret) return ret; + *bc = bytes_copied + bytes_copied2; + return 0; +} - if (send) { - struct mlx5_wqe_ctrl_seg *ctrl = buffer; - int ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK; - - wqe_length = ds * MLX5_WQE_DS_UNITS; - } else { - wqe_length = 1 << wq->wqe_shift; - } +int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp, + int wqe_index, + void *buffer, + int buflen, + size_t *bc) +{ + struct mlx5_ib_qp_base *base = &qp->trans_qp.base; + struct ib_umem *umem = base->ubuffer.umem; + struct mlx5_ib_wq *wq = &qp->rq; + size_t bytes_copied; + int ret; - if (wqe_length <= first_copy_length) - return 
first_copy_length; + ret = mlx5_ib_read_user_wqe_common(umem, + buffer, + buflen, + wqe_index, + wq->offset, + wq->wqe_cnt, + wq->wqe_shift, + buflen, + &bytes_copied); - ret = ib_umem_copy_from(buffer + first_copy_length, umem, wq->offset, - wqe_length - first_copy_length); if (ret) return ret; + *bc = bytes_copied; + return 0; +} - return wqe_length; +int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, + int wqe_index, + void *buffer, + int buflen, + size_t *bc) +{ + struct ib_umem *umem = srq->umem; + size_t bytes_copied; + int ret; + + ret = mlx5_ib_read_user_wqe_common(umem, + buffer, + buflen, + wqe_index, + 0, + srq->msrq.max, + srq->msrq.wqe_shift, + buflen, + &bytes_copied); + + if (ret) + return ret; + *bc = bytes_copied; + return 0; } static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type) @@ -435,9 +533,9 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev, return -EINVAL; } - if (ucmd->sq_wqe_count && ((1 << ilog2(ucmd->sq_wqe_count)) != ucmd->sq_wqe_count)) { - mlx5_ib_warn(dev, "sq_wqe_count %d, sq_wqe_count %d\n", - ucmd->sq_wqe_count, ucmd->sq_wqe_count); + if (ucmd->sq_wqe_count && !is_power_of_2(ucmd->sq_wqe_count)) { + mlx5_ib_warn(dev, "sq_wqe_count %d is not a power of two\n", + ucmd->sq_wqe_count); return -EINVAL; } @@ -645,16 +743,14 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev, return bfregi->sys_pages[index_of_sys_page] + offset; } -static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, - struct ib_pd *pd, +static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, unsigned long addr, size_t size, - struct ib_umem **umem, - int *npages, int *page_shift, int *ncont, - u32 *offset) + struct ib_umem **umem, int *npages, int *page_shift, + int *ncont, u32 *offset) { int err; - *umem = ib_umem_get(pd->uobject->context, addr, size, 0, 0); + *umem = ib_umem_get(udata, addr, size, 0, 0); if (IS_ERR(*umem)) { mlx5_ib_dbg(dev, "umem_get failed\n"); return PTR_ERR(*umem); @@ -695,10 +791,11 @@ static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, } static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, - struct mlx5_ib_rwq *rwq, + struct ib_udata *udata, struct mlx5_ib_rwq *rwq, struct mlx5_ib_create_wq *ucmd) { - struct mlx5_ib_ucontext *context; + struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); int page_shift = 0; int npages; u32 offset = 0; @@ -708,9 +805,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (!ucmd->buf_addr) return -EINVAL; - context = to_mucontext(pd->uobject->context); - rwq->umem = ib_umem_get(pd->uobject->context, ucmd->buf_addr, - rwq->buf_size, 0, 0); + rwq->umem = ib_umem_get(udata, ucmd->buf_addr, rwq->buf_size, 0, 0); if (IS_ERR(rwq->umem)) { mlx5_ib_dbg(dev, "umem_get failed\n"); err = PTR_ERR(rwq->umem); @@ -735,7 +830,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, (unsigned long long)ucmd->buf_addr, rwq->buf_size, npages, page_shift, ncont, offset); - err = mlx5_ib_db_map_user(context, ucmd->db_addr, &rwq->db); + err = mlx5_ib_db_map_user(ucontext, udata, ucmd->db_addr, &rwq->db); if (err) { mlx5_ib_dbg(dev, "map failed\n"); goto err_umem; @@ -783,7 +878,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, return err; } - context = to_mucontext(pd->uobject->context); + context = rdma_udata_to_drv_context(udata, struct mlx5_ib_ucontext, + ibucontext); if (ucmd.flags & MLX5_QP_FLAG_BFREG_INDEX) { uar_index = bfregn_to_uar_index(dev, 
&context->bfregi, ucmd.bfreg_index, true); @@ -819,10 +915,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (ucmd.buf_addr && ubuffer->buf_size) { ubuffer->buf_addr = ucmd.buf_addr; - err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr, - ubuffer->buf_size, - &ubuffer->umem, &npages, &page_shift, - &ncont, &offset); + err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, + ubuffer->buf_size, &ubuffer->umem, + &npages, &page_shift, &ncont, &offset); if (err) goto err_bfreg; } else { @@ -856,7 +951,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, resp->bfreg_index = MLX5_IB_INVALID_BFREG; qp->bfregn = bfregn; - err = mlx5_ib_db_map_user(context, ucmd.db_addr, &qp->db); + err = mlx5_ib_db_map_user(context, udata, ucmd.db_addr, &qp->db); if (err) { mlx5_ib_dbg(dev, "map failed\n"); goto err_free; @@ -1119,6 +1214,7 @@ static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev, } static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, + struct ib_udata *udata, struct mlx5_ib_sq *sq, void *qpin, struct ib_pd *pd) { @@ -1135,9 +1231,9 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, int ncont = 0; u32 offset = 0; - err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr, ubuffer->buf_size, - &sq->ubuffer.umem, &npages, &page_shift, - &ncont, &offset); + err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, ubuffer->buf_size, + &sq->ubuffer.umem, &npages, &page_shift, &ncont, + &offset); if (err) return err; @@ -1362,9 +1458,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; struct mlx5_ib_sq *sq = &raw_packet_qp->sq; struct mlx5_ib_rq *rq = &raw_packet_qp->rq; - struct ib_uobject *uobj = pd->uobject; - struct ib_ucontext *ucontext = uobj->context; - struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext); + struct mlx5_ib_ucontext *mucontext = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); int err; u32 tdn = mucontext->tdn; u16 uid = to_mpd(pd)->uid; @@ -1374,7 +1469,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (err) return err; - err = create_raw_packet_qp_sq(dev, sq, in, pd); + err = create_raw_packet_qp_sq(dev, udata, sq, in, pd); if (err) goto err_destroy_tis; @@ -1478,9 +1573,8 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { - struct ib_uobject *uobj = pd->uobject; - struct ib_ucontext *ucontext = uobj->context; - struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext); + struct mlx5_ib_ucontext *mucontext = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); struct mlx5_ib_create_qp_resp resp = {}; int inlen; int err; @@ -1822,6 +1916,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, int inlen = MLX5_ST_SZ_BYTES(create_qp_in); struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_create_qp_resp resp = {}; + struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); struct mlx5_ib_cq *send_cq; struct mlx5_ib_cq *recv_cq; unsigned long flags; @@ -1924,8 +2020,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_QP_FLAG_TYPE_DCT)) return -EINVAL; - err = get_qp_user_index(to_mucontext(pd->uobject->context), - &ucmd, udata->inlen, &uidx); + err = get_qp_user_index(ucontext, &ucmd, udata->inlen, &uidx); if (err) return err; @@ -2409,8 
+2504,11 @@ static const char *ib_qp_type_str(enum ib_qp_type type) static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd, struct ib_qp_init_attr *attr, - struct mlx5_ib_create_qp *ucmd) + struct mlx5_ib_create_qp *ucmd, + struct ib_udata *udata) { + struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); struct mlx5_ib_qp *qp; int err = 0; u32 uidx = MLX5_IB_DEFAULT_UIDX; @@ -2419,8 +2517,7 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd, if (!attr->srq || !attr->recv_cq) return ERR_PTR(-EINVAL); - err = get_qp_user_index(to_mucontext(pd->uobject->context), - ucmd, sizeof(*ucmd), &uidx); + err = get_qp_user_index(ucontext, ucmd, sizeof(*ucmd), &uidx); if (err) return ERR_PTR(err); @@ -2502,15 +2599,17 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, int err; struct ib_qp_init_attr mlx_init_attr; struct ib_qp_init_attr *init_attr = verbs_init_attr; + struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); if (pd) { dev = to_mdev(pd->device); if (init_attr->qp_type == IB_QPT_RAW_PACKET) { - if (!udata) { + if (!ucontext) { mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n"); return ERR_PTR(-EINVAL); - } else if (!to_mucontext(pd->uobject->context)->cqe_version) { + } else if (!ucontext->cqe_version) { mlx5_ib_dbg(dev, "Raw Packet QP is only supported for CQE version > 0\n"); return ERR_PTR(-EINVAL); } @@ -2542,7 +2641,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, return ERR_PTR(-EINVAL); } } else { - return mlx5_ib_create_dct(pd, init_attr, &ucmd); + return mlx5_ib_create_dct(pd, init_attr, &ucmd, udata); } } @@ -2653,10 +2752,10 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp) static int to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_attr *attr, - int attr_mask, __be32 *hw_access_flags) + int attr_mask, __be32 *hw_access_flags_be) { u8 dest_rd_atomic; - u32 access_flags; + u32 access_flags, hw_access_flags = 0; struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device); @@ -2674,7 +2773,7 @@ static int to_mlx5_access_flags(struct mlx5_ib_qp *qp, access_flags &= IB_ACCESS_REMOTE_WRITE; if (access_flags & IB_ACCESS_REMOTE_READ) - *hw_access_flags |= MLX5_QP_BIT_RRE; + hw_access_flags |= MLX5_QP_BIT_RRE; if (access_flags & IB_ACCESS_REMOTE_ATOMIC) { int atomic_mode; @@ -2682,14 +2781,14 @@ static int to_mlx5_access_flags(struct mlx5_ib_qp *qp, if (atomic_mode < 0) return -EOPNOTSUPP; - *hw_access_flags |= MLX5_QP_BIT_RAE; - *hw_access_flags |= atomic_mode << MLX5_ATOMIC_MODE_OFFSET; + hw_access_flags |= MLX5_QP_BIT_RAE; + hw_access_flags |= atomic_mode << MLX5_ATOMIC_MODE_OFFSET; } if (access_flags & IB_ACCESS_REMOTE_WRITE) - *hw_access_flags |= MLX5_QP_BIT_RWE; + hw_access_flags |= MLX5_QP_BIT_RWE; - *hw_access_flags = cpu_to_be32(*hw_access_flags); + *hw_access_flags_be = cpu_to_be32(hw_access_flags); return 0; } @@ -3180,14 +3279,12 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, static unsigned int get_tx_affinity(struct mlx5_ib_dev *dev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp_base *qp_base, - u8 port_num) + u8 port_num, struct ib_udata *udata) { - struct mlx5_ib_ucontext *ucontext = NULL; + struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); unsigned int tx_port_affinity; - if (pd && pd->ibpd.uobject && pd->ibpd.uobject->context) - ucontext = to_mucontext(pd->ibpd.uobject->context); - if (ucontext) { tx_port_affinity = (unsigned 
int)atomic_add_return( 1, &ucontext->tx_port_affinity) % @@ -3210,8 +3307,10 @@ static unsigned int get_tx_affinity(struct mlx5_ib_dev *dev, static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, - enum ib_qp_state cur_state, enum ib_qp_state new_state, - const struct mlx5_ib_modify_qp *ucmd) + enum ib_qp_state cur_state, + enum ib_qp_state new_state, + const struct mlx5_ib_modify_qp *ucmd, + struct ib_udata *udata) { static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = { [MLX5_QP_STATE_RST] = { @@ -3302,7 +3401,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, (ibqp->qp_type == IB_QPT_XRC_TGT)) { if (dev->lag_active) { u8 p = mlx5_core_native_port_num(dev->mdev); - tx_affinity = get_tx_affinity(dev, pd, base, p); + tx_affinity = get_tx_affinity(dev, pd, base, p, + udata); context->flags |= cpu_to_be32(tx_affinity << 24); } } @@ -3390,7 +3490,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, } if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) { - __be32 access_flags = 0; + __be32 access_flags; err = to_mlx5_access_flags(qp, attr, attr_mask, &access_flags); if (err) @@ -3629,6 +3729,7 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { struct mlx5_ib_modify_qp_resp resp = {}; + u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {0}; u32 min_resp_len = offsetof(typeof(resp), dctn) + sizeof(resp.dctn); @@ -3647,7 +3748,8 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, MLX5_SET(dctc, dctc, hop_limit, attr->ah_attr.grh.hop_limit); err = mlx5_core_create_dct(dev->mdev, &qp->dct.mdct, qp->dct.in, - MLX5_ST_SZ_BYTES(create_dct_in)); + MLX5_ST_SZ_BYTES(create_dct_in), out, + sizeof(out)); if (err) return err; resp.dctn = qp->dct.mdct.mqp.qpn; @@ -3785,7 +3887,7 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, - new_state, &ucmd); + new_state, &ucmd, udata); out: mutex_unlock(&qp->mutex); @@ -5795,7 +5897,7 @@ static int prepare_user_rq(struct ib_pd *pd, return err; } - err = create_user_rq(dev, pd, rwq, &ucmd); + err = create_user_rq(dev, pd, udata, rwq, &ucmd); if (err) { mlx5_ib_dbg(dev, "err %d\n", err); return err; diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 4e8d18009f58..1ec1beb1296b 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -47,6 +47,8 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_create_srq ucmd = {}; + struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); size_t ucmdlen; int err; int npages; @@ -71,16 +73,14 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, return -EINVAL; if (in->type != IB_SRQT_BASIC) { - err = get_srq_user_index(to_mucontext(pd->uobject->context), - &ucmd, udata->inlen, &uidx); + err = get_srq_user_index(ucontext, &ucmd, udata->inlen, &uidx); if (err) return err; } srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE); - srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size, - 0, 0); + srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0); if (IS_ERR(srq->umem)) { mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size); err = PTR_ERR(srq->umem); @@ -104,8 +104,7 @@ static int create_srq_user(struct ib_pd *pd, struct 
mlx5_ib_srq *srq, mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0); - err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context), - ucmd.db_addr, &srq->db); + err = mlx5_ib_db_map_user(ucontext, udata, ucmd.db_addr, &srq->db); if (err) { mlx5_ib_dbg(dev, "map doorbell failed\n"); goto err_in; diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h index 75eb5839ae95..c330af35ff10 100644 --- a/drivers/infiniband/hw/mlx5/srq.h +++ b/drivers/infiniband/hw/mlx5/srq.h @@ -46,8 +46,6 @@ struct mlx5_core_srq { int wqe_shift; void (*event)(struct mlx5_core_srq *srq, enum mlx5_event e); - atomic_t refcount; - struct completion free; u16 uid; }; diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index 7aaaffbd4afa..63ac38bb3498 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -87,7 +87,7 @@ struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn) srq = radix_tree_lookup(&table->tree, srqn); if (srq) - atomic_inc(&srq->refcount); + atomic_inc(&srq->common.refcount); spin_unlock(&table->lock); @@ -594,8 +594,8 @@ int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, if (err) return err; - atomic_set(&srq->refcount, 1); - init_completion(&srq->free); + atomic_set(&srq->common.refcount, 1); + init_completion(&srq->common.free); spin_lock_irq(&table->lock); err = radix_tree_insert(&table->tree, srq->srqn, srq); @@ -627,9 +627,8 @@ int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) if (err) return err; - if (atomic_dec_and_test(&srq->refcount)) - complete(&srq->free); - wait_for_completion(&srq->free); + mlx5_core_res_put(&srq->common); + wait_for_completion(&srq->common.free); return 0; } @@ -685,7 +684,7 @@ static int srq_event_notifier(struct notifier_block *nb, srq = radix_tree_lookup(&table->tree, srqn); if (srq) - atomic_inc(&srq->refcount); + atomic_inc(&srq->common.refcount); spin_unlock(&table->lock); @@ -694,8 +693,7 @@ static int srq_event_notifier(struct notifier_block *nb, srq->event(srq, eqe->type); - if (atomic_dec_and_test(&srq->refcount)) - complete(&srq->free); + mlx5_core_res_put(&srq->common); return NOTIFY_OK; } |
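The srq.h/srq_cmd.c hunks above fold the SRQ's private refcount and completion into the shared mlx5_core_rsc_common, so mlx5_core_res_put() presumably performs the same dec-and-test-then-complete that the old code open-coded (if (atomic_dec_and_test(&srq->refcount)) complete(&srq->free)). The following is a minimal userspace model of that pattern, not the kernel implementation; fake_rsc_common, res_put() and wait_for_free() are illustrative names, with a pthread condition variable standing in for struct completion.

/*
 * Userspace model of the shared refcount + completion pattern
 * (illustrative only): the destroy path drops its reference and then
 * waits until every other holder has dropped theirs as well.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct fake_rsc_common {
	atomic_int refcount;
	pthread_mutex_t lock;
	pthread_cond_t free;	/* stands in for struct completion */
	int freed;
};

static void res_put(struct fake_rsc_common *res)
{
	/* Models: if (atomic_dec_and_test(&refcount)) complete(&free); */
	if (atomic_fetch_sub(&res->refcount, 1) == 1) {
		pthread_mutex_lock(&res->lock);
		res->freed = 1;
		pthread_cond_signal(&res->free);
		pthread_mutex_unlock(&res->lock);
	}
}

static void wait_for_free(struct fake_rsc_common *res)
{
	pthread_mutex_lock(&res->lock);
	while (!res->freed)
		pthread_cond_wait(&res->free, &res->lock);
	pthread_mutex_unlock(&res->lock);
}

static struct fake_rsc_common res = {
	.refcount = 2,		/* creator + one in-flight event handler */
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.free = PTHREAD_COND_INITIALIZER,
};

int main(void)
{
	res_put(&res);		/* event handler finishes with the resource */
	res_put(&res);		/* destroy path drops the initial reference */
	wait_for_free(&res);	/* returns once the count reached zero */
	printf("resource quiesced\n");
	return 0;
}

Moving the pair into the common struct lets the page-fault and event paths hold any WQ-type resource (QP, SRQ, XSRQ) through the same hold/put interface, which is what the odp_get_rsc() changes earlier in this patch appear to rely on.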