diff options
Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r-- | drivers/infiniband/hw/mlx5/ah.c | 12 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/cmd.c | 104 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/cmd.h | 4 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/main.c | 572 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mlx5_ib.h | 71 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mr.c | 257 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/qp.c | 156 |
7 files changed, 1016 insertions, 160 deletions
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index fe269f680103..e6bde32a83f3 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -36,6 +36,9 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, struct rdma_ah_attr *ah_attr) { + enum ib_gid_type gid_type; + int err; + if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); @@ -50,6 +53,12 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4); if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { + err = mlx5_get_roce_gid_type(dev, ah_attr->port_num, + ah_attr->grh.sgid_index, + &gid_type); + if (err) + return ERR_PTR(err); + memcpy(ah->av.rmac, ah_attr->roce.dmac, sizeof(ah_attr->roce.dmac)); ah->av.udp_sport = @@ -57,6 +66,9 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, rdma_ah_get_port_num(ah_attr), rdma_ah_read_grh(ah_attr)->sgid_index); ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0x7) << 1; + if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) +#define MLX5_ECN_ENABLED BIT(1) + ah->av.tclass |= MLX5_ECN_ENABLED; } else { ah->av.rlid = cpu_to_be16(rdma_ah_get_dlid(ah_attr)); ah->av.fl_mlid = rdma_ah_get_path_bits(ah_attr) & 0x7f; diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index 6f6712f87a73..188512bf46e6 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -66,3 +66,107 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev, return mlx5_cmd_exec(dev, in, in_size, out, sizeof(out)); } + +int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, + u64 length, u32 alignment) +{ + struct mlx5_core_dev *dev = memic->dev; + u64 num_memic_hw_pages = MLX5_CAP_DEV_MEM(dev, memic_bar_size) + >> PAGE_SHIFT; + u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr); + u32 max_alignment = MLX5_CAP_DEV_MEM(dev, log_max_memic_addr_alignment); + u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE); + u32 out[MLX5_ST_SZ_DW(alloc_memic_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_memic_in)] = {}; + u32 mlx5_alignment; + u64 page_idx = 0; + int ret = 0; + + if (!length || (length & MLX5_MEMIC_ALLOC_SIZE_MASK)) + return -EINVAL; + + /* mlx5 device sets alignment as 64*2^driver_value + * so normalizing is needed. + */ + mlx5_alignment = (alignment < MLX5_MEMIC_BASE_ALIGN) ? 0 : + alignment - MLX5_MEMIC_BASE_ALIGN; + if (mlx5_alignment > max_alignment) + return -EINVAL; + + MLX5_SET(alloc_memic_in, in, opcode, MLX5_CMD_OP_ALLOC_MEMIC); + MLX5_SET(alloc_memic_in, in, range_size, num_pages * PAGE_SIZE); + MLX5_SET(alloc_memic_in, in, memic_size, length); + MLX5_SET(alloc_memic_in, in, log_memic_addr_alignment, + mlx5_alignment); + + while (page_idx < num_memic_hw_pages) { + spin_lock(&memic->memic_lock); + page_idx = bitmap_find_next_zero_area(memic->memic_alloc_pages, + num_memic_hw_pages, + page_idx, + num_pages, 0); + + if (page_idx < num_memic_hw_pages) + bitmap_set(memic->memic_alloc_pages, + page_idx, num_pages); + + spin_unlock(&memic->memic_lock); + + if (page_idx >= num_memic_hw_pages) + break; + + MLX5_SET64(alloc_memic_in, in, range_start_addr, + hw_start_addr + (page_idx * PAGE_SIZE)); + + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (ret) { + spin_lock(&memic->memic_lock); + bitmap_clear(memic->memic_alloc_pages, + page_idx, num_pages); + spin_unlock(&memic->memic_lock); + + if (ret == -EAGAIN) { + page_idx++; + continue; + } + + return ret; + } + + *addr = pci_resource_start(dev->pdev, 0) + + MLX5_GET64(alloc_memic_out, out, memic_start_addr); + + return 0; + } + + return -ENOMEM; +} + +int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length) +{ + struct mlx5_core_dev *dev = memic->dev; + u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr); + u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE); + u32 out[MLX5_ST_SZ_DW(dealloc_memic_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(dealloc_memic_in)] = {0}; + u64 start_page_idx; + int err; + + addr -= pci_resource_start(dev->pdev, 0); + start_page_idx = (addr - hw_start_addr) >> PAGE_SHIFT; + + MLX5_SET(dealloc_memic_in, in, opcode, MLX5_CMD_OP_DEALLOC_MEMIC); + MLX5_SET64(dealloc_memic_in, in, memic_start_addr, addr); + MLX5_SET(dealloc_memic_in, in, memic_size, length); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + + if (!err) { + spin_lock(&memic->memic_lock); + bitmap_clear(memic->memic_alloc_pages, + start_page_idx, num_pages); + spin_unlock(&memic->memic_lock); + } + + return err; +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 78ffded7cc2c..e7206c8a8011 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -33,6 +33,7 @@ #ifndef MLX5_IB_CMD_H #define MLX5_IB_CMD_H +#include "mlx5_ib.h" #include <linux/kernel.h> #include <linux/mlx5/driver.h> @@ -41,4 +42,7 @@ int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, void *out, int out_size); int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev, void *in, int in_size); +int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, + u64 length, u32 alignment); +int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length); #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 071fd9a7b919..daa919e5a442 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -38,6 +38,7 @@ #include <linux/pci.h> #include <linux/dma-mapping.h> #include <linux/slab.h> +#include <linux/bitmap.h> #if defined(CONFIG_X86) #include <asm/pat.h> #endif @@ -51,6 +52,7 @@ #include <linux/mlx5/port.h> #include <linux/mlx5/vport.h> #include <linux/mlx5/fs.h> +#include <linux/mlx5/fs_helpers.h> #include <linux/list.h> #include <rdma/ib_smi.h> #include <rdma/ib_umem.h> @@ -60,6 +62,13 @@ #include "ib_rep.h" #include "cmd.h" #include <linux/mlx5/fs_helpers.h> +#include <linux/mlx5/accel.h> +#include <rdma/uverbs_std_types.h> +#include <rdma/mlx5_user_ioctl_verbs.h> +#include <rdma/mlx5_user_ioctl_cmds.h> + +#define UVERBS_MODULE_NAME mlx5_ib +#include <rdma/uverbs_named_ioctl.h> #define DRIVER_NAME "mlx5_ib" #define DRIVER_VERSION "5.0-0" @@ -92,6 +101,12 @@ static LIST_HEAD(mlx5_ib_dev_list); */ static DEFINE_MUTEX(mlx5_ib_multiport_mutex); +/* We can't use an array for xlt_emergency_page because dma_map_single + * doesn't work on kernel modules memory + */ +static unsigned long xlt_emergency_page; +static struct mutex xlt_emergency_page_mutex; + struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi) { struct mlx5_ib_dev *dev; @@ -399,6 +414,9 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, if (err) goto out; + props->active_width = IB_WIDTH_4X; + props->active_speed = IB_SPEED_QDR; + translate_eth_proto_oper(eth_prot_oper, &props->active_speed, &props->active_width); @@ -493,18 +511,19 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, vlan_id, port_num); } -static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num, - unsigned int index, const union ib_gid *gid, +static int mlx5_ib_add_gid(const union ib_gid *gid, const struct ib_gid_attr *attr, __always_unused void **context) { - return set_roce_addr(to_mdev(device), port_num, index, gid, attr); + return set_roce_addr(to_mdev(attr->device), attr->port_num, + attr->index, gid, attr); } -static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num, - unsigned int index, __always_unused void **context) +static int mlx5_ib_del_gid(const struct ib_gid_attr *attr, + __always_unused void **context) { - return set_roce_addr(to_mdev(device), port_num, index, NULL, NULL); + return set_roce_addr(to_mdev(attr->device), attr->port_num, + attr->index, NULL, NULL); } __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, @@ -516,9 +535,6 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr)) return 0; - if (!attr.ndev) - return 0; - dev_put(attr.ndev); if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) @@ -538,9 +554,6 @@ int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num, if (ret) return ret; - if (!attr.ndev) - return -ENODEV; - dev_put(attr.ndev); *gid_type = attr.gid_type; @@ -844,6 +857,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_RX_HASH_SRC_PORT_UDP | MLX5_RX_HASH_DST_PORT_UDP | MLX5_RX_HASH_INNER; + if (mlx5_accel_ipsec_device_caps(dev->mdev) & + MLX5_ACCEL_IPSEC_CAP_DEVICE) + resp.rss_caps.rx_hash_fields_mask |= + MLX5_RX_HASH_IPSEC_SPI; resp.response_length += sizeof(resp.rss_caps); } } else { @@ -875,6 +892,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS; } + if (MLX5_CAP_DEV_MEM(mdev, memic)) { + props->max_dm_size = + MLX5_CAP_DEV_MEM(mdev, max_memic_size); + } + if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS)) props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; @@ -980,6 +1002,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_CAP_QOS(mdev, packet_pacing_min_rate); resp.packet_pacing_caps.supported_qpts |= 1 << IB_QPT_RAW_PACKET; + if (MLX5_CAP_QOS(mdev, packet_pacing_burst_bound) && + MLX5_CAP_QOS(mdev, packet_pacing_typical_size)) + resp.packet_pacing_caps.cap_flags |= + MLX5_IB_PP_SUPPORT_BURST; } resp.response_length += sizeof(resp.packet_pacing_caps); } @@ -1665,6 +1691,18 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, resp.response_length = min(offsetof(typeof(resp), response_length) + sizeof(resp.response_length), udata->outlen); + if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) { + if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_EGRESS)) + resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM; + if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA) + resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA; + if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) + resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING; + if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN) + resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN; + /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */ + } + context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) return ERR_PTR(-ENOMEM); @@ -1702,17 +1740,10 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range; #endif - context->upd_xlt_page = __get_free_page(GFP_KERNEL); - if (!context->upd_xlt_page) { - err = -ENOMEM; - goto out_uars; - } - mutex_init(&context->upd_xlt_page_mutex); - if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) { err = mlx5_ib_alloc_transport_domain(dev, &context->tdn); if (err) - goto out_page; + goto out_uars; } INIT_LIST_HEAD(&context->vma_private_list); @@ -1789,9 +1820,6 @@ out_td: if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) mlx5_ib_dealloc_transport_domain(dev, context->tdn); -out_page: - free_page(context->upd_xlt_page); - out_uars: deallocate_uars(dev, context); @@ -1817,7 +1845,6 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) mlx5_ib_dealloc_transport_domain(dev, context->tdn); - free_page(context->upd_xlt_page); deallocate_uars(dev, context); kfree(bfregi->sys_pages); kfree(bfregi->count); @@ -1993,6 +2020,8 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) return "best effort WC"; case MLX5_IB_MMAP_NC_PAGE: return "NC"; + case MLX5_IB_MMAP_DEVICE_MEM: + return "Device Memory"; default: return NULL; } @@ -2151,6 +2180,34 @@ free_bfreg: return err; } +static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + struct mlx5_ib_ucontext *mctx = to_mucontext(context); + struct mlx5_ib_dev *dev = to_mdev(context->device); + u16 page_idx = get_extended_index(vma->vm_pgoff); + size_t map_size = vma->vm_end - vma->vm_start; + u32 npages = map_size >> PAGE_SHIFT; + phys_addr_t pfn; + pgprot_t prot; + + if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) != + page_idx + npages) + return -EINVAL; + + pfn = ((pci_resource_start(dev->mdev->pdev, 0) + + MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >> + PAGE_SHIFT) + + page_idx; + prot = pgprot_writecombine(vma->vm_page_prot); + vma->vm_page_prot = prot; + + if (io_remap_pfn_range(vma, vma->vm_start, pfn, map_size, + vma->vm_page_prot)) + return -EAGAIN; + + return mlx5_ib_set_vma_data(vma, mctx); +} + static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) { struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); @@ -2195,6 +2252,9 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm case MLX5_IB_MMAP_CLOCK_INFO: return mlx5_ib_mmap_clock_info_page(dev, vma, context); + case MLX5_IB_MMAP_DEVICE_MEM: + return dm_mmap(ibcontext, vma); + default: return -EINVAL; } @@ -2202,6 +2262,87 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm return 0; } +struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_dm_alloc_attr *attr, + struct uverbs_attr_bundle *attrs) +{ + u64 act_size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE); + struct mlx5_memic *memic = &to_mdev(ibdev)->memic; + phys_addr_t memic_addr; + struct mlx5_ib_dm *dm; + u64 start_offset; + u32 page_idx; + int err; + + dm = kzalloc(sizeof(*dm), GFP_KERNEL); + if (!dm) + return ERR_PTR(-ENOMEM); + + mlx5_ib_dbg(to_mdev(ibdev), "alloc_memic req: user_length=0x%llx act_length=0x%llx log_alignment=%d\n", + attr->length, act_size, attr->alignment); + + err = mlx5_cmd_alloc_memic(memic, &memic_addr, + act_size, attr->alignment); + if (err) + goto err_free; + + start_offset = memic_addr & ~PAGE_MASK; + page_idx = (memic_addr - pci_resource_start(memic->dev->pdev, 0) - + MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >> + PAGE_SHIFT; + + err = uverbs_copy_to(attrs, + MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, + &start_offset, sizeof(start_offset)); + if (err) + goto err_dealloc; + + err = uverbs_copy_to(attrs, + MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, + &page_idx, sizeof(page_idx)); + if (err) + goto err_dealloc; + + bitmap_set(to_mucontext(context)->dm_pages, page_idx, + DIV_ROUND_UP(act_size, PAGE_SIZE)); + + dm->dev_addr = memic_addr; + + return &dm->ibdm; + +err_dealloc: + mlx5_cmd_dealloc_memic(memic, memic_addr, + act_size); +err_free: + kfree(dm); + return ERR_PTR(err); +} + +int mlx5_ib_dealloc_dm(struct ib_dm *ibdm) +{ + struct mlx5_memic *memic = &to_mdev(ibdm->device)->memic; + struct mlx5_ib_dm *dm = to_mdm(ibdm); + u64 act_size = roundup(dm->ibdm.length, MLX5_MEMIC_BASE_SIZE); + u32 page_idx; + int ret; + + ret = mlx5_cmd_dealloc_memic(memic, dm->dev_addr, act_size); + if (ret) + return ret; + + page_idx = (dm->dev_addr - pci_resource_start(memic->dev->pdev, 0) - + MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >> + PAGE_SHIFT; + bitmap_clear(to_mucontext(ibdm->uobject->context)->dm_pages, + page_idx, + DIV_ROUND_UP(act_size, PAGE_SIZE)); + + kfree(dm); + + return 0; +} + static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) @@ -2317,8 +2458,28 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) offsetof(typeof(filter), field) -\ sizeof(filter.field)) +static int parse_flow_flow_action(const union ib_flow_spec *ib_spec, + const struct ib_flow_attr *flow_attr, + struct mlx5_flow_act *action) +{ + struct mlx5_ib_flow_action *maction = to_mflow_act(ib_spec->action.act); + + switch (maction->ib_action.type) { + case IB_FLOW_ACTION_ESP: + /* Currently only AES_GCM keymat is supported by the driver */ + action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx; + action->action |= flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS ? + MLX5_FLOW_CONTEXT_ACTION_ENCRYPT : + MLX5_FLOW_CONTEXT_ACTION_DECRYPT; + return 0; + default: + return -EOPNOTSUPP; + } +} + static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, u32 *match_v, const union ib_flow_spec *ib_spec, + const struct ib_flow_attr *flow_attr, struct mlx5_flow_act *action) { void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, @@ -2328,6 +2489,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, void *headers_c; void *headers_v; int match_ipv; + int ret; if (ib_spec->type & IB_FLOW_SPEC_INNER) { headers_c = MLX5_ADDR_OF(fte_match_param, match_c, @@ -2478,7 +2640,15 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, ntohl(ib_spec->ipv6.mask.flow_label), ntohl(ib_spec->ipv6.val.flow_label), ib_spec->type & IB_FLOW_SPEC_INNER); + break; + case IB_FLOW_SPEC_ESP: + if (ib_spec->esp.mask.seq) + return -EOPNOTSUPP; + MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, + ntohl(ib_spec->esp.mask.spi)); + MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, + ntohl(ib_spec->esp.val.spi)); break; case IB_FLOW_SPEC_TCP: if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, @@ -2546,6 +2716,11 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, return -EOPNOTSUPP; action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; break; + case IB_FLOW_SPEC_ACTION_HANDLE: + ret = parse_flow_flow_action(ib_spec, flow_attr, action); + if (ret) + return ret; + break; default: return -EINVAL; } @@ -2587,6 +2762,46 @@ static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr) return false; } +enum valid_spec { + VALID_SPEC_INVALID, + VALID_SPEC_VALID, + VALID_SPEC_NA, +}; + +static enum valid_spec +is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev, + const struct mlx5_flow_spec *spec, + const struct mlx5_flow_act *flow_act, + bool egress) +{ + const u32 *match_c = spec->match_criteria; + bool is_crypto = + (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | + MLX5_FLOW_CONTEXT_ACTION_DECRYPT)); + bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c); + bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP; + + /* + * Currently only crypto is supported in egress, when regular egress + * rules would be supported, always return VALID_SPEC_NA. + */ + if (!is_crypto) + return egress ? VALID_SPEC_INVALID : VALID_SPEC_NA; + + return is_crypto && is_ipsec && + (!egress || (!is_drop && !flow_act->has_flow_tag)) ? + VALID_SPEC_VALID : VALID_SPEC_INVALID; +} + +static bool is_valid_spec(struct mlx5_core_dev *mdev, + const struct mlx5_flow_spec *spec, + const struct mlx5_flow_act *flow_act, + bool egress) +{ + /* We curretly only support ipsec egress flow */ + return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID; +} + static bool is_valid_ethertype(struct mlx5_core_dev *mdev, const struct ib_flow_attr *flow_attr, bool check_inner) @@ -2711,13 +2926,17 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size)); if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { - if (flow_is_multicast_only(flow_attr) && - !dont_trap) + if (ft_type == MLX5_IB_FT_TX) + priority = 0; + else if (flow_is_multicast_only(flow_attr) && + !dont_trap) priority = MLX5_IB_FLOW_MCAST_PRIO; else priority = ib_prio_to_core_prio(flow_attr->priority, dont_trap); ns = mlx5_get_flow_namespace(dev->mdev, + ft_type == MLX5_IB_FT_TX ? + MLX5_FLOW_NAMESPACE_EGRESS : MLX5_FLOW_NAMESPACE_BYPASS); num_entries = MLX5_FS_MAX_ENTRIES; num_groups = MLX5_FS_MAX_TYPES; @@ -2804,6 +3023,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, unsigned int spec_index; int err = 0; int dest_num = 1; + bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; if (!is_valid_attr(dev->mdev, flow_attr)) return ERR_PTR(-EINVAL); @@ -2820,7 +3040,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { err = parse_flow_attr(dev->mdev, spec->match_criteria, spec->match_value, - ib_flow, &flow_act); + ib_flow, flow_attr, &flow_act); if (err < 0) goto free; @@ -2843,12 +3063,23 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, } spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); + + if (is_egress && + !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) { + err = -EINVAL; + goto free; + } + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) { rule_dst = NULL; dest_num = 0; } else { - flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : - MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; + if (is_egress) + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; + else + flow_act.action |= + dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : + MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; } if (flow_act.has_flow_tag && @@ -3022,6 +3253,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, struct mlx5_flow_destination *dst = NULL; struct mlx5_ib_flow_prio *ft_prio_tx = NULL; struct mlx5_ib_flow_prio *ft_prio; + bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; int err; int underlay_qpn; @@ -3030,7 +3262,13 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, if (domain != IB_FLOW_DOMAIN_USER || flow_attr->port > dev->num_ports || - (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)) + (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | + IB_FLOW_ATTR_FLAGS_EGRESS))) + return ERR_PTR(-EINVAL); + + if (is_egress && + (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || + flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) return ERR_PTR(-EINVAL); dst = kzalloc(sizeof(*dst), GFP_KERNEL); @@ -3039,7 +3277,8 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, mutex_lock(&dev->flow_db->lock); - ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX); + ft_prio = get_flow_table(dev, flow_attr, + is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX); if (IS_ERR(ft_prio)) { err = PTR_ERR(ft_prio); goto unlock; @@ -3053,11 +3292,15 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, } } - dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; - if (mqp->flags & MLX5_IB_QP_RSS) - dst->tir_num = mqp->rss_qp.tirn; - else - dst->tir_num = mqp->raw_packet_qp.rq.tirn; + if (is_egress) { + dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT; + } else { + dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; + if (mqp->flags & MLX5_IB_QP_RSS) + dst->tir_num = mqp->rss_qp.tirn; + else + dst->tir_num = mqp->raw_packet_qp.rq.tirn; + } if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) { @@ -3102,6 +3345,170 @@ unlock: return ERR_PTR(err); } +static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags) +{ + u32 flags = 0; + + if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA) + flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA; + + return flags; +} + +#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA +static struct ib_flow_action * +mlx5_ib_create_flow_action_esp(struct ib_device *device, + const struct ib_flow_action_attrs_esp *attr, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_dev *mdev = to_mdev(device); + struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm; + struct mlx5_accel_esp_xfrm_attrs accel_attrs = {}; + struct mlx5_ib_flow_action *action; + u64 action_flags; + u64 flags; + int err = 0; + + if (IS_UVERBS_COPY_ERR(uverbs_copy_from(&action_flags, attrs, + MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS))) + return ERR_PTR(-EFAULT); + + if (action_flags >= (MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1)) + return ERR_PTR(-EOPNOTSUPP); + + flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags); + + /* We current only support a subset of the standard features. Only a + * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn + * (with overlap). Full offload mode isn't supported. + */ + if (!attr->keymat || attr->replay || attr->encap || + attr->spi || attr->seq || attr->tfc_pad || + attr->hard_limit_pkts || + (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT))) + return ERR_PTR(-EOPNOTSUPP); + + if (attr->keymat->protocol != + IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM) + return ERR_PTR(-EOPNOTSUPP); + + aes_gcm = &attr->keymat->keymat.aes_gcm; + + if (aes_gcm->icv_len != 16 || + aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ) + return ERR_PTR(-EOPNOTSUPP); + + action = kmalloc(sizeof(*action), GFP_KERNEL); + if (!action) + return ERR_PTR(-ENOMEM); + + action->esp_aes_gcm.ib_flags = attr->flags; + memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key, + sizeof(accel_attrs.keymat.aes_gcm.aes_key)); + accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8; + memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt, + sizeof(accel_attrs.keymat.aes_gcm.salt)); + memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv, + sizeof(accel_attrs.keymat.aes_gcm.seq_iv)); + accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8; + accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ; + accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM; + + accel_attrs.esn = attr->esn; + if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) + accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED; + if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) + accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; + + if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT) + accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT; + + action->esp_aes_gcm.ctx = + mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags); + if (IS_ERR(action->esp_aes_gcm.ctx)) { + err = PTR_ERR(action->esp_aes_gcm.ctx); + goto err_parse; + } + + action->esp_aes_gcm.ib_flags = attr->flags; + + return &action->ib_action; + +err_parse: + kfree(action); + return ERR_PTR(err); +} + +static int +mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action, + const struct ib_flow_action_attrs_esp *attr, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_flow_action *maction = to_mflow_act(action); + struct mlx5_accel_esp_xfrm_attrs accel_attrs; + int err = 0; + + if (attr->keymat || attr->replay || attr->encap || + attr->spi || attr->seq || attr->tfc_pad || + attr->hard_limit_pkts || + (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | + IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS | + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))) + return -EOPNOTSUPP; + + /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can + * be modified. + */ + if (!(maction->esp_aes_gcm.ib_flags & + IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) && + attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)) + return -EINVAL; + + memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs, + sizeof(accel_attrs)); + + accel_attrs.esn = attr->esn; + if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) + accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; + else + accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; + + err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx, + &accel_attrs); + if (err) + return err; + + maction->esp_aes_gcm.ib_flags &= + ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; + maction->esp_aes_gcm.ib_flags |= + attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; + + return 0; +} + +static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action) +{ + struct mlx5_ib_flow_action *maction = to_mflow_act(action); + + switch (action->type) { + case IB_FLOW_ACTION_ESP: + /* + * We only support aes_gcm by now, so we implicitly know this is + * the underline crypto. + */ + mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx); + break; + default: + WARN_ON(true); + break; + } + + kfree(maction); + return 0; +} + static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); @@ -4553,6 +4960,47 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev) mlx5_nic_vport_disable_roce(dev->mdev); } +ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_dm, UVERBS_OBJECT_DM, + UVERBS_METHOD_DM_ALLOC, + &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, + UVERBS_ATTR_TYPE(u64), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, + UVERBS_ATTR_TYPE(u16), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_flow_action, UVERBS_OBJECT_FLOW_ACTION, + UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, + &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, + UVERBS_ATTR_TYPE(u64), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +#define NUM_TREES 2 +static int populate_specs_root(struct mlx5_ib_dev *dev) +{ + const struct uverbs_object_tree_def *default_root[NUM_TREES + 1] = { + uverbs_default_get_objects()}; + size_t num_trees = 1; + + if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE && + !WARN_ON(num_trees >= ARRAY_SIZE(default_root))) + default_root[num_trees++] = &mlx5_ib_flow_action; + + if (MLX5_CAP_DEV_MEM(dev->mdev, memic) && + !WARN_ON(num_trees >= ARRAY_SIZE(default_root))) + default_root[num_trees++] = &mlx5_ib_dm; + + dev->ib_dev.specs_root = + uverbs_alloc_spec_tree(num_trees, default_root); + + return PTR_ERR_OR_ZERO(dev->ib_dev.specs_root); +} + +static void depopulate_specs_root(struct mlx5_ib_dev *dev) +{ + uverbs_free_spec_tree(dev->ib_dev.specs_root); +} + void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); @@ -4616,6 +5064,9 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) INIT_LIST_HEAD(&dev->qp_list); spin_lock_init(&dev->reset_flow_resource_lock); + spin_lock_init(&dev->memic.memic_lock); + dev->memic.dev = mdev; + #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING err = init_srcu_struct(&dev->mr_srcu); if (err) @@ -4778,11 +5229,21 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); } + if (MLX5_CAP_DEV_MEM(mdev, memic)) { + dev->ib_dev.alloc_dm = mlx5_ib_alloc_dm; + dev->ib_dev.dealloc_dm = mlx5_ib_dealloc_dm; + dev->ib_dev.reg_dm_mr = mlx5_ib_reg_dm_mr; + } + dev->ib_dev.create_flow = mlx5_ib_create_flow; dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow; dev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); + dev->ib_dev.create_flow_action_esp = mlx5_ib_create_flow_action_esp; + dev->ib_dev.destroy_flow_action = mlx5_ib_destroy_flow_action; + dev->ib_dev.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp; + dev->ib_dev.driver_id = RDMA_DRIVER_MLX5; err = init_node_data(dev); if (err) @@ -4997,11 +5458,21 @@ void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev) mlx5_free_bfreg(dev->mdev, &dev->bfreg); } +static int mlx5_ib_stage_populate_specs(struct mlx5_ib_dev *dev) +{ + return populate_specs_root(dev); +} + int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) { return ib_register_device(&dev->ib_dev, NULL); } +static void mlx5_ib_stage_depopulate_specs(struct mlx5_ib_dev *dev) +{ + depopulate_specs_root(dev); +} + void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) { destroy_umrc_res(dev); @@ -5136,6 +5607,9 @@ static const struct mlx5_ib_profile pf_profile = { STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR, NULL, mlx5_ib_stage_pre_ib_reg_umr_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_SPECS, + mlx5_ib_stage_populate_specs, + mlx5_ib_stage_depopulate_specs), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), @@ -5181,6 +5655,9 @@ static const struct mlx5_ib_profile nic_rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR, NULL, mlx5_ib_stage_pre_ib_reg_umr_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_SPECS, + mlx5_ib_stage_populate_specs, + mlx5_ib_stage_depopulate_specs), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), @@ -5301,13 +5778,32 @@ static struct mlx5_interface mlx5_ib_interface = { .protocol = MLX5_INTERFACE_PROTOCOL_IB, }; +unsigned long mlx5_ib_get_xlt_emergency_page(void) +{ + mutex_lock(&xlt_emergency_page_mutex); + return xlt_emergency_page; +} + +void mlx5_ib_put_xlt_emergency_page(void) +{ + mutex_unlock(&xlt_emergency_page_mutex); +} + static int __init mlx5_ib_init(void) { int err; + xlt_emergency_page = __get_free_page(GFP_KERNEL); + if (!xlt_emergency_page) + return -ENOMEM; + + mutex_init(&xlt_emergency_page_mutex); + mlx5_ib_event_wq = alloc_ordered_workqueue("mlx5_ib_event_wq", 0); - if (!mlx5_ib_event_wq) + if (!mlx5_ib_event_wq) { + free_page(xlt_emergency_page); return -ENOMEM; + } mlx5_ib_odp_init(); @@ -5320,6 +5816,8 @@ static void __exit mlx5_ib_cleanup(void) { mlx5_unregister_interface(&mlx5_ib_interface); destroy_workqueue(mlx5_ib_event_wq); + mutex_destroy(&xlt_emergency_page_mutex); + free_page(xlt_emergency_page); } module_init(mlx5_ib_init); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index c33bf1523d67..49a1aa0ff429 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -45,6 +45,7 @@ #include <linux/mlx5/transobj.h> #include <rdma/ib_user_verbs.h> #include <rdma/mlx5-abi.h> +#include <rdma/uverbs_ioctl.h> #define mlx5_ib_dbg(dev, format, arg...) \ pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ @@ -108,6 +109,16 @@ enum { MLX5_IB_INVALID_BFREG = BIT(31), }; +enum { + MLX5_MAX_MEMIC_PAGES = 0x100, + MLX5_MEMIC_ALLOC_SIZE_MASK = 0x3f, +}; + +enum { + MLX5_MEMIC_BASE_ALIGN = 6, + MLX5_MEMIC_BASE_SIZE = 1 << MLX5_MEMIC_BASE_ALIGN, +}; + struct mlx5_ib_vma_private_data { struct list_head list; struct vm_area_struct *vma; @@ -130,10 +141,8 @@ struct mlx5_ib_ucontext { /* protect vma_private_list add/del */ struct mutex vma_private_list_mutex; - unsigned long upd_xlt_page; - /* protect ODP/KSM */ - struct mutex upd_xlt_page_mutex; u64 lib_caps; + DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES); }; static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) @@ -155,6 +164,7 @@ struct mlx5_ib_pd { #define MLX5_IB_NUM_FLOW_FT (MLX5_IB_FLOW_LEFTOVERS_PRIO + 1) #define MLX5_IB_NUM_SNIFFER_FTS 2 +#define MLX5_IB_NUM_EGRESS_FTS 1 struct mlx5_ib_flow_prio { struct mlx5_flow_table *flow_table; unsigned int refcount; @@ -170,6 +180,7 @@ struct mlx5_ib_flow_handler { struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS]; + struct mlx5_ib_flow_prio egress[MLX5_IB_NUM_EGRESS_FTS]; struct mlx5_flow_table *lag_demux_ft; /* Protect flow steering bypass flow tables * when add/del flow rules. @@ -406,7 +417,7 @@ struct mlx5_ib_qp { struct list_head qps_list; struct list_head cq_recv_list; struct list_head cq_send_list; - u32 rate_limit; + struct mlx5_rate_limit rl; u32 underlay_qpn; bool tunnel_offload_en; /* storage for qp sub type when core qp type is IB_QPT_DRIVER */ @@ -522,8 +533,19 @@ enum mlx5_ib_mtt_access_flags { MLX5_IB_MTT_WRITE = (1 << 1), }; +struct mlx5_ib_dm { + struct ib_dm ibdm; + phys_addr_t dev_addr; +}; + #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE) +#define MLX5_IB_DM_ALLOWED_ACCESS (IB_ACCESS_LOCAL_WRITE |\ + IB_ACCESS_REMOTE_WRITE |\ + IB_ACCESS_REMOTE_READ |\ + IB_ACCESS_REMOTE_ATOMIC |\ + IB_ZERO_BASED) + struct mlx5_ib_mr { struct ib_mr ibmr; void *descs; @@ -743,6 +765,7 @@ enum mlx5_ib_stages { MLX5_IB_STAGE_UAR, MLX5_IB_STAGE_BFREG, MLX5_IB_STAGE_PRE_IB_REG_UMR, + MLX5_IB_STAGE_SPECS, MLX5_IB_STAGE_IB_REG, MLX5_IB_STAGE_POST_IB_REG_UMR, MLX5_IB_STAGE_DELAY_DROP, @@ -774,6 +797,22 @@ struct mlx5_ib_multiport_info { bool unaffiliate; }; +struct mlx5_ib_flow_action { + struct ib_flow_action ib_action; + union { + struct { + u64 ib_flags; + struct mlx5_accel_esp_xfrm *ctx; + } esp_aes_gcm; + }; +}; + +struct mlx5_memic { + struct mlx5_core_dev *dev; + spinlock_t memic_lock; + DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES); +}; + struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; @@ -820,6 +859,7 @@ struct mlx5_ib_dev { u8 umr_fence; struct list_head ib_dev_list; u64 sys_image_guid; + struct mlx5_memic memic; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -887,6 +927,11 @@ static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq) return container_of(msrq, struct mlx5_ib_srq, msrq); } +static inline struct mlx5_ib_dm *to_mdm(struct ib_dm *ibdm) +{ + return container_of(ibdm, struct mlx5_ib_dm, ibdm); +} + static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr) { return container_of(ibmr, struct mlx5_ib_mr, ibmr); @@ -897,6 +942,12 @@ static inline struct mlx5_ib_mw *to_mmw(struct ib_mw *ibmw) return container_of(ibmw, struct mlx5_ib_mw, ibmw); } +static inline struct mlx5_ib_flow_action * +to_mflow_act(struct ib_flow_action *ibact) +{ + return container_of(ibact, struct mlx5_ib_flow_action, ib_action); +} + int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, struct mlx5_db *db); void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db); @@ -1025,7 +1076,14 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, struct ib_udata *udata); int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev); - +struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_dm_alloc_attr *attr, + struct uverbs_attr_bundle *attrs); +int mlx5_ib_dealloc_dm(struct ib_dm *ibdm); +struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, + struct ib_dm_mr_attr *attr, + struct uverbs_attr_bundle *attrs); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev); @@ -1221,4 +1279,7 @@ static inline int get_num_static_uars(struct mlx5_ib_dev *dev, return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_static_sys_pages; } +unsigned long mlx5_ib_get_xlt_emergency_page(void); +void mlx5_ib_put_xlt_emergency_page(void); + #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 654bc31bc428..1520a2f20f98 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -47,10 +47,25 @@ enum { #define MLX5_UMR_ALIGN 2048 -static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); -static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); +static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); +static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static int mr_cache_max_order(struct mlx5_ib_dev *dev); static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); +static bool umr_can_modify_entity_size(struct mlx5_ib_dev *dev) +{ + return !MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled); +} + +static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) +{ + return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled); +} + +static bool use_umr(struct mlx5_ib_dev *dev, int order) +{ + return order <= mr_cache_max_order(dev) && + umr_can_modify_entity_size(dev); +} static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { @@ -189,7 +204,9 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) MLX5_SET(mkc, mkc, free, 1); MLX5_SET(mkc, mkc, umr_en, 1); - MLX5_SET(mkc, mkc, access_mode, ent->access_mode); + MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3); + MLX5_SET(mkc, mkc, access_mode_4_2, + (ent->access_mode >> 2) & 0x7); MLX5_SET(mkc, mkc, qpn, 0xffffff); MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt); @@ -220,26 +237,32 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; + struct mlx5_ib_mr *tmp_mr; struct mlx5_ib_mr *mr; - int err; + LIST_HEAD(del_list); int i; for (i = 0; i < num; i++) { spin_lock_irq(&ent->lock); if (list_empty(&ent->head)) { spin_unlock_irq(&ent->lock); - return; + break; } mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); - list_del(&mr->list); + list_move(&mr->list, &del_list); ent->cur--; ent->size--; spin_unlock_irq(&ent->lock); - err = destroy_mkey(dev, mr); - if (err) - mlx5_ib_warn(dev, "failed destroy mkey\n"); - else - kfree(mr); + mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); + } + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + synchronize_srcu(&dev->mr_srcu); +#endif + + list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { + list_del(&mr->list); + kfree(mr); } } @@ -562,26 +585,32 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; + struct mlx5_ib_mr *tmp_mr; struct mlx5_ib_mr *mr; - int err; + LIST_HEAD(del_list); cancel_delayed_work(&ent->dwork); while (1) { spin_lock_irq(&ent->lock); if (list_empty(&ent->head)) { spin_unlock_irq(&ent->lock); - return; + break; } mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); - list_del(&mr->list); + list_move(&mr->list, &del_list); ent->cur--; ent->size--; spin_unlock_irq(&ent->lock); - err = destroy_mkey(dev, mr); - if (err) - mlx5_ib_warn(dev, "failed destroy mkey\n"); - else - kfree(mr); + mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); + } + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + synchronize_srcu(&dev->mr_srcu); +#endif + + list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { + list_del(&mr->list); + kfree(mr); } } @@ -780,7 +809,7 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); @@ -947,7 +976,10 @@ static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages, { struct mlx5_ib_dev *dev = mr->dev; struct ib_umem *umem = mr->umem; + if (flags & MLX5_IB_UPD_XLT_INDIRECT) { + if (!umr_can_use_indirect_mkey(dev)) + return -EPERM; mlx5_odp_populate_klm(xlt, idx, npages, mr, flags); return npages; } @@ -977,7 +1009,6 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, { struct mlx5_ib_dev *dev = mr->dev; struct device *ddev = dev->ib_dev.dev.parent; - struct mlx5_ib_ucontext *uctx = NULL; int size; void *xlt; dma_addr_t dma; @@ -993,6 +1024,11 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, size_t pages_to_map = 0; size_t pages_iter = 0; gfp_t gfp; + bool use_emergency_page = false; + + if ((flags & MLX5_IB_UPD_XLT_INDIRECT) && + !umr_can_use_indirect_mkey(dev)) + return -EPERM; /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes, * so we need to align the offset and length accordingly @@ -1019,12 +1055,11 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, } if (!xlt) { - uctx = to_mucontext(mr->ibmr.pd->uobject->context); mlx5_ib_warn(dev, "Using XLT emergency buffer\n"); + xlt = (void *)mlx5_ib_get_xlt_emergency_page(); size = PAGE_SIZE; - xlt = (void *)uctx->upd_xlt_page; - mutex_lock(&uctx->upd_xlt_page_mutex); memset(xlt, 0, size); + use_emergency_page = true; } pages_iter = size / desc_size; dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE); @@ -1088,8 +1123,8 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); free_xlt: - if (uctx) - mutex_unlock(&uctx->upd_xlt_page_mutex); + if (use_emergency_page) + mlx5_ib_put_xlt_emergency_page(); else free_pages((unsigned long)xlt, get_order(size)); @@ -1141,7 +1176,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, free, !populate); - MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); @@ -1197,22 +1232,96 @@ static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, mr->access_flags = access_flags; } +static struct ib_mr *mlx5_ib_get_memic_mr(struct ib_pd *pd, u64 memic_addr, + u64 length, int acc) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_ib_mr *mr; + void *mkc; + u32 *in; + int err; + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_free; + } + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MEMIC & 0x3); + MLX5_SET(mkc, mkc, access_mode_4_2, + (MLX5_MKC_ACCESS_MODE_MEMIC >> 2) & 0x7); + MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); + MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); + MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); + MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET64(mkc, mkc, len, length); + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET64(mkc, mkc, start_addr, + memic_addr - pci_resource_start(dev->mdev->pdev, 0)); + + err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); + if (err) + goto err_in; + + kfree(in); + + mr->umem = NULL; + set_mr_fileds(dev, mr, 0, length, acc); + + return &mr->ibmr; + +err_in: + kfree(in); + +err_free: + kfree(mr); + + return ERR_PTR(err); +} + +struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, + struct ib_dm_mr_attr *attr, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_dm *mdm = to_mdm(dm); + u64 memic_addr; + + if (attr->access_flags & ~MLX5_IB_DM_ALLOWED_ACCESS) + return ERR_PTR(-EINVAL); + + memic_addr = mdm->dev_addr + attr->offset; + + return mlx5_ib_get_memic_mr(pd, memic_addr, attr->length, + attr->access_flags); +} + struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_mr *mr = NULL; + bool populate_mtts = false; struct ib_umem *umem; int page_shift; int npages; int ncont; int order; int err; - bool use_umr = true; if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", start, virt_addr, length, access_flags); @@ -1224,6 +1333,8 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return ERR_PTR(-EINVAL); mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags); + if (IS_ERR(mr)) + return ERR_CAST(mr); return &mr->ibmr; } #endif @@ -1234,26 +1345,29 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (err < 0) return ERR_PTR(err); - if (order <= mr_cache_max_order(dev)) { + if (use_umr(dev, order)) { mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, page_shift, order, access_flags); if (PTR_ERR(mr) == -EAGAIN) { mlx5_ib_dbg(dev, "cache empty for order %d\n", order); mr = NULL; } + populate_mtts = false; } else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) { if (access_flags & IB_ACCESS_ON_DEMAND) { err = -EINVAL; pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n"); goto error; } - use_umr = false; + populate_mtts = true; } if (!mr) { + if (!umr_can_modify_entity_size(dev)) + populate_mtts = true; mutex_lock(&dev->slow_path_mutex); mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, - page_shift, access_flags, !use_umr); + page_shift, access_flags, populate_mtts); mutex_unlock(&dev->slow_path_mutex); } @@ -1271,7 +1385,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, update_odp_mr(mr); #endif - if (use_umr) { + if (!populate_mtts) { int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; if (access_flags & IB_ACCESS_ON_DEMAND) @@ -1286,7 +1400,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } } +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING mr->live = 1; +#endif return &mr->ibmr; error: ib_umem_release(umem); @@ -1365,36 +1481,34 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, ib_umem_release(mr->umem); err = mr_umem_get(pd, addr, len, access_flags, &mr->umem, &npages, &page_shift, &ncont, &order); - if (err < 0) { - clean_mr(dev, mr); - return err; - } + if (err) + goto err; } if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) { /* * UMR can't be used - MKey needs to be replaced. */ - if (mr->allocated_from_cache) { + if (mr->allocated_from_cache) err = unreg_umr(dev, mr); - if (err) - mlx5_ib_warn(dev, "Failed to unregister MR\n"); - } else { + else err = destroy_mkey(dev, mr); - if (err) - mlx5_ib_warn(dev, "Failed to destroy MKey\n"); - } if (err) - return err; + goto err; mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont, page_shift, access_flags, true); - if (IS_ERR(mr)) - return PTR_ERR(mr); + if (IS_ERR(mr)) { + err = PTR_ERR(mr); + mr = to_mmr(ib_mr); + goto err; + } mr->allocated_from_cache = 0; +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING mr->live = 1; +#endif } else { /* * Send a UMR WQE @@ -1417,13 +1531,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, err = rereg_umr(pd, mr, access_flags, flags); } - if (err) { - mlx5_ib_warn(dev, "Failed to rereg UMR\n"); - ib_umem_release(mr->umem); - mr->umem = NULL; - clean_mr(dev, mr); - return err; - } + if (err) + goto err; } set_mr_fileds(dev, mr, npages, len, access_flags); @@ -1432,6 +1541,14 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, update_odp_mr(mr); #endif return 0; + +err: + if (mr->umem) { + ib_umem_release(mr->umem); + mr->umem = NULL; + } + clean_mr(dev, mr); + return err; } static int @@ -1480,10 +1597,9 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr) } } -static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) +static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { int allocated_from_cache = mr->allocated_from_cache; - int err; if (mr->sig) { if (mlx5_core_destroy_psv(dev->mdev, @@ -1500,21 +1616,11 @@ static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) mlx5_free_priv_descs(mr); - if (!allocated_from_cache) { - u32 key = mr->mmkey.key; - - err = destroy_mkey(dev, mr); - if (err) { - mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", - key, err); - return err; - } - } - - return 0; + if (!allocated_from_cache) + destroy_mkey(dev, mr); } -static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) +static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { int npages = mr->npages; struct ib_umem *umem = mr->umem; @@ -1555,16 +1661,12 @@ static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) kfree(mr); else mlx5_mr_cache_free(dev, mr); - - return 0; } int mlx5_ib_dereg_mr(struct ib_mr *ibmr) { - struct mlx5_ib_dev *dev = to_mdev(ibmr->device); - struct mlx5_ib_mr *mr = to_mmr(ibmr); - - return dereg_mr(dev, mr); + dereg_mr(to_mdev(ibmr->device), to_mmr(ibmr)); + return 0; } struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, @@ -1645,7 +1747,8 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, goto err_free_in; } - MLX5_SET(mkc, mkc, access_mode, mr->access_mode); + MLX5_SET(mkc, mkc, access_mode_1_0, mr->access_mode & 0x3); + MLX5_SET(mkc, mkc, access_mode_4_2, (mr->access_mode >> 2) & 0x7); MLX5_SET(mkc, mkc, umr_en, 1); mr->ibmr.device = pd->device; @@ -1726,7 +1829,7 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); MLX5_SET(mkc, mkc, umr_en, 1); MLX5_SET(mkc, mkc, lr, 1); - MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_KLMS); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS); MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2))); MLX5_SET(mkc, mkc, qpn, 0xffffff); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0d0b0b8dad98..7ed4b70f6447 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -86,7 +86,9 @@ struct mlx5_modify_raw_qp_param { u16 operation; u32 set_mask; /* raw_qp_set_mask_map */ - u32 rate_limit; + + struct mlx5_rate_limit rl; + u8 rq_q_ctr_id; }; @@ -878,7 +880,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, goto err_free; } - err = ib_copy_to_udata(udata, resp, sizeof(*resp)); + err = ib_copy_to_udata(udata, resp, min(udata->outlen, sizeof(*resp))); if (err) { mlx5_ib_dbg(dev, "copy failed\n"); goto err_unmap; @@ -1411,6 +1413,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, void *tirc; void *hfso; u32 selected_fields = 0; + u32 outer_l4; size_t min_resp_len; u32 tdn = mucontext->tdn; struct mlx5_ib_create_qp_rss ucmd = {}; @@ -1466,7 +1469,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return -EOPNOTSUPP; } - err = ib_copy_to_udata(udata, &resp, min_resp_len); + err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); if (err) { mlx5_ib_dbg(dev, "copy failed\n"); return -EINVAL; @@ -1541,10 +1544,14 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); - if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) && - ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))) { + outer_l4 = ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) << 0 | + ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) << 1 | + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) << 2; + + /* Check that only one l4 protocol is set */ + if (outer_l4 & (outer_l4 - 1)) { err = -EINVAL; goto err; } @@ -1575,6 +1582,9 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) selected_fields |= MLX5_HASH_FIELD_SEL_L4_DPORT; + if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) + selected_fields |= MLX5_HASH_FIELD_SEL_IPSEC_SPI; + MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields); create_tir: @@ -2774,8 +2784,9 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev, const struct mlx5_modify_raw_qp_param *raw_qp_param) { struct mlx5_ib_qp *ibqp = sq->base.container_mibqp; - u32 old_rate = ibqp->rate_limit; - u32 new_rate = old_rate; + struct mlx5_rate_limit old_rl = ibqp->rl; + struct mlx5_rate_limit new_rl = old_rl; + bool new_rate_added = false; u16 rl_index = 0; void *in; void *sqc; @@ -2797,39 +2808,43 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev, pr_warn("%s: Rate limit can only be changed when SQ is moving to RDY\n", __func__); else - new_rate = raw_qp_param->rate_limit; + new_rl = raw_qp_param->rl; } - if (old_rate != new_rate) { - if (new_rate) { - err = mlx5_rl_add_rate(dev, new_rate, &rl_index); + if (!mlx5_rl_are_equal(&old_rl, &new_rl)) { + if (new_rl.rate) { + err = mlx5_rl_add_rate(dev, &rl_index, &new_rl); if (err) { - pr_err("Failed configuring rate %u: %d\n", - new_rate, err); + pr_err("Failed configuring rate limit(err %d): \ + rate %u, max_burst_sz %u, typical_pkt_sz %u\n", + err, new_rl.rate, new_rl.max_burst_sz, + new_rl.typical_pkt_sz); + goto out; } + new_rate_added = true; } MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); + /* index 0 means no limit */ MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index); } err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen); if (err) { /* Remove new rate from table if failed */ - if (new_rate && - old_rate != new_rate) - mlx5_rl_remove_rate(dev, new_rate); + if (new_rate_added) + mlx5_rl_remove_rate(dev, &new_rl); goto out; } /* Only remove the old rate after new rate was set */ - if ((old_rate && - (old_rate != new_rate)) || + if ((old_rl.rate && + !mlx5_rl_are_equal(&old_rl, &new_rl)) || (new_state != MLX5_SQC_STATE_RDY)) - mlx5_rl_remove_rate(dev, old_rate); + mlx5_rl_remove_rate(dev, &old_rl); - ibqp->rate_limit = new_rate; + ibqp->rl = new_rl; sq->state = new_state; out: @@ -2906,7 +2921,8 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, - enum ib_qp_state cur_state, enum ib_qp_state new_state) + enum ib_qp_state cur_state, enum ib_qp_state new_state, + const struct mlx5_ib_modify_qp *ucmd) { static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = { [MLX5_QP_STATE_RST] = { @@ -2959,18 +2975,16 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, u16 op; u8 tx_affinity = 0; + mlx5_st = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ? + qp->qp_sub_type : ibqp->qp_type); + if (mlx5_st < 0) + return -EINVAL; + context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) return -ENOMEM; - err = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ? - qp->qp_sub_type : ibqp->qp_type); - if (err < 0) { - mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type); - goto out; - } - - context->flags = cpu_to_be32(err << 16); + context->flags = cpu_to_be32(mlx5_st << 16); if (!(attr_mask & IB_QP_PATH_MIG_STATE)) { context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11); @@ -3124,10 +3138,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, mlx5_cur = to_mlx5_state(cur_state); mlx5_new = to_mlx5_state(new_state); - mlx5_st = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ? - qp->qp_sub_type : ibqp->qp_type); - if (mlx5_st < 0) - goto out; if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE || !optab[mlx5_cur][mlx5_new]) { @@ -3150,7 +3160,30 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_RATE_LIMIT) { - raw_qp_param.rate_limit = attr->rate_limit; + raw_qp_param.rl.rate = attr->rate_limit; + + if (ucmd->burst_info.max_burst_sz) { + if (attr->rate_limit && + MLX5_CAP_QOS(dev->mdev, packet_pacing_burst_bound)) { + raw_qp_param.rl.max_burst_sz = + ucmd->burst_info.max_burst_sz; + } else { + err = -EINVAL; + goto out; + } + } + + if (ucmd->burst_info.typical_pkt_sz) { + if (attr->rate_limit && + MLX5_CAP_QOS(dev->mdev, packet_pacing_typical_size)) { + raw_qp_param.rl.typical_pkt_sz = + ucmd->burst_info.typical_pkt_sz; + } else { + err = -EINVAL; + goto out; + } + } + raw_qp_param.set_mask |= MLX5_RAW_QP_RATE_LIMIT; } @@ -3178,7 +3211,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, * If we moved a kernel QP to RESET, clean up all old CQ * entries and reinitialize the QP. */ - if (new_state == IB_QPS_RESET && !ibqp->uobject) { + if (new_state == IB_QPS_RESET && + !ibqp->uobject && ibqp->qp_type != IB_QPT_XRC_TGT) { mlx5_ib_cq_clean(recv_cq, base->mqp.qpn, ibqp->srq ? to_msrq(ibqp->srq) : NULL); if (send_cq != recv_cq) @@ -3337,8 +3371,10 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_ib_modify_qp ucmd = {}; enum ib_qp_type qp_type; enum ib_qp_state cur_state, new_state; + size_t required_cmd_sz; int err = -EINVAL; int port; enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED; @@ -3346,6 +3382,28 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (ibqp->rwq_ind_tbl) return -ENOSYS; + if (udata && udata->inlen) { + required_cmd_sz = offsetof(typeof(ucmd), reserved) + + sizeof(ucmd.reserved); + if (udata->inlen < required_cmd_sz) + return -EINVAL; + + if (udata->inlen > sizeof(ucmd) && + !ib_is_udata_cleared(udata, sizeof(ucmd), + udata->inlen - sizeof(ucmd))) + return -EOPNOTSUPP; + + if (ib_copy_from_udata(&ucmd, udata, + min(udata->inlen, sizeof(ucmd)))) + return -EFAULT; + + if (ucmd.comp_mask || + memchr_inv(&ucmd.reserved, 0, sizeof(ucmd.reserved)) || + memchr_inv(&ucmd.burst_info.reserved, 0, + sizeof(ucmd.burst_info.reserved))) + return -EOPNOTSUPP; + } + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask); @@ -3426,7 +3484,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } - err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); + err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, + new_state, &ucmd); out: mutex_unlock(&qp->mutex); @@ -3646,8 +3705,19 @@ static __be64 get_umr_update_pd_mask(void) return cpu_to_be64(result); } -static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, - struct ib_send_wr *wr, int atomic) +static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask) +{ + if ((mask & MLX5_MKEY_MASK_PAGE_SIZE && + MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) || + (mask & MLX5_MKEY_MASK_A && + MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))) + return -EPERM; + return 0; +} + +static int set_reg_umr_segment(struct mlx5_ib_dev *dev, + struct mlx5_wqe_umr_ctrl_seg *umr, + struct ib_send_wr *wr, int atomic) { struct mlx5_umr_wr *umrwr = umr_wr(wr); @@ -3679,6 +3749,8 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, if (!wr->num_sge) umr->flags |= MLX5_UMR_INLINE; + + return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask)); } static u8 get_umr_flags(int acc) @@ -4501,7 +4573,9 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey); - set_reg_umr_segment(seg, wr, !!(MLX5_CAP_GEN(mdev, atomic))); + err = set_reg_umr_segment(dev, seg, wr, !!(MLX5_CAP_GEN(mdev, atomic))); + if (unlikely(err)) + goto out; seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; if (unlikely((seg == qend))) |