summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/mlx5
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r--drivers/infiniband/hw/mlx5/ah.c12
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c104
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h4
-rw-r--r--drivers/infiniband/hw/mlx5/main.c572
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h71
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c257
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c156
7 files changed, 1016 insertions, 160 deletions
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index fe269f680103..e6bde32a83f3 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -36,6 +36,9 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
struct mlx5_ib_ah *ah,
struct rdma_ah_attr *ah_attr)
{
+ enum ib_gid_type gid_type;
+ int err;
+
if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
@@ -50,6 +53,12 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4);
if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
+ err = mlx5_get_roce_gid_type(dev, ah_attr->port_num,
+ ah_attr->grh.sgid_index,
+ &gid_type);
+ if (err)
+ return ERR_PTR(err);
+
memcpy(ah->av.rmac, ah_attr->roce.dmac,
sizeof(ah_attr->roce.dmac));
ah->av.udp_sport =
@@ -57,6 +66,9 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
rdma_ah_get_port_num(ah_attr),
rdma_ah_read_grh(ah_attr)->sgid_index);
ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0x7) << 1;
+ if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+#define MLX5_ECN_ENABLED BIT(1)
+ ah->av.tclass |= MLX5_ECN_ENABLED;
} else {
ah->av.rlid = cpu_to_be16(rdma_ah_get_dlid(ah_attr));
ah->av.fl_mlid = rdma_ah_get_path_bits(ah_attr) & 0x7f;
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index 6f6712f87a73..188512bf46e6 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -66,3 +66,107 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev,
return mlx5_cmd_exec(dev, in, in_size, out, sizeof(out));
}
+
+int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr,
+ u64 length, u32 alignment)
+{
+ struct mlx5_core_dev *dev = memic->dev;
+ u64 num_memic_hw_pages = MLX5_CAP_DEV_MEM(dev, memic_bar_size)
+ >> PAGE_SHIFT;
+ u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
+ u32 max_alignment = MLX5_CAP_DEV_MEM(dev, log_max_memic_addr_alignment);
+ u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE);
+ u32 out[MLX5_ST_SZ_DW(alloc_memic_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_memic_in)] = {};
+ u32 mlx5_alignment;
+ u64 page_idx = 0;
+ int ret = 0;
+
+ if (!length || (length & MLX5_MEMIC_ALLOC_SIZE_MASK))
+ return -EINVAL;
+
+ /* mlx5 device sets alignment as 64*2^driver_value
+ * so normalizing is needed.
+ */
+ mlx5_alignment = (alignment < MLX5_MEMIC_BASE_ALIGN) ? 0 :
+ alignment - MLX5_MEMIC_BASE_ALIGN;
+ if (mlx5_alignment > max_alignment)
+ return -EINVAL;
+
+ MLX5_SET(alloc_memic_in, in, opcode, MLX5_CMD_OP_ALLOC_MEMIC);
+ MLX5_SET(alloc_memic_in, in, range_size, num_pages * PAGE_SIZE);
+ MLX5_SET(alloc_memic_in, in, memic_size, length);
+ MLX5_SET(alloc_memic_in, in, log_memic_addr_alignment,
+ mlx5_alignment);
+
+ while (page_idx < num_memic_hw_pages) {
+ spin_lock(&memic->memic_lock);
+ page_idx = bitmap_find_next_zero_area(memic->memic_alloc_pages,
+ num_memic_hw_pages,
+ page_idx,
+ num_pages, 0);
+
+ if (page_idx < num_memic_hw_pages)
+ bitmap_set(memic->memic_alloc_pages,
+ page_idx, num_pages);
+
+ spin_unlock(&memic->memic_lock);
+
+ if (page_idx >= num_memic_hw_pages)
+ break;
+
+ MLX5_SET64(alloc_memic_in, in, range_start_addr,
+ hw_start_addr + (page_idx * PAGE_SIZE));
+
+ ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (ret) {
+ spin_lock(&memic->memic_lock);
+ bitmap_clear(memic->memic_alloc_pages,
+ page_idx, num_pages);
+ spin_unlock(&memic->memic_lock);
+
+ if (ret == -EAGAIN) {
+ page_idx++;
+ continue;
+ }
+
+ return ret;
+ }
+
+ *addr = pci_resource_start(dev->pdev, 0) +
+ MLX5_GET64(alloc_memic_out, out, memic_start_addr);
+
+ return 0;
+ }
+
+ return -ENOMEM;
+}
+
+int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length)
+{
+ struct mlx5_core_dev *dev = memic->dev;
+ u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
+ u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE);
+ u32 out[MLX5_ST_SZ_DW(dealloc_memic_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(dealloc_memic_in)] = {0};
+ u64 start_page_idx;
+ int err;
+
+ addr -= pci_resource_start(dev->pdev, 0);
+ start_page_idx = (addr - hw_start_addr) >> PAGE_SHIFT;
+
+ MLX5_SET(dealloc_memic_in, in, opcode, MLX5_CMD_OP_DEALLOC_MEMIC);
+ MLX5_SET64(dealloc_memic_in, in, memic_start_addr, addr);
+ MLX5_SET(dealloc_memic_in, in, memic_size, length);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+
+ if (!err) {
+ spin_lock(&memic->memic_lock);
+ bitmap_clear(memic->memic_alloc_pages,
+ start_page_idx, num_pages);
+ spin_unlock(&memic->memic_lock);
+ }
+
+ return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 78ffded7cc2c..e7206c8a8011 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -33,6 +33,7 @@
#ifndef MLX5_IB_CMD_H
#define MLX5_IB_CMD_H
+#include "mlx5_ib.h"
#include <linux/kernel.h>
#include <linux/mlx5/driver.h>
@@ -41,4 +42,7 @@ int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
void *out, int out_size);
int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
void *in, int in_size);
+int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr,
+ u64 length, u32 alignment);
+int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length);
#endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 071fd9a7b919..daa919e5a442 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -38,6 +38,7 @@
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
+#include <linux/bitmap.h>
#if defined(CONFIG_X86)
#include <asm/pat.h>
#endif
@@ -51,6 +52,7 @@
#include <linux/mlx5/port.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
+#include <linux/mlx5/fs_helpers.h>
#include <linux/list.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
@@ -60,6 +62,13 @@
#include "ib_rep.h"
#include "cmd.h"
#include <linux/mlx5/fs_helpers.h>
+#include <linux/mlx5/accel.h>
+#include <rdma/uverbs_std_types.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "5.0-0"
@@ -92,6 +101,12 @@ static LIST_HEAD(mlx5_ib_dev_list);
*/
static DEFINE_MUTEX(mlx5_ib_multiport_mutex);
+/* We can't use an array for xlt_emergency_page because dma_map_single
+ * doesn't work on kernel modules memory
+ */
+static unsigned long xlt_emergency_page;
+static struct mutex xlt_emergency_page_mutex;
+
struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi)
{
struct mlx5_ib_dev *dev;
@@ -399,6 +414,9 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
if (err)
goto out;
+ props->active_width = IB_WIDTH_4X;
+ props->active_speed = IB_SPEED_QDR;
+
translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
&props->active_width);
@@ -493,18 +511,19 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
vlan_id, port_num);
}
-static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
- unsigned int index, const union ib_gid *gid,
+static int mlx5_ib_add_gid(const union ib_gid *gid,
const struct ib_gid_attr *attr,
__always_unused void **context)
{
- return set_roce_addr(to_mdev(device), port_num, index, gid, attr);
+ return set_roce_addr(to_mdev(attr->device), attr->port_num,
+ attr->index, gid, attr);
}
-static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num,
- unsigned int index, __always_unused void **context)
+static int mlx5_ib_del_gid(const struct ib_gid_attr *attr,
+ __always_unused void **context)
{
- return set_roce_addr(to_mdev(device), port_num, index, NULL, NULL);
+ return set_roce_addr(to_mdev(attr->device), attr->port_num,
+ attr->index, NULL, NULL);
}
__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
@@ -516,9 +535,6 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr))
return 0;
- if (!attr.ndev)
- return 0;
-
dev_put(attr.ndev);
if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
@@ -538,9 +554,6 @@ int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
if (ret)
return ret;
- if (!attr.ndev)
- return -ENODEV;
-
dev_put(attr.ndev);
*gid_type = attr.gid_type;
@@ -844,6 +857,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
MLX5_RX_HASH_SRC_PORT_UDP |
MLX5_RX_HASH_DST_PORT_UDP |
MLX5_RX_HASH_INNER;
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_DEVICE)
+ resp.rss_caps.rx_hash_fields_mask |=
+ MLX5_RX_HASH_IPSEC_SPI;
resp.response_length += sizeof(resp.rss_caps);
}
} else {
@@ -875,6 +892,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
}
+ if (MLX5_CAP_DEV_MEM(mdev, memic)) {
+ props->max_dm_size =
+ MLX5_CAP_DEV_MEM(mdev, max_memic_size);
+ }
+
if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS))
props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
@@ -980,6 +1002,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
MLX5_CAP_QOS(mdev, packet_pacing_min_rate);
resp.packet_pacing_caps.supported_qpts |=
1 << IB_QPT_RAW_PACKET;
+ if (MLX5_CAP_QOS(mdev, packet_pacing_burst_bound) &&
+ MLX5_CAP_QOS(mdev, packet_pacing_typical_size))
+ resp.packet_pacing_caps.cap_flags |=
+ MLX5_IB_PP_SUPPORT_BURST;
}
resp.response_length += sizeof(resp.packet_pacing_caps);
}
@@ -1665,6 +1691,18 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
resp.response_length = min(offsetof(typeof(resp), response_length) +
sizeof(resp.response_length), udata->outlen);
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) {
+ if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_EGRESS))
+ resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM;
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA)
+ resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA;
+ if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
+ resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING;
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN)
+ resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN;
+ /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */
+ }
+
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
return ERR_PTR(-ENOMEM);
@@ -1702,17 +1740,10 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
#endif
- context->upd_xlt_page = __get_free_page(GFP_KERNEL);
- if (!context->upd_xlt_page) {
- err = -ENOMEM;
- goto out_uars;
- }
- mutex_init(&context->upd_xlt_page_mutex);
-
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
if (err)
- goto out_page;
+ goto out_uars;
}
INIT_LIST_HEAD(&context->vma_private_list);
@@ -1789,9 +1820,6 @@ out_td:
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
mlx5_ib_dealloc_transport_domain(dev, context->tdn);
-out_page:
- free_page(context->upd_xlt_page);
-
out_uars:
deallocate_uars(dev, context);
@@ -1817,7 +1845,6 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
mlx5_ib_dealloc_transport_domain(dev, context->tdn);
- free_page(context->upd_xlt_page);
deallocate_uars(dev, context);
kfree(bfregi->sys_pages);
kfree(bfregi->count);
@@ -1993,6 +2020,8 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
return "best effort WC";
case MLX5_IB_MMAP_NC_PAGE:
return "NC";
+ case MLX5_IB_MMAP_DEVICE_MEM:
+ return "Device Memory";
default:
return NULL;
}
@@ -2151,6 +2180,34 @@ free_bfreg:
return err;
}
+static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+ struct mlx5_ib_ucontext *mctx = to_mucontext(context);
+ struct mlx5_ib_dev *dev = to_mdev(context->device);
+ u16 page_idx = get_extended_index(vma->vm_pgoff);
+ size_t map_size = vma->vm_end - vma->vm_start;
+ u32 npages = map_size >> PAGE_SHIFT;
+ phys_addr_t pfn;
+ pgprot_t prot;
+
+ if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) !=
+ page_idx + npages)
+ return -EINVAL;
+
+ pfn = ((pci_resource_start(dev->mdev->pdev, 0) +
+ MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >>
+ PAGE_SHIFT) +
+ page_idx;
+ prot = pgprot_writecombine(vma->vm_page_prot);
+ vma->vm_page_prot = prot;
+
+ if (io_remap_pfn_range(vma, vma->vm_start, pfn, map_size,
+ vma->vm_page_prot))
+ return -EAGAIN;
+
+ return mlx5_ib_set_vma_data(vma, mctx);
+}
+
static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
@@ -2195,6 +2252,9 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
case MLX5_IB_MMAP_CLOCK_INFO:
return mlx5_ib_mmap_clock_info_page(dev, vma, context);
+ case MLX5_IB_MMAP_DEVICE_MEM:
+ return dm_mmap(ibcontext, vma);
+
default:
return -EINVAL;
}
@@ -2202,6 +2262,87 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
return 0;
}
+struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ u64 act_size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE);
+ struct mlx5_memic *memic = &to_mdev(ibdev)->memic;
+ phys_addr_t memic_addr;
+ struct mlx5_ib_dm *dm;
+ u64 start_offset;
+ u32 page_idx;
+ int err;
+
+ dm = kzalloc(sizeof(*dm), GFP_KERNEL);
+ if (!dm)
+ return ERR_PTR(-ENOMEM);
+
+ mlx5_ib_dbg(to_mdev(ibdev), "alloc_memic req: user_length=0x%llx act_length=0x%llx log_alignment=%d\n",
+ attr->length, act_size, attr->alignment);
+
+ err = mlx5_cmd_alloc_memic(memic, &memic_addr,
+ act_size, attr->alignment);
+ if (err)
+ goto err_free;
+
+ start_offset = memic_addr & ~PAGE_MASK;
+ page_idx = (memic_addr - pci_resource_start(memic->dev->pdev, 0) -
+ MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >>
+ PAGE_SHIFT;
+
+ err = uverbs_copy_to(attrs,
+ MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
+ &start_offset, sizeof(start_offset));
+ if (err)
+ goto err_dealloc;
+
+ err = uverbs_copy_to(attrs,
+ MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
+ &page_idx, sizeof(page_idx));
+ if (err)
+ goto err_dealloc;
+
+ bitmap_set(to_mucontext(context)->dm_pages, page_idx,
+ DIV_ROUND_UP(act_size, PAGE_SIZE));
+
+ dm->dev_addr = memic_addr;
+
+ return &dm->ibdm;
+
+err_dealloc:
+ mlx5_cmd_dealloc_memic(memic, memic_addr,
+ act_size);
+err_free:
+ kfree(dm);
+ return ERR_PTR(err);
+}
+
+int mlx5_ib_dealloc_dm(struct ib_dm *ibdm)
+{
+ struct mlx5_memic *memic = &to_mdev(ibdm->device)->memic;
+ struct mlx5_ib_dm *dm = to_mdm(ibdm);
+ u64 act_size = roundup(dm->ibdm.length, MLX5_MEMIC_BASE_SIZE);
+ u32 page_idx;
+ int ret;
+
+ ret = mlx5_cmd_dealloc_memic(memic, dm->dev_addr, act_size);
+ if (ret)
+ return ret;
+
+ page_idx = (dm->dev_addr - pci_resource_start(memic->dev->pdev, 0) -
+ MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >>
+ PAGE_SHIFT;
+ bitmap_clear(to_mucontext(ibdm->uobject->context)->dm_pages,
+ page_idx,
+ DIV_ROUND_UP(act_size, PAGE_SIZE));
+
+ kfree(dm);
+
+ return 0;
+}
+
static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
@@ -2317,8 +2458,28 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
offsetof(typeof(filter), field) -\
sizeof(filter.field))
+static int parse_flow_flow_action(const union ib_flow_spec *ib_spec,
+ const struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_act *action)
+{
+ struct mlx5_ib_flow_action *maction = to_mflow_act(ib_spec->action.act);
+
+ switch (maction->ib_action.type) {
+ case IB_FLOW_ACTION_ESP:
+ /* Currently only AES_GCM keymat is supported by the driver */
+ action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx;
+ action->action |= flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS ?
+ MLX5_FLOW_CONTEXT_ACTION_ENCRYPT :
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT;
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
u32 *match_v, const union ib_flow_spec *ib_spec,
+ const struct ib_flow_attr *flow_attr,
struct mlx5_flow_act *action)
{
void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
@@ -2328,6 +2489,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
void *headers_c;
void *headers_v;
int match_ipv;
+ int ret;
if (ib_spec->type & IB_FLOW_SPEC_INNER) {
headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
@@ -2478,7 +2640,15 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
ntohl(ib_spec->ipv6.mask.flow_label),
ntohl(ib_spec->ipv6.val.flow_label),
ib_spec->type & IB_FLOW_SPEC_INNER);
+ break;
+ case IB_FLOW_SPEC_ESP:
+ if (ib_spec->esp.mask.seq)
+ return -EOPNOTSUPP;
+ MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi,
+ ntohl(ib_spec->esp.mask.spi));
+ MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
+ ntohl(ib_spec->esp.val.spi));
break;
case IB_FLOW_SPEC_TCP:
if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
@@ -2546,6 +2716,11 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
return -EOPNOTSUPP;
action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
break;
+ case IB_FLOW_SPEC_ACTION_HANDLE:
+ ret = parse_flow_flow_action(ib_spec, flow_attr, action);
+ if (ret)
+ return ret;
+ break;
default:
return -EINVAL;
}
@@ -2587,6 +2762,46 @@ static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
return false;
}
+enum valid_spec {
+ VALID_SPEC_INVALID,
+ VALID_SPEC_VALID,
+ VALID_SPEC_NA,
+};
+
+static enum valid_spec
+is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
+ const struct mlx5_flow_spec *spec,
+ const struct mlx5_flow_act *flow_act,
+ bool egress)
+{
+ const u32 *match_c = spec->match_criteria;
+ bool is_crypto =
+ (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT));
+ bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c);
+ bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ /*
+ * Currently only crypto is supported in egress, when regular egress
+ * rules would be supported, always return VALID_SPEC_NA.
+ */
+ if (!is_crypto)
+ return egress ? VALID_SPEC_INVALID : VALID_SPEC_NA;
+
+ return is_crypto && is_ipsec &&
+ (!egress || (!is_drop && !flow_act->has_flow_tag)) ?
+ VALID_SPEC_VALID : VALID_SPEC_INVALID;
+}
+
+static bool is_valid_spec(struct mlx5_core_dev *mdev,
+ const struct mlx5_flow_spec *spec,
+ const struct mlx5_flow_act *flow_act,
+ bool egress)
+{
+ /* We curretly only support ipsec egress flow */
+ return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID;
+}
+
static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
const struct ib_flow_attr *flow_attr,
bool check_inner)
@@ -2711,13 +2926,17 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
log_max_ft_size));
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- if (flow_is_multicast_only(flow_attr) &&
- !dont_trap)
+ if (ft_type == MLX5_IB_FT_TX)
+ priority = 0;
+ else if (flow_is_multicast_only(flow_attr) &&
+ !dont_trap)
priority = MLX5_IB_FLOW_MCAST_PRIO;
else
priority = ib_prio_to_core_prio(flow_attr->priority,
dont_trap);
ns = mlx5_get_flow_namespace(dev->mdev,
+ ft_type == MLX5_IB_FT_TX ?
+ MLX5_FLOW_NAMESPACE_EGRESS :
MLX5_FLOW_NAMESPACE_BYPASS);
num_entries = MLX5_FS_MAX_ENTRIES;
num_groups = MLX5_FS_MAX_TYPES;
@@ -2804,6 +3023,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
unsigned int spec_index;
int err = 0;
int dest_num = 1;
+ bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
if (!is_valid_attr(dev->mdev, flow_attr))
return ERR_PTR(-EINVAL);
@@ -2820,7 +3040,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
err = parse_flow_attr(dev->mdev, spec->match_criteria,
spec->match_value,
- ib_flow, &flow_act);
+ ib_flow, flow_attr, &flow_act);
if (err < 0)
goto free;
@@ -2843,12 +3063,23 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
}
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
+
+ if (is_egress &&
+ !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) {
+ err = -EINVAL;
+ goto free;
+ }
+
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
rule_dst = NULL;
dest_num = 0;
} else {
- flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
- MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+ if (is_egress)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ else
+ flow_act.action |=
+ dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
+ MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
}
if (flow_act.has_flow_tag &&
@@ -3022,6 +3253,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
struct mlx5_flow_destination *dst = NULL;
struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
struct mlx5_ib_flow_prio *ft_prio;
+ bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
int err;
int underlay_qpn;
@@ -3030,7 +3262,13 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
if (domain != IB_FLOW_DOMAIN_USER ||
flow_attr->port > dev->num_ports ||
- (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP))
+ (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP |
+ IB_FLOW_ATTR_FLAGS_EGRESS)))
+ return ERR_PTR(-EINVAL);
+
+ if (is_egress &&
+ (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT))
return ERR_PTR(-EINVAL);
dst = kzalloc(sizeof(*dst), GFP_KERNEL);
@@ -3039,7 +3277,8 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
mutex_lock(&dev->flow_db->lock);
- ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX);
+ ft_prio = get_flow_table(dev, flow_attr,
+ is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);
if (IS_ERR(ft_prio)) {
err = PTR_ERR(ft_prio);
goto unlock;
@@ -3053,11 +3292,15 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
}
}
- dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- if (mqp->flags & MLX5_IB_QP_RSS)
- dst->tir_num = mqp->rss_qp.tirn;
- else
- dst->tir_num = mqp->raw_packet_qp.rq.tirn;
+ if (is_egress) {
+ dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+ } else {
+ dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ if (mqp->flags & MLX5_IB_QP_RSS)
+ dst->tir_num = mqp->rss_qp.tirn;
+ else
+ dst->tir_num = mqp->raw_packet_qp.rq.tirn;
+ }
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) {
@@ -3102,6 +3345,170 @@ unlock:
return ERR_PTR(err);
}
+static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags)
+{
+ u32 flags = 0;
+
+ if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA)
+ flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA;
+
+ return flags;
+}
+
+#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA
+static struct ib_flow_action *
+mlx5_ib_create_flow_action_esp(struct ib_device *device,
+ const struct ib_flow_action_attrs_esp *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_dev *mdev = to_mdev(device);
+ struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm;
+ struct mlx5_accel_esp_xfrm_attrs accel_attrs = {};
+ struct mlx5_ib_flow_action *action;
+ u64 action_flags;
+ u64 flags;
+ int err = 0;
+
+ if (IS_UVERBS_COPY_ERR(uverbs_copy_from(&action_flags, attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS)))
+ return ERR_PTR(-EFAULT);
+
+ if (action_flags >= (MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags);
+
+ /* We current only support a subset of the standard features. Only a
+ * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn
+ * (with overlap). Full offload mode isn't supported.
+ */
+ if (!attr->keymat || attr->replay || attr->encap ||
+ attr->spi || attr->seq || attr->tfc_pad ||
+ attr->hard_limit_pkts ||
+ (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (attr->keymat->protocol !=
+ IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ aes_gcm = &attr->keymat->keymat.aes_gcm;
+
+ if (aes_gcm->icv_len != 16 ||
+ aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ action = kmalloc(sizeof(*action), GFP_KERNEL);
+ if (!action)
+ return ERR_PTR(-ENOMEM);
+
+ action->esp_aes_gcm.ib_flags = attr->flags;
+ memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key,
+ sizeof(accel_attrs.keymat.aes_gcm.aes_key));
+ accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8;
+ memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt,
+ sizeof(accel_attrs.keymat.aes_gcm.salt));
+ memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv,
+ sizeof(accel_attrs.keymat.aes_gcm.seq_iv));
+ accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8;
+ accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ;
+ accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM;
+
+ accel_attrs.esn = attr->esn;
+ if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED)
+ accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED;
+ if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
+ accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
+
+ if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)
+ accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT;
+
+ action->esp_aes_gcm.ctx =
+ mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags);
+ if (IS_ERR(action->esp_aes_gcm.ctx)) {
+ err = PTR_ERR(action->esp_aes_gcm.ctx);
+ goto err_parse;
+ }
+
+ action->esp_aes_gcm.ib_flags = attr->flags;
+
+ return &action->ib_action;
+
+err_parse:
+ kfree(action);
+ return ERR_PTR(err);
+}
+
+static int
+mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action,
+ const struct ib_flow_action_attrs_esp *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_flow_action *maction = to_mflow_act(action);
+ struct mlx5_accel_esp_xfrm_attrs accel_attrs;
+ int err = 0;
+
+ if (attr->keymat || attr->replay || attr->encap ||
+ attr->spi || attr->seq || attr->tfc_pad ||
+ attr->hard_limit_pkts ||
+ (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
+ IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS |
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)))
+ return -EOPNOTSUPP;
+
+ /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can
+ * be modified.
+ */
+ if (!(maction->esp_aes_gcm.ib_flags &
+ IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) &&
+ attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))
+ return -EINVAL;
+
+ memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs,
+ sizeof(accel_attrs));
+
+ accel_attrs.esn = attr->esn;
+ if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
+ accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
+ else
+ accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
+
+ err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx,
+ &accel_attrs);
+ if (err)
+ return err;
+
+ maction->esp_aes_gcm.ib_flags &=
+ ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
+ maction->esp_aes_gcm.ib_flags |=
+ attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
+
+ return 0;
+}
+
+static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
+{
+ struct mlx5_ib_flow_action *maction = to_mflow_act(action);
+
+ switch (action->type) {
+ case IB_FLOW_ACTION_ESP:
+ /*
+ * We only support aes_gcm by now, so we implicitly know this is
+ * the underline crypto.
+ */
+ mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx);
+ break;
+ default:
+ WARN_ON(true);
+ break;
+ }
+
+ kfree(maction);
+ return 0;
+}
+
static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
@@ -4553,6 +4960,47 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
mlx5_nic_vport_disable_roce(dev->mdev);
}
+ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_dm, UVERBS_OBJECT_DM,
+ UVERBS_METHOD_DM_ALLOC,
+ &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
+ UVERBS_ATTR_TYPE(u16),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_flow_action, UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
+ &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
+ UVERBS_ATTR_TYPE(u64),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+#define NUM_TREES 2
+static int populate_specs_root(struct mlx5_ib_dev *dev)
+{
+ const struct uverbs_object_tree_def *default_root[NUM_TREES + 1] = {
+ uverbs_default_get_objects()};
+ size_t num_trees = 1;
+
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE &&
+ !WARN_ON(num_trees >= ARRAY_SIZE(default_root)))
+ default_root[num_trees++] = &mlx5_ib_flow_action;
+
+ if (MLX5_CAP_DEV_MEM(dev->mdev, memic) &&
+ !WARN_ON(num_trees >= ARRAY_SIZE(default_root)))
+ default_root[num_trees++] = &mlx5_ib_dm;
+
+ dev->ib_dev.specs_root =
+ uverbs_alloc_spec_tree(num_trees, default_root);
+
+ return PTR_ERR_OR_ZERO(dev->ib_dev.specs_root);
+}
+
+static void depopulate_specs_root(struct mlx5_ib_dev *dev)
+{
+ uverbs_free_spec_tree(dev->ib_dev.specs_root);
+}
+
void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_cleanup_multiport_master(dev);
@@ -4616,6 +5064,9 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
INIT_LIST_HEAD(&dev->qp_list);
spin_lock_init(&dev->reset_flow_resource_lock);
+ spin_lock_init(&dev->memic.memic_lock);
+ dev->memic.dev = mdev;
+
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
err = init_srcu_struct(&dev->mr_srcu);
if (err)
@@ -4778,11 +5229,21 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
}
+ if (MLX5_CAP_DEV_MEM(mdev, memic)) {
+ dev->ib_dev.alloc_dm = mlx5_ib_alloc_dm;
+ dev->ib_dev.dealloc_dm = mlx5_ib_dealloc_dm;
+ dev->ib_dev.reg_dm_mr = mlx5_ib_reg_dm_mr;
+ }
+
dev->ib_dev.create_flow = mlx5_ib_create_flow;
dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
dev->ib_dev.uverbs_ex_cmd_mask |=
(1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
+ dev->ib_dev.create_flow_action_esp = mlx5_ib_create_flow_action_esp;
+ dev->ib_dev.destroy_flow_action = mlx5_ib_destroy_flow_action;
+ dev->ib_dev.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp;
+ dev->ib_dev.driver_id = RDMA_DRIVER_MLX5;
err = init_node_data(dev);
if (err)
@@ -4997,11 +5458,21 @@ void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
}
+static int mlx5_ib_stage_populate_specs(struct mlx5_ib_dev *dev)
+{
+ return populate_specs_root(dev);
+}
+
int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
{
return ib_register_device(&dev->ib_dev, NULL);
}
+static void mlx5_ib_stage_depopulate_specs(struct mlx5_ib_dev *dev)
+{
+ depopulate_specs_root(dev);
+}
+
void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
{
destroy_umrc_res(dev);
@@ -5136,6 +5607,9 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
NULL,
mlx5_ib_stage_pre_ib_reg_umr_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_SPECS,
+ mlx5_ib_stage_populate_specs,
+ mlx5_ib_stage_depopulate_specs),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
@@ -5181,6 +5655,9 @@ static const struct mlx5_ib_profile nic_rep_profile = {
STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
NULL,
mlx5_ib_stage_pre_ib_reg_umr_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_SPECS,
+ mlx5_ib_stage_populate_specs,
+ mlx5_ib_stage_depopulate_specs),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
@@ -5301,13 +5778,32 @@ static struct mlx5_interface mlx5_ib_interface = {
.protocol = MLX5_INTERFACE_PROTOCOL_IB,
};
+unsigned long mlx5_ib_get_xlt_emergency_page(void)
+{
+ mutex_lock(&xlt_emergency_page_mutex);
+ return xlt_emergency_page;
+}
+
+void mlx5_ib_put_xlt_emergency_page(void)
+{
+ mutex_unlock(&xlt_emergency_page_mutex);
+}
+
static int __init mlx5_ib_init(void)
{
int err;
+ xlt_emergency_page = __get_free_page(GFP_KERNEL);
+ if (!xlt_emergency_page)
+ return -ENOMEM;
+
+ mutex_init(&xlt_emergency_page_mutex);
+
mlx5_ib_event_wq = alloc_ordered_workqueue("mlx5_ib_event_wq", 0);
- if (!mlx5_ib_event_wq)
+ if (!mlx5_ib_event_wq) {
+ free_page(xlt_emergency_page);
return -ENOMEM;
+ }
mlx5_ib_odp_init();
@@ -5320,6 +5816,8 @@ static void __exit mlx5_ib_cleanup(void)
{
mlx5_unregister_interface(&mlx5_ib_interface);
destroy_workqueue(mlx5_ib_event_wq);
+ mutex_destroy(&xlt_emergency_page_mutex);
+ free_page(xlt_emergency_page);
}
module_init(mlx5_ib_init);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index c33bf1523d67..49a1aa0ff429 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -45,6 +45,7 @@
#include <linux/mlx5/transobj.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/mlx5-abi.h>
+#include <rdma/uverbs_ioctl.h>
#define mlx5_ib_dbg(dev, format, arg...) \
pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
@@ -108,6 +109,16 @@ enum {
MLX5_IB_INVALID_BFREG = BIT(31),
};
+enum {
+ MLX5_MAX_MEMIC_PAGES = 0x100,
+ MLX5_MEMIC_ALLOC_SIZE_MASK = 0x3f,
+};
+
+enum {
+ MLX5_MEMIC_BASE_ALIGN = 6,
+ MLX5_MEMIC_BASE_SIZE = 1 << MLX5_MEMIC_BASE_ALIGN,
+};
+
struct mlx5_ib_vma_private_data {
struct list_head list;
struct vm_area_struct *vma;
@@ -130,10 +141,8 @@ struct mlx5_ib_ucontext {
/* protect vma_private_list add/del */
struct mutex vma_private_list_mutex;
- unsigned long upd_xlt_page;
- /* protect ODP/KSM */
- struct mutex upd_xlt_page_mutex;
u64 lib_caps;
+ DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES);
};
static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -155,6 +164,7 @@ struct mlx5_ib_pd {
#define MLX5_IB_NUM_FLOW_FT (MLX5_IB_FLOW_LEFTOVERS_PRIO + 1)
#define MLX5_IB_NUM_SNIFFER_FTS 2
+#define MLX5_IB_NUM_EGRESS_FTS 1
struct mlx5_ib_flow_prio {
struct mlx5_flow_table *flow_table;
unsigned int refcount;
@@ -170,6 +180,7 @@ struct mlx5_ib_flow_handler {
struct mlx5_ib_flow_db {
struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT];
struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS];
+ struct mlx5_ib_flow_prio egress[MLX5_IB_NUM_EGRESS_FTS];
struct mlx5_flow_table *lag_demux_ft;
/* Protect flow steering bypass flow tables
* when add/del flow rules.
@@ -406,7 +417,7 @@ struct mlx5_ib_qp {
struct list_head qps_list;
struct list_head cq_recv_list;
struct list_head cq_send_list;
- u32 rate_limit;
+ struct mlx5_rate_limit rl;
u32 underlay_qpn;
bool tunnel_offload_en;
/* storage for qp sub type when core qp type is IB_QPT_DRIVER */
@@ -522,8 +533,19 @@ enum mlx5_ib_mtt_access_flags {
MLX5_IB_MTT_WRITE = (1 << 1),
};
+struct mlx5_ib_dm {
+ struct ib_dm ibdm;
+ phys_addr_t dev_addr;
+};
+
#define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
+#define MLX5_IB_DM_ALLOWED_ACCESS (IB_ACCESS_LOCAL_WRITE |\
+ IB_ACCESS_REMOTE_WRITE |\
+ IB_ACCESS_REMOTE_READ |\
+ IB_ACCESS_REMOTE_ATOMIC |\
+ IB_ZERO_BASED)
+
struct mlx5_ib_mr {
struct ib_mr ibmr;
void *descs;
@@ -743,6 +765,7 @@ enum mlx5_ib_stages {
MLX5_IB_STAGE_UAR,
MLX5_IB_STAGE_BFREG,
MLX5_IB_STAGE_PRE_IB_REG_UMR,
+ MLX5_IB_STAGE_SPECS,
MLX5_IB_STAGE_IB_REG,
MLX5_IB_STAGE_POST_IB_REG_UMR,
MLX5_IB_STAGE_DELAY_DROP,
@@ -774,6 +797,22 @@ struct mlx5_ib_multiport_info {
bool unaffiliate;
};
+struct mlx5_ib_flow_action {
+ struct ib_flow_action ib_action;
+ union {
+ struct {
+ u64 ib_flags;
+ struct mlx5_accel_esp_xfrm *ctx;
+ } esp_aes_gcm;
+ };
+};
+
+struct mlx5_memic {
+ struct mlx5_core_dev *dev;
+ spinlock_t memic_lock;
+ DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES);
+};
+
struct mlx5_ib_dev {
struct ib_device ib_dev;
struct mlx5_core_dev *mdev;
@@ -820,6 +859,7 @@ struct mlx5_ib_dev {
u8 umr_fence;
struct list_head ib_dev_list;
u64 sys_image_guid;
+ struct mlx5_memic memic;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -887,6 +927,11 @@ static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq)
return container_of(msrq, struct mlx5_ib_srq, msrq);
}
+static inline struct mlx5_ib_dm *to_mdm(struct ib_dm *ibdm)
+{
+ return container_of(ibdm, struct mlx5_ib_dm, ibdm);
+}
+
static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr)
{
return container_of(ibmr, struct mlx5_ib_mr, ibmr);
@@ -897,6 +942,12 @@ static inline struct mlx5_ib_mw *to_mmw(struct ib_mw *ibmw)
return container_of(ibmw, struct mlx5_ib_mw, ibmw);
}
+static inline struct mlx5_ib_flow_action *
+to_mflow_act(struct ib_flow_action *ibact)
+{
+ return container_of(ibact, struct mlx5_ib_flow_action, ib_action);
+}
+
int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
struct mlx5_db *db);
void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db);
@@ -1025,7 +1076,14 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
struct ib_udata *udata);
int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev);
-
+struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+int mlx5_ib_dealloc_dm(struct ib_dm *ibdm);
+struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
+ struct ib_dm_mr_attr *attr,
+ struct uverbs_attr_bundle *attrs);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev);
@@ -1221,4 +1279,7 @@ static inline int get_num_static_uars(struct mlx5_ib_dev *dev,
return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_static_sys_pages;
}
+unsigned long mlx5_ib_get_xlt_emergency_page(void);
+void mlx5_ib_put_xlt_emergency_page(void);
+
#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 654bc31bc428..1520a2f20f98 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -47,10 +47,25 @@ enum {
#define MLX5_UMR_ALIGN 2048
-static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
-static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
+static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
+static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int mr_cache_max_order(struct mlx5_ib_dev *dev);
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
+static bool umr_can_modify_entity_size(struct mlx5_ib_dev *dev)
+{
+ return !MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled);
+}
+
+static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
+{
+ return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
+}
+
+static bool use_umr(struct mlx5_ib_dev *dev, int order)
+{
+ return order <= mr_cache_max_order(dev) &&
+ umr_can_modify_entity_size(dev);
+}
static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
@@ -189,7 +204,9 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
MLX5_SET(mkc, mkc, free, 1);
MLX5_SET(mkc, mkc, umr_en, 1);
- MLX5_SET(mkc, mkc, access_mode, ent->access_mode);
+ MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
+ MLX5_SET(mkc, mkc, access_mode_4_2,
+ (ent->access_mode >> 2) & 0x7);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
@@ -220,26 +237,32 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent = &cache->ent[c];
+ struct mlx5_ib_mr *tmp_mr;
struct mlx5_ib_mr *mr;
- int err;
+ LIST_HEAD(del_list);
int i;
for (i = 0; i < num; i++) {
spin_lock_irq(&ent->lock);
if (list_empty(&ent->head)) {
spin_unlock_irq(&ent->lock);
- return;
+ break;
}
mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
- list_del(&mr->list);
+ list_move(&mr->list, &del_list);
ent->cur--;
ent->size--;
spin_unlock_irq(&ent->lock);
- err = destroy_mkey(dev, mr);
- if (err)
- mlx5_ib_warn(dev, "failed destroy mkey\n");
- else
- kfree(mr);
+ mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
+ }
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ synchronize_srcu(&dev->mr_srcu);
+#endif
+
+ list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
+ list_del(&mr->list);
+ kfree(mr);
}
}
@@ -562,26 +585,32 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent = &cache->ent[c];
+ struct mlx5_ib_mr *tmp_mr;
struct mlx5_ib_mr *mr;
- int err;
+ LIST_HEAD(del_list);
cancel_delayed_work(&ent->dwork);
while (1) {
spin_lock_irq(&ent->lock);
if (list_empty(&ent->head)) {
spin_unlock_irq(&ent->lock);
- return;
+ break;
}
mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
- list_del(&mr->list);
+ list_move(&mr->list, &del_list);
ent->cur--;
ent->size--;
spin_unlock_irq(&ent->lock);
- err = destroy_mkey(dev, mr);
- if (err)
- mlx5_ib_warn(dev, "failed destroy mkey\n");
- else
- kfree(mr);
+ mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
+ }
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ synchronize_srcu(&dev->mr_srcu);
+#endif
+
+ list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
+ list_del(&mr->list);
+ kfree(mr);
}
}
@@ -780,7 +809,7 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
- MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
@@ -947,7 +976,10 @@ static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,
{
struct mlx5_ib_dev *dev = mr->dev;
struct ib_umem *umem = mr->umem;
+
if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
+ if (!umr_can_use_indirect_mkey(dev))
+ return -EPERM;
mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);
return npages;
}
@@ -977,7 +1009,6 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
{
struct mlx5_ib_dev *dev = mr->dev;
struct device *ddev = dev->ib_dev.dev.parent;
- struct mlx5_ib_ucontext *uctx = NULL;
int size;
void *xlt;
dma_addr_t dma;
@@ -993,6 +1024,11 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
size_t pages_to_map = 0;
size_t pages_iter = 0;
gfp_t gfp;
+ bool use_emergency_page = false;
+
+ if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
+ !umr_can_use_indirect_mkey(dev))
+ return -EPERM;
/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
* so we need to align the offset and length accordingly
@@ -1019,12 +1055,11 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
}
if (!xlt) {
- uctx = to_mucontext(mr->ibmr.pd->uobject->context);
mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
+ xlt = (void *)mlx5_ib_get_xlt_emergency_page();
size = PAGE_SIZE;
- xlt = (void *)uctx->upd_xlt_page;
- mutex_lock(&uctx->upd_xlt_page_mutex);
memset(xlt, 0, size);
+ use_emergency_page = true;
}
pages_iter = size / desc_size;
dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE);
@@ -1088,8 +1123,8 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
free_xlt:
- if (uctx)
- mutex_unlock(&uctx->upd_xlt_page_mutex);
+ if (use_emergency_page)
+ mlx5_ib_put_xlt_emergency_page();
else
free_pages((unsigned long)xlt, get_order(size));
@@ -1141,7 +1176,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
MLX5_SET(mkc, mkc, free, !populate);
- MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
@@ -1197,22 +1232,96 @@ static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
mr->access_flags = access_flags;
}
+static struct ib_mr *mlx5_ib_get_memic_mr(struct ib_pd *pd, u64 memic_addr,
+ u64 length, int acc)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_ib_mr *mr;
+ void *mkc;
+ u32 *in;
+ int err;
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ in = kzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MEMIC & 0x3);
+ MLX5_SET(mkc, mkc, access_mode_4_2,
+ (MLX5_MKC_ACCESS_MODE_MEMIC >> 2) & 0x7);
+ MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
+ MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
+ MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
+ MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
+ MLX5_SET(mkc, mkc, lr, 1);
+
+ MLX5_SET64(mkc, mkc, len, length);
+ MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET64(mkc, mkc, start_addr,
+ memic_addr - pci_resource_start(dev->mdev->pdev, 0));
+
+ err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
+ if (err)
+ goto err_in;
+
+ kfree(in);
+
+ mr->umem = NULL;
+ set_mr_fileds(dev, mr, 0, length, acc);
+
+ return &mr->ibmr;
+
+err_in:
+ kfree(in);
+
+err_free:
+ kfree(mr);
+
+ return ERR_PTR(err);
+}
+
+struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
+ struct ib_dm_mr_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_dm *mdm = to_mdm(dm);
+ u64 memic_addr;
+
+ if (attr->access_flags & ~MLX5_IB_DM_ALLOWED_ACCESS)
+ return ERR_PTR(-EINVAL);
+
+ memic_addr = mdm->dev_addr + attr->offset;
+
+ return mlx5_ib_get_memic_mr(pd, memic_addr, attr->length,
+ attr->access_flags);
+}
+
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr = NULL;
+ bool populate_mtts = false;
struct ib_umem *umem;
int page_shift;
int npages;
int ncont;
int order;
int err;
- bool use_umr = true;
if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
- return ERR_PTR(-EINVAL);
+ return ERR_PTR(-EOPNOTSUPP);
mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
start, virt_addr, length, access_flags);
@@ -1224,6 +1333,8 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return ERR_PTR(-EINVAL);
mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
+ if (IS_ERR(mr))
+ return ERR_CAST(mr);
return &mr->ibmr;
}
#endif
@@ -1234,26 +1345,29 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (err < 0)
return ERR_PTR(err);
- if (order <= mr_cache_max_order(dev)) {
+ if (use_umr(dev, order)) {
mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont,
page_shift, order, access_flags);
if (PTR_ERR(mr) == -EAGAIN) {
mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
mr = NULL;
}
+ populate_mtts = false;
} else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) {
if (access_flags & IB_ACCESS_ON_DEMAND) {
err = -EINVAL;
pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");
goto error;
}
- use_umr = false;
+ populate_mtts = true;
}
if (!mr) {
+ if (!umr_can_modify_entity_size(dev))
+ populate_mtts = true;
mutex_lock(&dev->slow_path_mutex);
mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
- page_shift, access_flags, !use_umr);
+ page_shift, access_flags, populate_mtts);
mutex_unlock(&dev->slow_path_mutex);
}
@@ -1271,7 +1385,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
update_odp_mr(mr);
#endif
- if (use_umr) {
+ if (!populate_mtts) {
int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
if (access_flags & IB_ACCESS_ON_DEMAND)
@@ -1286,7 +1400,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
}
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
mr->live = 1;
+#endif
return &mr->ibmr;
error:
ib_umem_release(umem);
@@ -1365,36 +1481,34 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
ib_umem_release(mr->umem);
err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
&npages, &page_shift, &ncont, &order);
- if (err < 0) {
- clean_mr(dev, mr);
- return err;
- }
+ if (err)
+ goto err;
}
if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) {
/*
* UMR can't be used - MKey needs to be replaced.
*/
- if (mr->allocated_from_cache) {
+ if (mr->allocated_from_cache)
err = unreg_umr(dev, mr);
- if (err)
- mlx5_ib_warn(dev, "Failed to unregister MR\n");
- } else {
+ else
err = destroy_mkey(dev, mr);
- if (err)
- mlx5_ib_warn(dev, "Failed to destroy MKey\n");
- }
if (err)
- return err;
+ goto err;
mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
page_shift, access_flags, true);
- if (IS_ERR(mr))
- return PTR_ERR(mr);
+ if (IS_ERR(mr)) {
+ err = PTR_ERR(mr);
+ mr = to_mmr(ib_mr);
+ goto err;
+ }
mr->allocated_from_cache = 0;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
mr->live = 1;
+#endif
} else {
/*
* Send a UMR WQE
@@ -1417,13 +1531,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
err = rereg_umr(pd, mr, access_flags, flags);
}
- if (err) {
- mlx5_ib_warn(dev, "Failed to rereg UMR\n");
- ib_umem_release(mr->umem);
- mr->umem = NULL;
- clean_mr(dev, mr);
- return err;
- }
+ if (err)
+ goto err;
}
set_mr_fileds(dev, mr, npages, len, access_flags);
@@ -1432,6 +1541,14 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
update_odp_mr(mr);
#endif
return 0;
+
+err:
+ if (mr->umem) {
+ ib_umem_release(mr->umem);
+ mr->umem = NULL;
+ }
+ clean_mr(dev, mr);
+ return err;
}
static int
@@ -1480,10 +1597,9 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
}
}
-static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
int allocated_from_cache = mr->allocated_from_cache;
- int err;
if (mr->sig) {
if (mlx5_core_destroy_psv(dev->mdev,
@@ -1500,21 +1616,11 @@ static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
mlx5_free_priv_descs(mr);
- if (!allocated_from_cache) {
- u32 key = mr->mmkey.key;
-
- err = destroy_mkey(dev, mr);
- if (err) {
- mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
- key, err);
- return err;
- }
- }
-
- return 0;
+ if (!allocated_from_cache)
+ destroy_mkey(dev, mr);
}
-static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
int npages = mr->npages;
struct ib_umem *umem = mr->umem;
@@ -1555,16 +1661,12 @@ static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
kfree(mr);
else
mlx5_mr_cache_free(dev, mr);
-
- return 0;
}
int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
- struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
- struct mlx5_ib_mr *mr = to_mmr(ibmr);
-
- return dereg_mr(dev, mr);
+ dereg_mr(to_mdev(ibmr->device), to_mmr(ibmr));
+ return 0;
}
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
@@ -1645,7 +1747,8 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
goto err_free_in;
}
- MLX5_SET(mkc, mkc, access_mode, mr->access_mode);
+ MLX5_SET(mkc, mkc, access_mode_1_0, mr->access_mode & 0x3);
+ MLX5_SET(mkc, mkc, access_mode_4_2, (mr->access_mode >> 2) & 0x7);
MLX5_SET(mkc, mkc, umr_en, 1);
mr->ibmr.device = pd->device;
@@ -1726,7 +1829,7 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
MLX5_SET(mkc, mkc, umr_en, 1);
MLX5_SET(mkc, mkc, lr, 1);
- MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_KLMS);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2)));
MLX5_SET(mkc, mkc, qpn, 0xffffff);
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 0d0b0b8dad98..7ed4b70f6447 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -86,7 +86,9 @@ struct mlx5_modify_raw_qp_param {
u16 operation;
u32 set_mask; /* raw_qp_set_mask_map */
- u32 rate_limit;
+
+ struct mlx5_rate_limit rl;
+
u8 rq_q_ctr_id;
};
@@ -878,7 +880,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
goto err_free;
}
- err = ib_copy_to_udata(udata, resp, sizeof(*resp));
+ err = ib_copy_to_udata(udata, resp, min(udata->outlen, sizeof(*resp)));
if (err) {
mlx5_ib_dbg(dev, "copy failed\n");
goto err_unmap;
@@ -1411,6 +1413,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
void *tirc;
void *hfso;
u32 selected_fields = 0;
+ u32 outer_l4;
size_t min_resp_len;
u32 tdn = mucontext->tdn;
struct mlx5_ib_create_qp_rss ucmd = {};
@@ -1466,7 +1469,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
return -EOPNOTSUPP;
}
- err = ib_copy_to_udata(udata, &resp, min_resp_len);
+ err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));
if (err) {
mlx5_ib_dbg(dev, "copy failed\n");
return -EINVAL;
@@ -1541,10 +1544,14 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV6);
- if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) &&
- ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))) {
+ outer_l4 = ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) << 0 |
+ ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) << 1 |
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) << 2;
+
+ /* Check that only one l4 protocol is set */
+ if (outer_l4 & (outer_l4 - 1)) {
err = -EINVAL;
goto err;
}
@@ -1575,6 +1582,9 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
(ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
selected_fields |= MLX5_HASH_FIELD_SEL_L4_DPORT;
+ if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI)
+ selected_fields |= MLX5_HASH_FIELD_SEL_IPSEC_SPI;
+
MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
create_tir:
@@ -2774,8 +2784,9 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
const struct mlx5_modify_raw_qp_param *raw_qp_param)
{
struct mlx5_ib_qp *ibqp = sq->base.container_mibqp;
- u32 old_rate = ibqp->rate_limit;
- u32 new_rate = old_rate;
+ struct mlx5_rate_limit old_rl = ibqp->rl;
+ struct mlx5_rate_limit new_rl = old_rl;
+ bool new_rate_added = false;
u16 rl_index = 0;
void *in;
void *sqc;
@@ -2797,39 +2808,43 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
pr_warn("%s: Rate limit can only be changed when SQ is moving to RDY\n",
__func__);
else
- new_rate = raw_qp_param->rate_limit;
+ new_rl = raw_qp_param->rl;
}
- if (old_rate != new_rate) {
- if (new_rate) {
- err = mlx5_rl_add_rate(dev, new_rate, &rl_index);
+ if (!mlx5_rl_are_equal(&old_rl, &new_rl)) {
+ if (new_rl.rate) {
+ err = mlx5_rl_add_rate(dev, &rl_index, &new_rl);
if (err) {
- pr_err("Failed configuring rate %u: %d\n",
- new_rate, err);
+ pr_err("Failed configuring rate limit(err %d): \
+ rate %u, max_burst_sz %u, typical_pkt_sz %u\n",
+ err, new_rl.rate, new_rl.max_burst_sz,
+ new_rl.typical_pkt_sz);
+
goto out;
}
+ new_rate_added = true;
}
MLX5_SET64(modify_sq_in, in, modify_bitmask, 1);
+ /* index 0 means no limit */
MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index);
}
err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen);
if (err) {
/* Remove new rate from table if failed */
- if (new_rate &&
- old_rate != new_rate)
- mlx5_rl_remove_rate(dev, new_rate);
+ if (new_rate_added)
+ mlx5_rl_remove_rate(dev, &new_rl);
goto out;
}
/* Only remove the old rate after new rate was set */
- if ((old_rate &&
- (old_rate != new_rate)) ||
+ if ((old_rl.rate &&
+ !mlx5_rl_are_equal(&old_rl, &new_rl)) ||
(new_state != MLX5_SQC_STATE_RDY))
- mlx5_rl_remove_rate(dev, old_rate);
+ mlx5_rl_remove_rate(dev, &old_rl);
- ibqp->rate_limit = new_rate;
+ ibqp->rl = new_rl;
sq->state = new_state;
out:
@@ -2906,7 +2921,8 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
- enum ib_qp_state cur_state, enum ib_qp_state new_state)
+ enum ib_qp_state cur_state, enum ib_qp_state new_state,
+ const struct mlx5_ib_modify_qp *ucmd)
{
static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
[MLX5_QP_STATE_RST] = {
@@ -2959,18 +2975,16 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
u16 op;
u8 tx_affinity = 0;
+ mlx5_st = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ?
+ qp->qp_sub_type : ibqp->qp_type);
+ if (mlx5_st < 0)
+ return -EINVAL;
+
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
return -ENOMEM;
- err = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ?
- qp->qp_sub_type : ibqp->qp_type);
- if (err < 0) {
- mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type);
- goto out;
- }
-
- context->flags = cpu_to_be32(err << 16);
+ context->flags = cpu_to_be32(mlx5_st << 16);
if (!(attr_mask & IB_QP_PATH_MIG_STATE)) {
context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
@@ -3124,10 +3138,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
mlx5_cur = to_mlx5_state(cur_state);
mlx5_new = to_mlx5_state(new_state);
- mlx5_st = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ?
- qp->qp_sub_type : ibqp->qp_type);
- if (mlx5_st < 0)
- goto out;
if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
!optab[mlx5_cur][mlx5_new]) {
@@ -3150,7 +3160,30 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
}
if (attr_mask & IB_QP_RATE_LIMIT) {
- raw_qp_param.rate_limit = attr->rate_limit;
+ raw_qp_param.rl.rate = attr->rate_limit;
+
+ if (ucmd->burst_info.max_burst_sz) {
+ if (attr->rate_limit &&
+ MLX5_CAP_QOS(dev->mdev, packet_pacing_burst_bound)) {
+ raw_qp_param.rl.max_burst_sz =
+ ucmd->burst_info.max_burst_sz;
+ } else {
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ if (ucmd->burst_info.typical_pkt_sz) {
+ if (attr->rate_limit &&
+ MLX5_CAP_QOS(dev->mdev, packet_pacing_typical_size)) {
+ raw_qp_param.rl.typical_pkt_sz =
+ ucmd->burst_info.typical_pkt_sz;
+ } else {
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
raw_qp_param.set_mask |= MLX5_RAW_QP_RATE_LIMIT;
}
@@ -3178,7 +3211,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
* If we moved a kernel QP to RESET, clean up all old CQ
* entries and reinitialize the QP.
*/
- if (new_state == IB_QPS_RESET && !ibqp->uobject) {
+ if (new_state == IB_QPS_RESET &&
+ !ibqp->uobject && ibqp->qp_type != IB_QPT_XRC_TGT) {
mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
ibqp->srq ? to_msrq(ibqp->srq) : NULL);
if (send_cq != recv_cq)
@@ -3337,8 +3371,10 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ struct mlx5_ib_modify_qp ucmd = {};
enum ib_qp_type qp_type;
enum ib_qp_state cur_state, new_state;
+ size_t required_cmd_sz;
int err = -EINVAL;
int port;
enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
@@ -3346,6 +3382,28 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (ibqp->rwq_ind_tbl)
return -ENOSYS;
+ if (udata && udata->inlen) {
+ required_cmd_sz = offsetof(typeof(ucmd), reserved) +
+ sizeof(ucmd.reserved);
+ if (udata->inlen < required_cmd_sz)
+ return -EINVAL;
+
+ if (udata->inlen > sizeof(ucmd) &&
+ !ib_is_udata_cleared(udata, sizeof(ucmd),
+ udata->inlen - sizeof(ucmd)))
+ return -EOPNOTSUPP;
+
+ if (ib_copy_from_udata(&ucmd, udata,
+ min(udata->inlen, sizeof(ucmd))))
+ return -EFAULT;
+
+ if (ucmd.comp_mask ||
+ memchr_inv(&ucmd.reserved, 0, sizeof(ucmd.reserved)) ||
+ memchr_inv(&ucmd.burst_info.reserved, 0,
+ sizeof(ucmd.burst_info.reserved)))
+ return -EOPNOTSUPP;
+ }
+
if (unlikely(ibqp->qp_type == IB_QPT_GSI))
return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask);
@@ -3426,7 +3484,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto out;
}
- err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
+ err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state,
+ new_state, &ucmd);
out:
mutex_unlock(&qp->mutex);
@@ -3646,8 +3705,19 @@ static __be64 get_umr_update_pd_mask(void)
return cpu_to_be64(result);
}
-static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
- struct ib_send_wr *wr, int atomic)
+static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
+{
+ if ((mask & MLX5_MKEY_MASK_PAGE_SIZE &&
+ MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) ||
+ (mask & MLX5_MKEY_MASK_A &&
+ MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)))
+ return -EPERM;
+ return 0;
+}
+
+static int set_reg_umr_segment(struct mlx5_ib_dev *dev,
+ struct mlx5_wqe_umr_ctrl_seg *umr,
+ struct ib_send_wr *wr, int atomic)
{
struct mlx5_umr_wr *umrwr = umr_wr(wr);
@@ -3679,6 +3749,8 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
if (!wr->num_sge)
umr->flags |= MLX5_UMR_INLINE;
+
+ return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask));
}
static u8 get_umr_flags(int acc)
@@ -4501,7 +4573,9 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey);
- set_reg_umr_segment(seg, wr, !!(MLX5_CAP_GEN(mdev, atomic)));
+ err = set_reg_umr_segment(dev, seg, wr, !!(MLX5_CAP_GEN(mdev, atomic)));
+ if (unlikely(err))
+ goto out;
seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
if (unlikely((seg == qend)))
OpenPOWER on IntegriCloud