Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r--  drivers/infiniband/hw/mlx5/Makefile   |   1
-rw-r--r--  drivers/infiniband/hw/mlx5/ah.c       |  12
-rw-r--r--  drivers/infiniband/hw/mlx5/cmd.c      | 104
-rw-r--r--  drivers/infiniband/hw/mlx5/cmd.h      |   4
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c       |  72
-rw-r--r--  drivers/infiniband/hw/mlx5/ib_rep.c   | 192
-rw-r--r--  drivers/infiniband/hw/mlx5/ib_rep.h   |  72
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c     | 976
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h  | 109
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c       | 265
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c       | 197
11 files changed, 1695 insertions(+), 309 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index bc6299697dda..d42b922bede8 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_MLX5_INFINIBAND)	+= mlx5_ib.o  mlx5_ib-y :=	main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o  mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o +mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index fe269f680103..e6bde32a83f3 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -36,6 +36,9 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,  				  struct mlx5_ib_ah *ah,  				  struct rdma_ah_attr *ah_attr)  { +	enum ib_gid_type gid_type; +	int err; +  	if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {  		const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); @@ -50,6 +53,12 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,  	ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4);  	if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { +		err = mlx5_get_roce_gid_type(dev, ah_attr->port_num, +					     ah_attr->grh.sgid_index, +					     &gid_type); +		if (err) +			return ERR_PTR(err); +  		memcpy(ah->av.rmac, ah_attr->roce.dmac,  		       sizeof(ah_attr->roce.dmac));  		ah->av.udp_sport = @@ -57,6 +66,9 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,  					rdma_ah_get_port_num(ah_attr),  					rdma_ah_read_grh(ah_attr)->sgid_index);  		ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0x7) << 1; +		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) +#define MLX5_ECN_ENABLED BIT(1) +			ah->av.tclass |= MLX5_ECN_ENABLED;  	} else {  		ah->av.rlid = cpu_to_be16(rdma_ah_get_dlid(ah_attr));  		ah->av.fl_mlid = rdma_ah_get_path_bits(ah_attr) & 0x7f; diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index 6f6712f87a73..188512bf46e6 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -66,3 +66,107 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev,  	return mlx5_cmd_exec(dev, in, in_size, out, sizeof(out));  } + +int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, +			  u64 length, u32 alignment) +{ +	struct mlx5_core_dev *dev = memic->dev; +	u64 num_memic_hw_pages = MLX5_CAP_DEV_MEM(dev, memic_bar_size) +					>> PAGE_SHIFT; +	u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr); +	u32 max_alignment = MLX5_CAP_DEV_MEM(dev, log_max_memic_addr_alignment); +	u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE); +	u32 out[MLX5_ST_SZ_DW(alloc_memic_out)] = {}; +	u32 in[MLX5_ST_SZ_DW(alloc_memic_in)] = {}; +	u32 mlx5_alignment; +	u64 page_idx = 0; +	int ret = 0; + +	if (!length || (length & MLX5_MEMIC_ALLOC_SIZE_MASK)) +		return -EINVAL; + +	/* mlx5 device sets alignment as 64*2^driver_value +	 * so normalizing is needed. +	 */ +	mlx5_alignment = (alignment < MLX5_MEMIC_BASE_ALIGN) ? 
0 : +			 alignment - MLX5_MEMIC_BASE_ALIGN; +	if (mlx5_alignment > max_alignment) +		return -EINVAL; + +	MLX5_SET(alloc_memic_in, in, opcode, MLX5_CMD_OP_ALLOC_MEMIC); +	MLX5_SET(alloc_memic_in, in, range_size, num_pages * PAGE_SIZE); +	MLX5_SET(alloc_memic_in, in, memic_size, length); +	MLX5_SET(alloc_memic_in, in, log_memic_addr_alignment, +		 mlx5_alignment); + +	while (page_idx < num_memic_hw_pages) { +		spin_lock(&memic->memic_lock); +		page_idx = bitmap_find_next_zero_area(memic->memic_alloc_pages, +						      num_memic_hw_pages, +						      page_idx, +						      num_pages, 0); + +		if (page_idx < num_memic_hw_pages) +			bitmap_set(memic->memic_alloc_pages, +				   page_idx, num_pages); + +		spin_unlock(&memic->memic_lock); + +		if (page_idx >= num_memic_hw_pages) +			break; + +		MLX5_SET64(alloc_memic_in, in, range_start_addr, +			   hw_start_addr + (page_idx * PAGE_SIZE)); + +		ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +		if (ret) { +			spin_lock(&memic->memic_lock); +			bitmap_clear(memic->memic_alloc_pages, +				     page_idx, num_pages); +			spin_unlock(&memic->memic_lock); + +			if (ret == -EAGAIN) { +				page_idx++; +				continue; +			} + +			return ret; +		} + +		*addr = pci_resource_start(dev->pdev, 0) + +			MLX5_GET64(alloc_memic_out, out, memic_start_addr); + +		return 0; +	} + +	return -ENOMEM; +} + +int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length) +{ +	struct mlx5_core_dev *dev = memic->dev; +	u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr); +	u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE); +	u32 out[MLX5_ST_SZ_DW(dealloc_memic_out)] = {0}; +	u32 in[MLX5_ST_SZ_DW(dealloc_memic_in)] = {0}; +	u64 start_page_idx; +	int err; + +	addr -= pci_resource_start(dev->pdev, 0); +	start_page_idx = (addr - hw_start_addr) >> PAGE_SHIFT; + +	MLX5_SET(dealloc_memic_in, in, opcode, MLX5_CMD_OP_DEALLOC_MEMIC); +	MLX5_SET64(dealloc_memic_in, in, memic_start_addr, addr); +	MLX5_SET(dealloc_memic_in, in, memic_size, length); + +	err =  mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + +	if (!err) { +		spin_lock(&memic->memic_lock); +		bitmap_clear(memic->memic_alloc_pages, +			     start_page_idx, num_pages); +		spin_unlock(&memic->memic_lock); +	} + +	return err; +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 78ffded7cc2c..e7206c8a8011 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -33,6 +33,7 @@  #ifndef MLX5_IB_CMD_H  #define MLX5_IB_CMD_H +#include "mlx5_ib.h"  #include <linux/kernel.h>  #include <linux/mlx5/driver.h> @@ -41,4 +42,7 @@ int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,  			       void *out, int out_size);  int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,  				void *in, int in_size); +int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, +			 u64 length, u32 alignment); +int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length);  #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 15457c9569a7..77d257ec899b 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -64,14 +64,9 @@ static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)  	}  } -static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size) -{ -	return mlx5_buf_offset(&buf->buf, n * size); -} -  static void *get_cqe(struct mlx5_ib_cq *cq, int n)  { -	return 
get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz); +	return mlx5_frag_buf_get_wqe(&cq->buf.fbc, n);  }  static u8 sw_ownership_bit(int n, int nent) @@ -272,14 +267,8 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,  static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)  { -	__be32 *p = (__be32 *)cqe; -	int i; -  	mlx5_ib_warn(dev, "dump error cqe\n"); -	for (i = 0; i < sizeof(*cqe) / 16; i++, p += 4) -		pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p[0]), -			be32_to_cpu(p[1]), be32_to_cpu(p[2]), -			be32_to_cpu(p[3])); +	mlx5_dump_err_cqe(dev->mdev, cqe);  }  static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, @@ -404,7 +393,7 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,  static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)  { -	mlx5_buf_free(dev->mdev, &buf->buf); +	mlx5_frag_buf_free(dev->mdev, &buf->fbc.frag_buf);  }  static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe, @@ -725,12 +714,25 @@ int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)  	return ret;  } -static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf, -			int nent, int cqe_size) +static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev, +			     struct mlx5_ib_cq_buf *buf, +			     int nent, +			     int cqe_size)  { +	struct mlx5_frag_buf_ctrl *c = &buf->fbc; +	struct mlx5_frag_buf *frag_buf = &c->frag_buf; +	u32 cqc_buff[MLX5_ST_SZ_DW(cqc)] = {0};  	int err; -	err = mlx5_buf_alloc(dev->mdev, nent * cqe_size, &buf->buf); +	MLX5_SET(cqc, cqc_buff, log_cq_size, ilog2(cqe_size)); +	MLX5_SET(cqc, cqc_buff, cqe_sz, (cqe_size == 128) ? 1 : 0); + +	mlx5_core_init_cq_frag_buf(&buf->fbc, cqc_buff); + +	err = mlx5_frag_buf_alloc_node(dev->mdev, +				       nent * cqe_size, +				       frag_buf, +				       dev->mdev->priv.numa_node);  	if (err)  		return err; @@ -863,14 +865,15 @@ static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)  	ib_umem_release(cq->buf.umem);  } -static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf) +static void init_cq_frag_buf(struct mlx5_ib_cq *cq, +			     struct mlx5_ib_cq_buf *buf)  {  	int i;  	void *cqe;  	struct mlx5_cqe64 *cqe64;  	for (i = 0; i < buf->nent; i++) { -		cqe = get_cqe_from_buf(buf, i, buf->cqe_size); +		cqe = get_cqe(cq, i);  		cqe64 = buf->cqe_size == 64 ? 
cqe : cqe + 64;  		cqe64->op_own = MLX5_CQE_INVALID << 4;  	} @@ -892,14 +895,15 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,  	cq->mcq.arm_db     = cq->db.db + 1;  	cq->mcq.cqe_sz = cqe_size; -	err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size); +	err = alloc_cq_frag_buf(dev, &cq->buf, entries, cqe_size);  	if (err)  		goto err_db; -	init_cq_buf(cq, &cq->buf); +	init_cq_frag_buf(cq, &cq->buf);  	*inlen = MLX5_ST_SZ_BYTES(create_cq_in) + -		 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * cq->buf.buf.npages; +		 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * +		 cq->buf.fbc.frag_buf.npages;  	*cqb = kvzalloc(*inlen, GFP_KERNEL);  	if (!*cqb) {  		err = -ENOMEM; @@ -907,11 +911,12 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,  	}  	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas); -	mlx5_fill_page_array(&cq->buf.buf, pas); +	mlx5_fill_page_frag_array(&cq->buf.fbc.frag_buf, pas);  	cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);  	MLX5_SET(cqc, cqc, log_page_size, -		 cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); +		 cq->buf.fbc.frag_buf.page_shift - +		 MLX5_ADAPTER_PAGE_SHIFT);  	*index = dev->mdev->priv.uar->index; @@ -1213,11 +1218,11 @@ static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,  	if (!cq->resize_buf)  		return -ENOMEM; -	err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size); +	err = alloc_cq_frag_buf(dev, cq->resize_buf, entries, cqe_size);  	if (err)  		goto ex; -	init_cq_buf(cq, cq->resize_buf); +	init_cq_frag_buf(cq, cq->resize_buf);  	return 0; @@ -1262,9 +1267,8 @@ static int copy_resize_cqes(struct mlx5_ib_cq *cq)  	}  	while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) { -		dcqe = get_cqe_from_buf(cq->resize_buf, -					(i + 1) & (cq->resize_buf->nent), -					dsize); +		dcqe = mlx5_frag_buf_get_wqe(&cq->resize_buf->fbc, +					     (i + 1) & cq->resize_buf->nent);  		dcqe64 = dsize == 64 ? dcqe : dcqe + 64;  		sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);  		memcpy(dcqe, scqe, dsize); @@ -1330,8 +1334,11 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)  		cqe_size = 64;  		err = resize_kernel(dev, cq, entries, cqe_size);  		if (!err) { -			npas = cq->resize_buf->buf.npages; -			page_shift = cq->resize_buf->buf.page_shift; +			struct mlx5_frag_buf_ctrl *c; + +			c = &cq->resize_buf->fbc; +			npas = c->frag_buf.npages; +			page_shift = c->frag_buf.page_shift;  		}  	} @@ -1352,7 +1359,8 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)  		mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,  				     pas, 0);  	else -		mlx5_fill_page_array(&cq->resize_buf->buf, pas); +		mlx5_fill_page_frag_array(&cq->resize_buf->fbc.frag_buf, +					  pas);  	MLX5_SET(modify_cq_in, in,  		 modify_field_select_resize_field_select.resize_field_select.resize_field_select, diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c new file mode 100644 index 000000000000..0e04fdddf670 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -0,0 +1,192 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. 
+ */ + +#include "ib_rep.h" + +static const struct mlx5_ib_profile rep_profile = { +	STAGE_CREATE(MLX5_IB_STAGE_INIT, +		     mlx5_ib_stage_init_init, +		     mlx5_ib_stage_init_cleanup), +	STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB, +		     mlx5_ib_stage_rep_flow_db_init, +		     NULL), +	STAGE_CREATE(MLX5_IB_STAGE_CAPS, +		     mlx5_ib_stage_caps_init, +		     NULL), +	STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB, +		     mlx5_ib_stage_rep_non_default_cb, +		     NULL), +	STAGE_CREATE(MLX5_IB_STAGE_ROCE, +		     mlx5_ib_stage_rep_roce_init, +		     mlx5_ib_stage_rep_roce_cleanup), +	STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, +		     mlx5_ib_stage_dev_res_init, +		     mlx5_ib_stage_dev_res_cleanup), +	STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, +		     mlx5_ib_stage_counters_init, +		     mlx5_ib_stage_counters_cleanup), +	STAGE_CREATE(MLX5_IB_STAGE_BFREG, +		     mlx5_ib_stage_bfrag_init, +		     mlx5_ib_stage_bfrag_cleanup), +	STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR, +		     NULL, +		     mlx5_ib_stage_pre_ib_reg_umr_cleanup), +	STAGE_CREATE(MLX5_IB_STAGE_IB_REG, +		     mlx5_ib_stage_ib_reg_init, +		     mlx5_ib_stage_ib_reg_cleanup), +	STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR, +		     mlx5_ib_stage_post_ib_reg_umr_init, +		     NULL), +	STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR, +		     mlx5_ib_stage_class_attr_init, +		     NULL), +}; + +static int +mlx5_ib_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) +{ +	return 0; +} + +static void +mlx5_ib_nic_rep_unload(struct mlx5_eswitch_rep *rep) +{ +	rep->rep_if[REP_IB].priv = NULL; +} + +static int +mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) +{ +	struct mlx5_ib_dev *ibdev; + +	ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev)); +	if (!ibdev) +		return -ENOMEM; + +	ibdev->rep = rep; +	ibdev->mdev = dev; +	ibdev->num_ports = max(MLX5_CAP_GEN(dev, num_ports), +			       MLX5_CAP_GEN(dev, num_vhca_ports)); +	if (!__mlx5_ib_add(ibdev, &rep_profile)) +		return -EINVAL; + +	rep->rep_if[REP_IB].priv = ibdev; + +	return 0; +} + +static void +mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep) +{ +	struct mlx5_ib_dev *dev; + +	if (!rep->rep_if[REP_IB].priv) +		return; + +	dev = mlx5_ib_rep_to_dev(rep); +	__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); +	rep->rep_if[REP_IB].priv = NULL; +} + +static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep) +{ +	return mlx5_ib_rep_to_dev(rep); +} + +static void mlx5_ib_rep_register_vf_vports(struct mlx5_ib_dev *dev) +{ +	struct mlx5_eswitch *esw   = dev->mdev->priv.eswitch; +	int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev); +	int vport; + +	for (vport = 1; vport < total_vfs; vport++) { +		struct mlx5_eswitch_rep_if rep_if = {}; + +		rep_if.load = mlx5_ib_vport_rep_load; +		rep_if.unload = mlx5_ib_vport_rep_unload; +		rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev; +		mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_IB); +	} +} + +static void mlx5_ib_rep_unregister_vf_vports(struct mlx5_ib_dev *dev) +{ +	struct mlx5_eswitch *esw   = dev->mdev->priv.eswitch; +	int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev); +	int vport; + +	for (vport = 1; vport < total_vfs; vport++) +		mlx5_eswitch_unregister_vport_rep(esw, vport, REP_IB); +} + +void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev) +{ +	struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; +	struct mlx5_eswitch_rep_if rep_if = {}; + +	rep_if.load = mlx5_ib_nic_rep_load; +	rep_if.unload = mlx5_ib_nic_rep_unload; +	rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev; +	
rep_if.priv = dev; + +	mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_IB); + +	mlx5_ib_rep_register_vf_vports(dev); +} + +void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev) +{ +	struct mlx5_eswitch *esw   = dev->mdev->priv.eswitch; + +	mlx5_ib_rep_unregister_vf_vports(dev); /* VFs vports */ +	mlx5_eswitch_unregister_vport_rep(esw, 0, REP_IB); /* UPLINK PF*/ +} + +u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw) +{ +	return mlx5_eswitch_mode(esw); +} + +struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw, +					  int vport_index) +{ +	return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_IB); +} + +struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, +					  int vport_index) +{ +	return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_ETH); +} + +struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw) +{ +	return mlx5_eswitch_uplink_get_proto_dev(esw, REP_IB); +} + +struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport) +{ +	return mlx5_eswitch_vport_rep(esw, vport); +} + +int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, +			      struct mlx5_ib_sq *sq) +{ +	struct mlx5_flow_handle *flow_rule; +	struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; + +	if (!dev->rep) +		return 0; + +	flow_rule = +		mlx5_eswitch_add_send_to_vport_rule(esw, +						    dev->rep->vport, +						    sq->base.mqp.qpn); +	if (IS_ERR(flow_rule)) +		return PTR_ERR(flow_rule); +	sq->flow_rule = flow_rule; + +	return 0; +} diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h new file mode 100644 index 000000000000..046fd942fd46 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/ib_rep.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. 
+ */ + +#ifndef __MLX5_IB_REP_H__ +#define __MLX5_IB_REP_H__ + +#include <linux/mlx5/eswitch.h> +#include "mlx5_ib.h" + +#ifdef CONFIG_MLX5_ESWITCH +u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw); +struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw, +					  int vport_index); +struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw); +struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, +					   int vport_index); +void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev); +void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev); +int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, +			      struct mlx5_ib_sq *sq); +struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, +					  int vport_index); +#else /* CONFIG_MLX5_ESWITCH */ +static inline u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw) +{ +	return SRIOV_NONE; +} + +static inline +struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw, +					  int vport_index) +{ +	return NULL; +} + +static inline +struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw) +{ +	return NULL; +} + +static inline +struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, +					   int vport_index) +{ +	return NULL; +} + +static inline void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev) {} +static inline void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev) {} +static inline int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, +					    struct mlx5_ib_sq *sq) +{ +	return 0; +} + +static inline +struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, +					  int vport_index) +{ +	return NULL; +} +#endif + +static inline +struct mlx5_ib_dev *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep) +{ +	return (struct mlx5_ib_dev *)rep->rep_if[REP_IB].priv; +} +#endif /* __MLX5_IB_REP_H__ */ diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index da091de4e69d..daa919e5a442 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -38,6 +38,7 @@  #include <linux/pci.h>  #include <linux/dma-mapping.h>  #include <linux/slab.h> +#include <linux/bitmap.h>  #if defined(CONFIG_X86)  #include <asm/pat.h>  #endif @@ -51,13 +52,23 @@  #include <linux/mlx5/port.h>  #include <linux/mlx5/vport.h>  #include <linux/mlx5/fs.h> +#include <linux/mlx5/fs_helpers.h>  #include <linux/list.h>  #include <rdma/ib_smi.h>  #include <rdma/ib_umem.h>  #include <linux/in.h>  #include <linux/etherdevice.h>  #include "mlx5_ib.h" +#include "ib_rep.h"  #include "cmd.h" +#include <linux/mlx5/fs_helpers.h> +#include <linux/mlx5/accel.h> +#include <rdma/uverbs_std_types.h> +#include <rdma/mlx5_user_ioctl_verbs.h> +#include <rdma/mlx5_user_ioctl_cmds.h> + +#define UVERBS_MODULE_NAME mlx5_ib +#include <rdma/uverbs_named_ioctl.h>  #define DRIVER_NAME "mlx5_ib"  #define DRIVER_VERSION "5.0-0" @@ -90,6 +101,12 @@ static LIST_HEAD(mlx5_ib_dev_list);   */  static DEFINE_MUTEX(mlx5_ib_multiport_mutex); +/* We can't use an array for xlt_emergency_page because dma_map_single + * doesn't work on kernel modules memory + */ +static unsigned long xlt_emergency_page; +static struct mutex xlt_emergency_page_mutex; +  struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi)  {  	struct mlx5_ib_dev *dev; @@ -130,7 +147,7 @@ static int get_port_state(struct ib_device *ibdev,  	int ret;  	memset(&attr, 0, sizeof(attr)); -	ret = mlx5_ib_query_port(ibdev, port_num, &attr); +	ret = ibdev->query_port(ibdev, 
port_num, &attr);  	if (!ret)  		*state = attr.state;  	return ret; @@ -154,10 +171,19 @@ static int mlx5_netdev_event(struct notifier_block *this,  	case NETDEV_REGISTER:  	case NETDEV_UNREGISTER:  		write_lock(&roce->netdev_lock); - -		if (ndev->dev.parent == &mdev->pdev->dev) -			roce->netdev = (event == NETDEV_UNREGISTER) ? +		if (ibdev->rep) { +			struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch; +			struct net_device *rep_ndev; + +			rep_ndev = mlx5_ib_get_rep_netdev(esw, +							  ibdev->rep->vport); +			if (rep_ndev == ndev) +				roce->netdev = (event == NETDEV_UNREGISTER) ?  					NULL : ndev; +		} else if (ndev->dev.parent == &ibdev->mdev->pdev->dev) { +			roce->netdev = (event == NETDEV_UNREGISTER) ? +				NULL : ndev; +		}  		write_unlock(&roce->netdev_lock);  		break; @@ -388,6 +414,9 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,  	if (err)  		goto out; +	props->active_width     = IB_WIDTH_4X; +	props->active_speed     = IB_SPEED_QDR; +  	translate_eth_proto_oper(eth_prot_oper, &props->active_speed,  				 &props->active_width); @@ -482,18 +511,19 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,  				      vlan_id, port_num);  } -static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num, -			   unsigned int index, const union ib_gid *gid, +static int mlx5_ib_add_gid(const union ib_gid *gid,  			   const struct ib_gid_attr *attr,  			   __always_unused void **context)  { -	return set_roce_addr(to_mdev(device), port_num, index, gid, attr); +	return set_roce_addr(to_mdev(attr->device), attr->port_num, +			     attr->index, gid, attr);  } -static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num, -			   unsigned int index, __always_unused void **context) +static int mlx5_ib_del_gid(const struct ib_gid_attr *attr, +			   __always_unused void **context)  { -	return set_roce_addr(to_mdev(device), port_num, index, NULL, NULL); +	return set_roce_addr(to_mdev(attr->device), attr->port_num, +			     attr->index, NULL, NULL);  }  __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, @@ -505,9 +535,6 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,  	if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr))  		return 0; -	if (!attr.ndev) -		return 0; -  	dev_put(attr.ndev);  	if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) @@ -527,9 +554,6 @@ int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,  	if (ret)  		return ret; -	if (!attr.ndev) -		return -ENODEV; -  	dev_put(attr.ndev);  	*gid_type = attr.gid_type; @@ -833,6 +857,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,  						MLX5_RX_HASH_SRC_PORT_UDP |  						MLX5_RX_HASH_DST_PORT_UDP |  						MLX5_RX_HASH_INNER; +			if (mlx5_accel_ipsec_device_caps(dev->mdev) & +			    MLX5_ACCEL_IPSEC_CAP_DEVICE) +				resp.rss_caps.rx_hash_fields_mask |= +					MLX5_RX_HASH_IPSEC_SPI;  			resp.response_length += sizeof(resp.rss_caps);  		}  	} else { @@ -864,6 +892,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,  		props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;  	} +	if (MLX5_CAP_DEV_MEM(mdev, memic)) { +		props->max_dm_size = +			MLX5_CAP_DEV_MEM(mdev, max_memic_size); +	} +  	if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS))  		props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; @@ -969,6 +1002,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,  				MLX5_CAP_QOS(mdev, packet_pacing_min_rate);  			resp.packet_pacing_caps.supported_qpts |=  				1 << 
IB_QPT_RAW_PACKET; +			if (MLX5_CAP_QOS(mdev, packet_pacing_burst_bound) && +			    MLX5_CAP_QOS(mdev, packet_pacing_typical_size)) +				resp.packet_pacing_caps.cap_flags |= +					MLX5_IB_PP_SUPPORT_BURST;  		}  		resp.response_length += sizeof(resp.packet_pacing_caps);  	} @@ -1272,6 +1309,22 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,  	return ret;  } +static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port, +				  struct ib_port_attr *props) +{ +	int ret; + +	/* Only link layer == ethernet is valid for representors */ +	ret = mlx5_query_port_roce(ibdev, port, props); +	if (ret || !props) +		return ret; + +	/* We don't support GIDS */ +	props->gid_tbl_len = 0; + +	return ret; +} +  static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,  			     union ib_gid *gid)  { @@ -1638,6 +1691,18 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,  	resp.response_length = min(offsetof(typeof(resp), response_length) +  				   sizeof(resp.response_length), udata->outlen); +	if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) { +		if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_EGRESS)) +			resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM; +		if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA) +			resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA; +		if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) +			resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING; +		if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN) +			resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN; +		/* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */ +	} +  	context = kzalloc(sizeof(*context), GFP_KERNEL);  	if (!context)  		return ERR_PTR(-ENOMEM); @@ -1675,17 +1740,10 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,  	context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;  #endif -	context->upd_xlt_page = __get_free_page(GFP_KERNEL); -	if (!context->upd_xlt_page) { -		err = -ENOMEM; -		goto out_uars; -	} -	mutex_init(&context->upd_xlt_page_mutex); -  	if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {  		err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);  		if (err) -			goto out_page; +			goto out_uars;  	}  	INIT_LIST_HEAD(&context->vma_private_list); @@ -1762,9 +1820,6 @@ out_td:  	if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))  		mlx5_ib_dealloc_transport_domain(dev, context->tdn); -out_page: -	free_page(context->upd_xlt_page); -  out_uars:  	deallocate_uars(dev, context); @@ -1790,7 +1845,6 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)  	if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))  		mlx5_ib_dealloc_transport_domain(dev, context->tdn); -	free_page(context->upd_xlt_page);  	deallocate_uars(dev, context);  	kfree(bfregi->sys_pages);  	kfree(bfregi->count); @@ -1966,6 +2020,8 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)  		return "best effort WC";  	case MLX5_IB_MMAP_NC_PAGE:  		return "NC"; +	case MLX5_IB_MMAP_DEVICE_MEM: +		return "Device Memory";  	default:  		return NULL;  	} @@ -2124,6 +2180,34 @@ free_bfreg:  	return err;  } +static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct 
*vma) +{ +	struct mlx5_ib_ucontext *mctx = to_mucontext(context); +	struct mlx5_ib_dev *dev = to_mdev(context->device); +	u16 page_idx = get_extended_index(vma->vm_pgoff); +	size_t map_size = vma->vm_end - vma->vm_start; +	u32 npages = map_size >> PAGE_SHIFT; +	phys_addr_t pfn; +	pgprot_t prot; + +	if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) != +	    page_idx + npages) +		return -EINVAL; + +	pfn = ((pci_resource_start(dev->mdev->pdev, 0) + +	      MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >> +	      PAGE_SHIFT) + +	      page_idx; +	prot = pgprot_writecombine(vma->vm_page_prot); +	vma->vm_page_prot = prot; + +	if (io_remap_pfn_range(vma, vma->vm_start, pfn, map_size, +			       vma->vm_page_prot)) +		return -EAGAIN; + +	return mlx5_ib_set_vma_data(vma, mctx); +} +  static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)  {  	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); @@ -2168,6 +2252,9 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm  	case MLX5_IB_MMAP_CLOCK_INFO:  		return mlx5_ib_mmap_clock_info_page(dev, vma, context); +	case MLX5_IB_MMAP_DEVICE_MEM: +		return dm_mmap(ibcontext, vma); +  	default:  		return -EINVAL;  	} @@ -2175,6 +2262,87 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm  	return 0;  } +struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, +			       struct ib_ucontext *context, +			       struct ib_dm_alloc_attr *attr, +			       struct uverbs_attr_bundle *attrs) +{ +	u64 act_size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE); +	struct mlx5_memic *memic = &to_mdev(ibdev)->memic; +	phys_addr_t memic_addr; +	struct mlx5_ib_dm *dm; +	u64 start_offset; +	u32 page_idx; +	int err; + +	dm = kzalloc(sizeof(*dm), GFP_KERNEL); +	if (!dm) +		return ERR_PTR(-ENOMEM); + +	mlx5_ib_dbg(to_mdev(ibdev), "alloc_memic req: user_length=0x%llx act_length=0x%llx log_alignment=%d\n", +		    attr->length, act_size, attr->alignment); + +	err = mlx5_cmd_alloc_memic(memic, &memic_addr, +				   act_size, attr->alignment); +	if (err) +		goto err_free; + +	start_offset = memic_addr & ~PAGE_MASK; +	page_idx = (memic_addr - pci_resource_start(memic->dev->pdev, 0) - +		    MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >> +		    PAGE_SHIFT; + +	err = uverbs_copy_to(attrs, +			     MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, +			     &start_offset, sizeof(start_offset)); +	if (err) +		goto err_dealloc; + +	err = uverbs_copy_to(attrs, +			     MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, +			     &page_idx, sizeof(page_idx)); +	if (err) +		goto err_dealloc; + +	bitmap_set(to_mucontext(context)->dm_pages, page_idx, +		   DIV_ROUND_UP(act_size, PAGE_SIZE)); + +	dm->dev_addr = memic_addr; + +	return &dm->ibdm; + +err_dealloc: +	mlx5_cmd_dealloc_memic(memic, memic_addr, +			       act_size); +err_free: +	kfree(dm); +	return ERR_PTR(err); +} + +int mlx5_ib_dealloc_dm(struct ib_dm *ibdm) +{ +	struct mlx5_memic *memic = &to_mdev(ibdm->device)->memic; +	struct mlx5_ib_dm *dm = to_mdm(ibdm); +	u64 act_size = roundup(dm->ibdm.length, MLX5_MEMIC_BASE_SIZE); +	u32 page_idx; +	int ret; + +	ret = mlx5_cmd_dealloc_memic(memic, dm->dev_addr, act_size); +	if (ret) +		return ret; + +	page_idx = (dm->dev_addr - pci_resource_start(memic->dev->pdev, 0) - +		    MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >> +		    PAGE_SHIFT; +	bitmap_clear(to_mucontext(ibdm->uobject->context)->dm_pages, +		     page_idx, +		     DIV_ROUND_UP(act_size, PAGE_SIZE)); + +	kfree(dm); + +	
return 0; +} +  static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,  				      struct ib_ucontext *context,  				      struct ib_udata *udata) @@ -2290,11 +2458,29 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)  		   offsetof(typeof(filter), field) -\  		   sizeof(filter.field)) -#define IPV4_VERSION 4 -#define IPV6_VERSION 6 +static int parse_flow_flow_action(const union ib_flow_spec *ib_spec, +				  const struct ib_flow_attr *flow_attr, +				  struct mlx5_flow_act *action) +{ +	struct mlx5_ib_flow_action *maction = to_mflow_act(ib_spec->action.act); + +	switch (maction->ib_action.type) { +	case IB_FLOW_ACTION_ESP: +		/* Currently only AES_GCM keymat is supported by the driver */ +		action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx; +		action->action |= flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS ? +			MLX5_FLOW_CONTEXT_ACTION_ENCRYPT : +			MLX5_FLOW_CONTEXT_ACTION_DECRYPT; +		return 0; +	default: +		return -EOPNOTSUPP; +	} +} +  static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,  			   u32 *match_v, const union ib_flow_spec *ib_spec, -			   u32 *tag_id, bool *is_drop) +			   const struct ib_flow_attr *flow_attr, +			   struct mlx5_flow_act *action)  {  	void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,  					   misc_parameters); @@ -2303,6 +2489,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,  	void *headers_c;  	void *headers_v;  	int match_ipv; +	int ret;  	if (ib_spec->type & IB_FLOW_SPEC_INNER) {  		headers_c = MLX5_ADDR_OF(fte_match_param, match_c, @@ -2377,7 +2564,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,  			MLX5_SET(fte_match_set_lyr_2_4, headers_c,  				 ip_version, 0xf);  			MLX5_SET(fte_match_set_lyr_2_4, headers_v, -				 ip_version, IPV4_VERSION); +				 ip_version, MLX5_FS_IPV4_VERSION);  		} else {  			MLX5_SET(fte_match_set_lyr_2_4, headers_c,  				 ethertype, 0xffff); @@ -2416,7 +2603,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,  			MLX5_SET(fte_match_set_lyr_2_4, headers_c,  				 ip_version, 0xf);  			MLX5_SET(fte_match_set_lyr_2_4, headers_v, -				 ip_version, IPV6_VERSION); +				 ip_version, MLX5_FS_IPV6_VERSION);  		} else {  			MLX5_SET(fte_match_set_lyr_2_4, headers_c,  				 ethertype, 0xffff); @@ -2453,7 +2640,15 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,  			       ntohl(ib_spec->ipv6.mask.flow_label),  			       ntohl(ib_spec->ipv6.val.flow_label),  			       ib_spec->type & IB_FLOW_SPEC_INNER); +		break; +	case IB_FLOW_SPEC_ESP: +		if (ib_spec->esp.mask.seq) +			return -EOPNOTSUPP; +		MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, +			 ntohl(ib_spec->esp.mask.spi)); +		MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, +			 ntohl(ib_spec->esp.val.spi));  		break;  	case IB_FLOW_SPEC_TCP:  		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, @@ -2512,13 +2707,19 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,  		if (ib_spec->flow_tag.tag_id >= BIT(24))  			return -EINVAL; -		*tag_id = ib_spec->flow_tag.tag_id; +		action->flow_tag = ib_spec->flow_tag.tag_id; +		action->has_flow_tag = true;  		break;  	case IB_FLOW_SPEC_ACTION_DROP:  		if (FIELDS_NOT_SUPPORTED(ib_spec->drop,  					 LAST_DROP_FIELD))  			return -EOPNOTSUPP; -		*is_drop = true; +		action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; +		break; +	case IB_FLOW_SPEC_ACTION_HANDLE: +		ret = parse_flow_flow_action(ib_spec, flow_attr, action); +		if (ret) +			return ret;  		break;  	default:  
		return -EINVAL; @@ -2561,6 +2762,46 @@ static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)  	return false;  } +enum valid_spec { +	VALID_SPEC_INVALID, +	VALID_SPEC_VALID, +	VALID_SPEC_NA, +}; + +static enum valid_spec +is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev, +		     const struct mlx5_flow_spec *spec, +		     const struct mlx5_flow_act *flow_act, +		     bool egress) +{ +	const u32 *match_c = spec->match_criteria; +	bool is_crypto = +		(flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | +				     MLX5_FLOW_CONTEXT_ACTION_DECRYPT)); +	bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c); +	bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP; + +	/* +	 * Currently only crypto is supported in egress, when regular egress +	 * rules would be supported, always return VALID_SPEC_NA. +	 */ +	if (!is_crypto) +		return egress ? VALID_SPEC_INVALID : VALID_SPEC_NA; + +	return is_crypto && is_ipsec && +		(!egress || (!is_drop && !flow_act->has_flow_tag)) ? +		VALID_SPEC_VALID : VALID_SPEC_INVALID; +} + +static bool is_valid_spec(struct mlx5_core_dev *mdev, +			  const struct mlx5_flow_spec *spec, +			  const struct mlx5_flow_act *flow_act, +			  bool egress) +{ +	/* We curretly only support ipsec egress flow */ +	return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID; +} +  static bool is_valid_ethertype(struct mlx5_core_dev *mdev,  			       const struct ib_flow_attr *flow_attr,  			       bool check_inner) @@ -2635,7 +2876,7 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)  							  ibflow);  	struct mlx5_ib_flow_handler *iter, *tmp; -	mutex_lock(&dev->flow_db.lock); +	mutex_lock(&dev->flow_db->lock);  	list_for_each_entry_safe(iter, tmp, &handler->list, list) {  		mlx5_del_flow_rules(iter->rule); @@ -2646,7 +2887,7 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)  	mlx5_del_flow_rules(handler->rule);  	put_flow_table(dev, handler->prio, true); -	mutex_unlock(&dev->flow_db.lock); +	mutex_unlock(&dev->flow_db->lock);  	kfree(handler); @@ -2685,17 +2926,21 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,  	max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,  						       log_max_ft_size));  	if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { -		if (flow_is_multicast_only(flow_attr) && -		    !dont_trap) +		if (ft_type == MLX5_IB_FT_TX) +			priority = 0; +		else if (flow_is_multicast_only(flow_attr) && +			 !dont_trap)  			priority = MLX5_IB_FLOW_MCAST_PRIO;  		else  			priority = ib_prio_to_core_prio(flow_attr->priority,  							dont_trap);  		ns = mlx5_get_flow_namespace(dev->mdev, +					     ft_type == MLX5_IB_FT_TX ? 
+					     MLX5_FLOW_NAMESPACE_EGRESS :  					     MLX5_FLOW_NAMESPACE_BYPASS);  		num_entries = MLX5_FS_MAX_ENTRIES;  		num_groups = MLX5_FS_MAX_TYPES; -		prio = &dev->flow_db.prios[priority]; +		prio = &dev->flow_db->prios[priority];  	} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||  		   flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {  		ns = mlx5_get_flow_namespace(dev->mdev, @@ -2703,7 +2948,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,  		build_leftovers_ft_param(&priority,  					 &num_entries,  					 &num_groups); -		prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO]; +		prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];  	} else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {  		if (!MLX5_CAP_FLOWTABLE(dev->mdev,  					allow_sniffer_and_nic_rx_shared_tir)) @@ -2713,7 +2958,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,  					     MLX5_FLOW_NAMESPACE_SNIFFER_RX :  					     MLX5_FLOW_NAMESPACE_SNIFFER_TX); -		prio = &dev->flow_db.sniffer[ft_type]; +		prio = &dev->flow_db->sniffer[ft_type];  		priority = 0;  		num_entries = 1;  		num_groups = 1; @@ -2771,15 +3016,14 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,  {  	struct mlx5_flow_table	*ft = ft_prio->flow_table;  	struct mlx5_ib_flow_handler *handler; -	struct mlx5_flow_act flow_act = {0}; +	struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};  	struct mlx5_flow_spec *spec;  	struct mlx5_flow_destination *rule_dst = dst;  	const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);  	unsigned int spec_index; -	u32 flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; -	bool is_drop = false;  	int err = 0;  	int dest_num = 1; +	bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;  	if (!is_valid_attr(dev->mdev, flow_attr))  		return ERR_PTR(-EINVAL); @@ -2796,7 +3040,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,  	for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {  		err = parse_flow_attr(dev->mdev, spec->match_criteria,  				      spec->match_value, -				      ib_flow, &flow_tag, &is_drop); +				      ib_flow, flow_attr, &flow_act);  		if (err < 0)  			goto free; @@ -2806,25 +3050,46 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,  	if (!flow_is_multicast_only(flow_attr))  		set_underlay_qp(dev, spec, underlay_qpn); +	if (dev->rep) { +		void *misc; + +		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, +				    misc_parameters); +		MLX5_SET(fte_match_set_misc, misc, source_port, +			 dev->rep->vport); +		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, +				    misc_parameters); +		MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); +	} +  	spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); -	if (is_drop) { -		flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + +	if (is_egress && +	    !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) { +		err = -EINVAL; +		goto free; +	} + +	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {  		rule_dst = NULL;  		dest_num = 0;  	} else { -		flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : -		    MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; +		if (is_egress) +			flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; +		else +			flow_act.action |= +				dst ? 
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : +					MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;  	} -	if (flow_tag != MLX5_FS_DEFAULT_FLOW_TAG && +	if (flow_act.has_flow_tag &&  	    (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||  	     flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {  		mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n", -			     flow_tag, flow_attr->type); +			     flow_act.flow_tag, flow_attr->type);  		err = -EINVAL;  		goto free;  	} -	flow_act.flow_tag = flow_tag;  	handler->rule = mlx5_add_flow_rules(ft, spec,  					    &flow_act,  					    rule_dst, dest_num); @@ -2988,6 +3253,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,  	struct mlx5_flow_destination *dst = NULL;  	struct mlx5_ib_flow_prio *ft_prio_tx = NULL;  	struct mlx5_ib_flow_prio *ft_prio; +	bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;  	int err;  	int underlay_qpn; @@ -2996,16 +3262,23 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,  	if (domain != IB_FLOW_DOMAIN_USER ||  	    flow_attr->port > dev->num_ports || -	    (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)) +	    (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | +				  IB_FLOW_ATTR_FLAGS_EGRESS))) +		return ERR_PTR(-EINVAL); + +	if (is_egress && +	    (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || +	     flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT))  		return ERR_PTR(-EINVAL);  	dst = kzalloc(sizeof(*dst), GFP_KERNEL);  	if (!dst)  		return ERR_PTR(-ENOMEM); -	mutex_lock(&dev->flow_db.lock); +	mutex_lock(&dev->flow_db->lock); -	ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX); +	ft_prio = get_flow_table(dev, flow_attr, +				 is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);  	if (IS_ERR(ft_prio)) {  		err = PTR_ERR(ft_prio);  		goto unlock; @@ -3019,11 +3292,15 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,  		}  	} -	dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; -	if (mqp->flags & MLX5_IB_QP_RSS) -		dst->tir_num = mqp->rss_qp.tirn; -	else -		dst->tir_num = mqp->raw_packet_qp.rq.tirn; +	if (is_egress) { +		dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT; +	} else { +		dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; +		if (mqp->flags & MLX5_IB_QP_RSS) +			dst->tir_num = mqp->rss_qp.tirn; +		else +			dst->tir_num = mqp->raw_packet_qp.rq.tirn; +	}  	if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {  		if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)  { @@ -3052,7 +3329,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,  		goto destroy_ft;  	} -	mutex_unlock(&dev->flow_db.lock); +	mutex_unlock(&dev->flow_db->lock);  	kfree(dst);  	return &handler->ibflow; @@ -3062,12 +3339,176 @@ destroy_ft:  	if (ft_prio_tx)  		put_flow_table(dev, ft_prio_tx, false);  unlock: -	mutex_unlock(&dev->flow_db.lock); +	mutex_unlock(&dev->flow_db->lock);  	kfree(dst);  	kfree(handler);  	return ERR_PTR(err);  } +static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags) +{ +	u32 flags = 0; + +	if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA) +		flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA; + +	return flags; +} + +#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED	MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA +static struct ib_flow_action * +mlx5_ib_create_flow_action_esp(struct ib_device *device, +			       const struct ib_flow_action_attrs_esp *attr, +			       struct uverbs_attr_bundle *attrs) +{ +	struct mlx5_ib_dev *mdev = to_mdev(device); +	struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm; +	struct mlx5_accel_esp_xfrm_attrs 
accel_attrs = {}; +	struct mlx5_ib_flow_action *action; +	u64 action_flags; +	u64 flags; +	int err = 0; + +	if (IS_UVERBS_COPY_ERR(uverbs_copy_from(&action_flags, attrs, +						MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS))) +		return ERR_PTR(-EFAULT); + +	if (action_flags >= (MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1)) +		return ERR_PTR(-EOPNOTSUPP); + +	flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags); + +	/* We current only support a subset of the standard features. Only a +	 * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn +	 * (with overlap). Full offload mode isn't supported. +	 */ +	if (!attr->keymat || attr->replay || attr->encap || +	    attr->spi || attr->seq || attr->tfc_pad || +	    attr->hard_limit_pkts || +	    (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | +			     IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT))) +		return ERR_PTR(-EOPNOTSUPP); + +	if (attr->keymat->protocol != +	    IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM) +		return ERR_PTR(-EOPNOTSUPP); + +	aes_gcm = &attr->keymat->keymat.aes_gcm; + +	if (aes_gcm->icv_len != 16 || +	    aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ) +		return ERR_PTR(-EOPNOTSUPP); + +	action = kmalloc(sizeof(*action), GFP_KERNEL); +	if (!action) +		return ERR_PTR(-ENOMEM); + +	action->esp_aes_gcm.ib_flags = attr->flags; +	memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key, +	       sizeof(accel_attrs.keymat.aes_gcm.aes_key)); +	accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8; +	memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt, +	       sizeof(accel_attrs.keymat.aes_gcm.salt)); +	memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv, +	       sizeof(accel_attrs.keymat.aes_gcm.seq_iv)); +	accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8; +	accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ; +	accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM; + +	accel_attrs.esn = attr->esn; +	if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) +		accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED; +	if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) +		accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; + +	if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT) +		accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT; + +	action->esp_aes_gcm.ctx = +		mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags); +	if (IS_ERR(action->esp_aes_gcm.ctx)) { +		err = PTR_ERR(action->esp_aes_gcm.ctx); +		goto err_parse; +	} + +	action->esp_aes_gcm.ib_flags = attr->flags; + +	return &action->ib_action; + +err_parse: +	kfree(action); +	return ERR_PTR(err); +} + +static int +mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action, +			       const struct ib_flow_action_attrs_esp *attr, +			       struct uverbs_attr_bundle *attrs) +{ +	struct mlx5_ib_flow_action *maction = to_mflow_act(action); +	struct mlx5_accel_esp_xfrm_attrs accel_attrs; +	int err = 0; + +	if (attr->keymat || attr->replay || attr->encap || +	    attr->spi || attr->seq || attr->tfc_pad || +	    attr->hard_limit_pkts || +	    (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | +			     IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS | +			     IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))) +		return -EOPNOTSUPP; + +	/* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can +	 * be modified. 
+	 */ +	if (!(maction->esp_aes_gcm.ib_flags & +	      IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) && +	    attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | +			   IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)) +		return -EINVAL; + +	memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs, +	       sizeof(accel_attrs)); + +	accel_attrs.esn = attr->esn; +	if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) +		accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; +	else +		accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; + +	err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx, +					 &accel_attrs); +	if (err) +		return err; + +	maction->esp_aes_gcm.ib_flags &= +		~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; +	maction->esp_aes_gcm.ib_flags |= +		attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; + +	return 0; +} + +static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action) +{ +	struct mlx5_ib_flow_action *maction = to_mflow_act(action); + +	switch (action->type) { +	case IB_FLOW_ACTION_ESP: +		/* +		 * We only support aes_gcm by now, so we implicitly know this is +		 * the underline crypto. +		 */ +		mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx); +		break; +	default: +		WARN_ON(true); +		break; +	} + +	kfree(maction); +	return 0; +} +  static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)  {  	struct mlx5_ib_dev *dev = to_mdev(ibqp->device); @@ -3448,9 +3889,12 @@ static void destroy_umrc_res(struct mlx5_ib_dev *dev)  	if (err)  		mlx5_ib_warn(dev, "mr cache cleanup failed\n"); -	mlx5_ib_destroy_qp(dev->umrc.qp); -	ib_free_cq(dev->umrc.cq); -	ib_dealloc_pd(dev->umrc.pd); +	if (dev->umrc.qp) +		mlx5_ib_destroy_qp(dev->umrc.qp); +	if (dev->umrc.cq) +		ib_free_cq(dev->umrc.cq); +	if (dev->umrc.pd) +		ib_dealloc_pd(dev->umrc.pd);  }  enum { @@ -3552,12 +3996,15 @@ static int create_umr_res(struct mlx5_ib_dev *dev)  error_4:  	mlx5_ib_destroy_qp(qp); +	dev->umrc.qp = NULL;  error_3:  	ib_free_cq(cq); +	dev->umrc.cq = NULL;  error_2:  	ib_dealloc_pd(pd); +	dev->umrc.pd = NULL;  error_0:  	kfree(attr); @@ -3769,6 +4216,25 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,  	return 0;  } +static int mlx5_port_rep_immutable(struct ib_device *ibdev, u8 port_num, +				   struct ib_port_immutable *immutable) +{ +	struct ib_port_attr attr; +	int err; + +	immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET; + +	err = ib_query_port(ibdev, port_num, &attr); +	if (err) +		return err; + +	immutable->pkey_tbl_len = attr.pkey_tbl_len; +	immutable->gid_tbl_len = attr.gid_tbl_len; +	immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET; + +	return 0; +} +  static void get_dev_fw_str(struct ib_device *ibdev, char *str)  {  	struct mlx5_ib_dev *dev = @@ -3799,7 +4265,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)  		goto err_destroy_vport_lag;  	} -	dev->flow_db.lag_demux_ft = ft; +	dev->flow_db->lag_demux_ft = ft;  	return 0;  err_destroy_vport_lag: @@ -3811,9 +4277,9 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)  {  	struct mlx5_core_dev *mdev = dev->mdev; -	if (dev->flow_db.lag_demux_ft) { -		mlx5_destroy_flow_table(dev->flow_db.lag_demux_ft); -		dev->flow_db.lag_demux_ft = NULL; +	if (dev->flow_db->lag_demux_ft) { +		mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft); +		dev->flow_db->lag_demux_ft = NULL;  		mlx5_cmd_destroy_vport_lag(mdev);  	} @@ -3845,14 +4311,10 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num)  {  	int err; -	err = 
mlx5_add_netdev_notifier(dev, port_num); -	if (err) -		return err; -  	if (MLX5_CAP_GEN(dev->mdev, roce)) {  		err = mlx5_nic_vport_enable_roce(dev->mdev);  		if (err) -			goto err_unregister_netdevice_notifier; +			return err;  	}  	err = mlx5_eth_lag_init(dev); @@ -3865,8 +4327,6 @@ err_disable_roce:  	if (MLX5_CAP_GEN(dev->mdev, roce))  		mlx5_nic_vport_disable_roce(dev->mdev); -err_unregister_netdevice_notifier: -	mlx5_remove_netdev_notifier(dev, port_num);  	return err;  } @@ -4500,7 +4960,48 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)  	mlx5_nic_vport_disable_roce(dev->mdev);  } -static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) +ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_dm, UVERBS_OBJECT_DM, +			     UVERBS_METHOD_DM_ALLOC, +			     &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, +						  UVERBS_ATTR_TYPE(u64), +						  UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), +			     &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, +						  UVERBS_ATTR_TYPE(u16), +						  UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_flow_action, UVERBS_OBJECT_FLOW_ACTION, +			     UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, +			     &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, +						 UVERBS_ATTR_TYPE(u64), +						 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +#define NUM_TREES	2 +static int populate_specs_root(struct mlx5_ib_dev *dev) +{ +	const struct uverbs_object_tree_def *default_root[NUM_TREES + 1] = { +		uverbs_default_get_objects()}; +	size_t num_trees = 1; + +	if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE && +	    !WARN_ON(num_trees >= ARRAY_SIZE(default_root))) +		default_root[num_trees++] = &mlx5_ib_flow_action; + +	if (MLX5_CAP_DEV_MEM(dev->mdev, memic) && +	    !WARN_ON(num_trees >= ARRAY_SIZE(default_root))) +		default_root[num_trees++] = &mlx5_ib_dm; + +	dev->ib_dev.specs_root = +		uverbs_alloc_spec_tree(num_trees, default_root); + +	return PTR_ERR_OR_ZERO(dev->ib_dev.specs_root); +} + +static void depopulate_specs_root(struct mlx5_ib_dev *dev) +{ +	uverbs_free_spec_tree(dev->ib_dev.specs_root); +} + +void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)  {  	mlx5_ib_cleanup_multiport_master(dev);  #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING @@ -4509,7 +5010,7 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)  	kfree(dev->port);  } -static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) +int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)  {  	struct mlx5_core_dev *mdev = dev->mdev;  	const char *name; @@ -4531,8 +5032,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)  		goto err_free_port;  	if (!mlx5_core_mp_enabled(mdev)) { -		int i; -  		for (i = 1; i <= dev->num_ports; i++) {  			err = get_port_caps(dev, i);  			if (err) @@ -4561,11 +5060,13 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)  		dev->mdev->priv.eq_table.num_comp_vectors;  	dev->ib_dev.dev.parent		= &mdev->pdev->dev; -	mutex_init(&dev->flow_db.lock);  	mutex_init(&dev->cap_mask_mutex);  	INIT_LIST_HEAD(&dev->qp_list);  	spin_lock_init(&dev->reset_flow_resource_lock); +	spin_lock_init(&dev->memic.memic_lock); +	dev->memic.dev = mdev; +  #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING  	err = init_srcu_struct(&dev->mr_srcu);  	if (err) @@ -4582,7 +5083,38 @@ err_free_port:  	return -ENOMEM;  } -static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev) +{ +	dev->flow_db = 
kzalloc(sizeof(*dev->flow_db), GFP_KERNEL); + +	if (!dev->flow_db) +		return -ENOMEM; + +	mutex_init(&dev->flow_db->lock); + +	return 0; +} + +int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev) +{ +	struct mlx5_ib_dev *nic_dev; + +	nic_dev = mlx5_ib_get_uplink_ibdev(dev->mdev->priv.eswitch); + +	if (!nic_dev) +		return -EINVAL; + +	dev->flow_db = nic_dev->flow_db; + +	return 0; +} + +static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev) +{ +	kfree(dev->flow_db); +} + +int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)  {  	struct mlx5_core_dev *mdev = dev->mdev;  	int err; @@ -4623,7 +5155,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)  		(1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ);  	dev->ib_dev.query_device	= mlx5_ib_query_device; -	dev->ib_dev.query_port		= mlx5_ib_query_port;  	dev->ib_dev.get_link_layer	= mlx5_ib_port_link_layer;  	dev->ib_dev.query_gid		= mlx5_ib_query_gid;  	dev->ib_dev.add_gid		= mlx5_ib_add_gid; @@ -4666,7 +5197,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)  	dev->ib_dev.alloc_mr		= mlx5_ib_alloc_mr;  	dev->ib_dev.map_mr_sg		= mlx5_ib_map_mr_sg;  	dev->ib_dev.check_mr_status	= mlx5_ib_check_mr_status; -	dev->ib_dev.get_port_immutable  = mlx5_port_immutable;  	dev->ib_dev.get_dev_fw_str      = get_dev_fw_str;  	dev->ib_dev.get_vector_affinity	= mlx5_ib_get_vector_affinity;  	if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) @@ -4699,11 +5229,21 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)  			(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);  	} +	if (MLX5_CAP_DEV_MEM(mdev, memic)) { +		dev->ib_dev.alloc_dm = mlx5_ib_alloc_dm; +		dev->ib_dev.dealloc_dm = mlx5_ib_dealloc_dm; +		dev->ib_dev.reg_dm_mr = mlx5_ib_reg_dm_mr; +	} +  	dev->ib_dev.create_flow	= mlx5_ib_create_flow;  	dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;  	dev->ib_dev.uverbs_ex_cmd_mask |=  			(1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |  			(1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); +	dev->ib_dev.create_flow_action_esp = mlx5_ib_create_flow_action_esp; +	dev->ib_dev.destroy_flow_action = mlx5_ib_destroy_flow_action; +	dev->ib_dev.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp; +	dev->ib_dev.driver_id = RDMA_DRIVER_MLX5;  	err = init_node_data(dev);  	if (err) @@ -4717,6 +5257,80 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)  	return 0;  } +static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev) +{ +	dev->ib_dev.get_port_immutable  = mlx5_port_immutable; +	dev->ib_dev.query_port		= mlx5_ib_query_port; + +	return 0; +} + +int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev) +{ +	dev->ib_dev.get_port_immutable  = mlx5_port_rep_immutable; +	dev->ib_dev.query_port		= mlx5_ib_rep_query_port; + +	return 0; +} + +static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev, +					  u8 port_num) +{ +	int i; + +	for (i = 0; i < dev->num_ports; i++) { +		dev->roce[i].dev = dev; +		dev->roce[i].native_port_num = i + 1; +		dev->roce[i].last_port_state = IB_PORT_DOWN; +	} + +	dev->ib_dev.get_netdev	= mlx5_ib_get_netdev; +	dev->ib_dev.create_wq	 = mlx5_ib_create_wq; +	dev->ib_dev.modify_wq	 = mlx5_ib_modify_wq; +	dev->ib_dev.destroy_wq	 = mlx5_ib_destroy_wq; +	dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table; +	dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table; + +	dev->ib_dev.uverbs_ex_cmd_mask |= +			(1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | +			(1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | +			(1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | +			(1ull << 
IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | +			(1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); + +	return mlx5_add_netdev_notifier(dev, port_num); +} + +static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev) +{ +	u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1; + +	mlx5_remove_netdev_notifier(dev, port_num); +} + +int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev) +{ +	struct mlx5_core_dev *mdev = dev->mdev; +	enum rdma_link_layer ll; +	int port_type_cap; +	int err = 0; +	u8 port_num; + +	port_num = mlx5_core_native_port_num(dev->mdev) - 1; +	port_type_cap = MLX5_CAP_GEN(mdev, port_type); +	ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); + +	if (ll == IB_LINK_LAYER_ETHERNET) +		err = mlx5_ib_stage_common_roce_init(dev, port_num); + +	return err; +} + +void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev) +{ +	mlx5_ib_stage_common_roce_cleanup(dev); +} +  static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)  {  	struct mlx5_core_dev *mdev = dev->mdev; @@ -4724,37 +5338,26 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)  	int port_type_cap;  	u8 port_num;  	int err; -	int i;  	port_num = mlx5_core_native_port_num(dev->mdev) - 1;  	port_type_cap = MLX5_CAP_GEN(mdev, port_type);  	ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);  	if (ll == IB_LINK_LAYER_ETHERNET) { -		for (i = 0; i < dev->num_ports; i++) { -			dev->roce[i].dev = dev; -			dev->roce[i].native_port_num = i + 1; -			dev->roce[i].last_port_state = IB_PORT_DOWN; -		} +		err = mlx5_ib_stage_common_roce_init(dev, port_num); +		if (err) +			return err; -		dev->ib_dev.get_netdev	= mlx5_ib_get_netdev; -		dev->ib_dev.create_wq	 = mlx5_ib_create_wq; -		dev->ib_dev.modify_wq	 = mlx5_ib_modify_wq; -		dev->ib_dev.destroy_wq	 = mlx5_ib_destroy_wq; -		dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table; -		dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table; -		dev->ib_dev.uverbs_ex_cmd_mask |= -			(1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | -			(1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | -			(1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | -			(1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | -			(1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);  		err = mlx5_enable_eth(dev, port_num);  		if (err) -			return err; +			goto cleanup;  	}  	return 0; +cleanup: +	mlx5_ib_stage_common_roce_cleanup(dev); + +	return err;  }  static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev) @@ -4770,16 +5373,16 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)  	if (ll == IB_LINK_LAYER_ETHERNET) {  		mlx5_disable_eth(dev); -		mlx5_remove_netdev_notifier(dev, port_num); +		mlx5_ib_stage_common_roce_cleanup(dev);  	}  } -static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev) +int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)  {  	return create_dev_resources(&dev->devr);  } -static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev) +void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)  {  	destroy_dev_resources(&dev->devr);  } @@ -4791,7 +5394,7 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)  	return mlx5_ib_odp_init_one(dev);  } -static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) +int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)  {  	if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {  		dev->ib_dev.get_hw_stats	= mlx5_ib_get_hw_stats; @@ -4803,7 +5406,7 @@ static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)  	return 0;  } -static void mlx5_ib_stage_counters_cleanup(struct 
mlx5_ib_dev *dev) +void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)  {  	if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))  		mlx5_ib_dealloc_counters(dev); @@ -4834,7 +5437,7 @@ static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev)  	mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);  } -static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev) +int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)  {  	int err; @@ -4849,28 +5452,38 @@ static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)  	return err;  } -static void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev) +void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)  {  	mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);  	mlx5_free_bfreg(dev->mdev, &dev->bfreg);  } -static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_populate_specs(struct mlx5_ib_dev *dev) +{ +	return populate_specs_root(dev); +} + +int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)  {  	return ib_register_device(&dev->ib_dev, NULL);  } -static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_depopulate_specs(struct mlx5_ib_dev *dev) +{ +	depopulate_specs_root(dev); +} + +void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)  {  	destroy_umrc_res(dev);  } -static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) +void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)  {  	ib_unregister_device(&dev->ib_dev);  } -static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev) +int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)  {  	return create_umr_res(dev);  } @@ -4887,7 +5500,7 @@ static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)  	cancel_delay_drop(dev);  } -static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev) +int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)  {  	int err;  	int i; @@ -4902,9 +5515,21 @@ static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)  	return 0;  } -static void __mlx5_ib_remove(struct mlx5_ib_dev *dev, -			     const struct mlx5_ib_profile *profile, -			     int stage) +static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev) +{ +	mlx5_ib_register_vport_reps(dev); + +	return 0; +} + +static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev) +{ +	mlx5_ib_unregister_vport_reps(dev); +} + +void __mlx5_ib_remove(struct mlx5_ib_dev *dev, +		      const struct mlx5_ib_profile *profile, +		      int stage)  {  	/* Number of stages to cleanup */  	while (stage) { @@ -4918,23 +5543,14 @@ static void __mlx5_ib_remove(struct mlx5_ib_dev *dev,  static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num); -static void *__mlx5_ib_add(struct mlx5_core_dev *mdev, -			   const struct mlx5_ib_profile *profile) +void *__mlx5_ib_add(struct mlx5_ib_dev *dev, +		    const struct mlx5_ib_profile *profile)  { -	struct mlx5_ib_dev *dev;  	int err;  	int i;  	printk_once(KERN_INFO "%s", mlx5_version); -	dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); -	if (!dev) -		return NULL; - -	dev->mdev = mdev; -	dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports), -			     MLX5_CAP_GEN(mdev, num_vhca_ports)); -  	for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {  		if (profile->stage[i].init) {  			err = profile->stage[i].init(dev); @@ -4958,9 +5574,15 @@ static const struct mlx5_ib_profile pf_profile = {  	STAGE_CREATE(MLX5_IB_STAGE_INIT,  		     mlx5_ib_stage_init_init,  		     mlx5_ib_stage_init_cleanup), +	
STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB, +		     mlx5_ib_stage_flow_db_init, +		     mlx5_ib_stage_flow_db_cleanup),  	STAGE_CREATE(MLX5_IB_STAGE_CAPS,  		     mlx5_ib_stage_caps_init,  		     NULL), +	STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB, +		     mlx5_ib_stage_non_default_cb, +		     NULL),  	STAGE_CREATE(MLX5_IB_STAGE_ROCE,  		     mlx5_ib_stage_roce_init,  		     mlx5_ib_stage_roce_cleanup), @@ -4985,6 +5607,9 @@ static const struct mlx5_ib_profile pf_profile = {  	STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,  		     NULL,  		     mlx5_ib_stage_pre_ib_reg_umr_cleanup), +	STAGE_CREATE(MLX5_IB_STAGE_SPECS, +		     mlx5_ib_stage_populate_specs, +		     mlx5_ib_stage_depopulate_specs),  	STAGE_CREATE(MLX5_IB_STAGE_IB_REG,  		     mlx5_ib_stage_ib_reg_init,  		     mlx5_ib_stage_ib_reg_cleanup), @@ -4999,6 +5624,54 @@ static const struct mlx5_ib_profile pf_profile = {  		     NULL),  }; +static const struct mlx5_ib_profile nic_rep_profile = { +	STAGE_CREATE(MLX5_IB_STAGE_INIT, +		     mlx5_ib_stage_init_init, +		     mlx5_ib_stage_init_cleanup), +	STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB, +		     mlx5_ib_stage_flow_db_init, +		     mlx5_ib_stage_flow_db_cleanup), +	STAGE_CREATE(MLX5_IB_STAGE_CAPS, +		     mlx5_ib_stage_caps_init, +		     NULL), +	STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB, +		     mlx5_ib_stage_rep_non_default_cb, +		     NULL), +	STAGE_CREATE(MLX5_IB_STAGE_ROCE, +		     mlx5_ib_stage_rep_roce_init, +		     mlx5_ib_stage_rep_roce_cleanup), +	STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, +		     mlx5_ib_stage_dev_res_init, +		     mlx5_ib_stage_dev_res_cleanup), +	STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, +		     mlx5_ib_stage_counters_init, +		     mlx5_ib_stage_counters_cleanup), +	STAGE_CREATE(MLX5_IB_STAGE_UAR, +		     mlx5_ib_stage_uar_init, +		     mlx5_ib_stage_uar_cleanup), +	STAGE_CREATE(MLX5_IB_STAGE_BFREG, +		     mlx5_ib_stage_bfrag_init, +		     mlx5_ib_stage_bfrag_cleanup), +	STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR, +		     NULL, +		     mlx5_ib_stage_pre_ib_reg_umr_cleanup), +	STAGE_CREATE(MLX5_IB_STAGE_SPECS, +		     mlx5_ib_stage_populate_specs, +		     mlx5_ib_stage_depopulate_specs), +	STAGE_CREATE(MLX5_IB_STAGE_IB_REG, +		     mlx5_ib_stage_ib_reg_init, +		     mlx5_ib_stage_ib_reg_cleanup), +	STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR, +		     mlx5_ib_stage_post_ib_reg_umr_init, +		     NULL), +	STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR, +		     mlx5_ib_stage_class_attr_init, +		     NULL), +	STAGE_CREATE(MLX5_IB_STAGE_REP_REG, +		     mlx5_ib_stage_rep_reg_init, +		     mlx5_ib_stage_rep_reg_cleanup), +}; +  static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)  {  	struct mlx5_ib_multiport_info *mpi; @@ -5044,8 +5717,11 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)  static void *mlx5_ib_add(struct mlx5_core_dev *mdev)  {  	enum rdma_link_layer ll; +	struct mlx5_ib_dev *dev;  	int port_type_cap; +	printk_once(KERN_INFO "%s", mlx5_version); +  	port_type_cap = MLX5_CAP_GEN(mdev, port_type);  	ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); @@ -5055,7 +5731,22 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)  		return mlx5_ib_add_slave_port(mdev, port_num);  	} -	return __mlx5_ib_add(mdev, &pf_profile); +	dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); +	if (!dev) +		return NULL; + +	dev->mdev = mdev; +	dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports), +			     MLX5_CAP_GEN(mdev, num_vhca_ports)); + +	if (MLX5_VPORT_MANAGER(mdev) && +	    mlx5_ib_eswitch_mode(mdev->priv.eswitch) == 
SRIOV_OFFLOADS) { +		dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0); + +		return __mlx5_ib_add(dev, &nic_rep_profile); +	} + +	return __mlx5_ib_add(dev, &pf_profile);  }  static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) @@ -5087,13 +5778,32 @@ static struct mlx5_interface mlx5_ib_interface = {  	.protocol	= MLX5_INTERFACE_PROTOCOL_IB,  }; +unsigned long mlx5_ib_get_xlt_emergency_page(void) +{ +	mutex_lock(&xlt_emergency_page_mutex); +	return xlt_emergency_page; +} + +void mlx5_ib_put_xlt_emergency_page(void) +{ +	mutex_unlock(&xlt_emergency_page_mutex); +} +  static int __init mlx5_ib_init(void)  {  	int err; +	xlt_emergency_page = __get_free_page(GFP_KERNEL); +	if (!xlt_emergency_page) +		return -ENOMEM; + +	mutex_init(&xlt_emergency_page_mutex); +  	mlx5_ib_event_wq = alloc_ordered_workqueue("mlx5_ib_event_wq", 0); -	if (!mlx5_ib_event_wq) +	if (!mlx5_ib_event_wq) { +		free_page(xlt_emergency_page);  		return -ENOMEM; +	}  	mlx5_ib_odp_init(); @@ -5106,6 +5816,8 @@ static void __exit mlx5_ib_cleanup(void)  {  	mlx5_unregister_interface(&mlx5_ib_interface);  	destroy_workqueue(mlx5_ib_event_wq); +	mutex_destroy(&xlt_emergency_page_mutex); +	free_page(xlt_emergency_page);  }  module_init(mlx5_ib_init); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index a5272499b600..49a1aa0ff429 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -45,6 +45,7 @@  #include <linux/mlx5/transobj.h>  #include <rdma/ib_user_verbs.h>  #include <rdma/mlx5-abi.h> +#include <rdma/uverbs_ioctl.h>  #define mlx5_ib_dbg(dev, format, arg...)				\  pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__,	\ @@ -108,6 +109,16 @@ enum {  	MLX5_IB_INVALID_BFREG		= BIT(31),  }; +enum { +	MLX5_MAX_MEMIC_PAGES = 0x100, +	MLX5_MEMIC_ALLOC_SIZE_MASK = 0x3f, +}; + +enum { +	MLX5_MEMIC_BASE_ALIGN	= 6, +	MLX5_MEMIC_BASE_SIZE	= 1 << MLX5_MEMIC_BASE_ALIGN, +}; +  struct mlx5_ib_vma_private_data {  	struct list_head list;  	struct vm_area_struct *vma; @@ -130,10 +141,8 @@ struct mlx5_ib_ucontext {  	/* protect vma_private_list add/del */  	struct mutex		vma_private_list_mutex; -	unsigned long		upd_xlt_page; -	/* protect ODP/KSM */ -	struct mutex		upd_xlt_page_mutex;  	u64			lib_caps; +	DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES);  };  static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) @@ -155,6 +164,7 @@ struct mlx5_ib_pd {  #define MLX5_IB_NUM_FLOW_FT		(MLX5_IB_FLOW_LEFTOVERS_PRIO + 1)  #define MLX5_IB_NUM_SNIFFER_FTS		2 +#define MLX5_IB_NUM_EGRESS_FTS		1  struct mlx5_ib_flow_prio {  	struct mlx5_flow_table		*flow_table;  	unsigned int			refcount; @@ -170,6 +180,7 @@ struct mlx5_ib_flow_handler {  struct mlx5_ib_flow_db {  	struct mlx5_ib_flow_prio	prios[MLX5_IB_NUM_FLOW_FT];  	struct mlx5_ib_flow_prio	sniffer[MLX5_IB_NUM_SNIFFER_FTS]; +	struct mlx5_ib_flow_prio	egress[MLX5_IB_NUM_EGRESS_FTS];  	struct mlx5_flow_table		*lag_demux_ft;  	/* Protect flow steering bypass flow tables  	 * when add/del flow rules. 
@@ -343,6 +354,7 @@ struct mlx5_ib_sq {  	struct mlx5_ib_wq	*sq;  	struct mlx5_ib_ubuffer  ubuffer;  	struct mlx5_db		*doorbell; +	struct mlx5_flow_handle	*flow_rule;  	u32			tisn;  	u8			state;  }; @@ -371,7 +383,7 @@ struct mlx5_ib_qp {  		struct mlx5_ib_rss_qp rss_qp;  		struct mlx5_ib_dct dct;  	}; -	struct mlx5_buf		buf; +	struct mlx5_frag_buf	buf;  	struct mlx5_db		db;  	struct mlx5_ib_wq	rq; @@ -405,7 +417,7 @@ struct mlx5_ib_qp {  	struct list_head	qps_list;  	struct list_head	cq_recv_list;  	struct list_head	cq_send_list; -	u32			rate_limit; +	struct mlx5_rate_limit	rl;  	u32                     underlay_qpn;  	bool			tunnel_offload_en;  	/* storage for qp sub type when core qp type is IB_QPT_DRIVER */ @@ -413,7 +425,7 @@ struct mlx5_ib_qp {  };  struct mlx5_ib_cq_buf { -	struct mlx5_buf		buf; +	struct mlx5_frag_buf_ctrl fbc;  	struct ib_umem		*umem;  	int			cqe_size;  	int			nent; @@ -495,7 +507,7 @@ struct mlx5_ib_wc {  struct mlx5_ib_srq {  	struct ib_srq		ibsrq;  	struct mlx5_core_srq	msrq; -	struct mlx5_buf		buf; +	struct mlx5_frag_buf	buf;  	struct mlx5_db		db;  	u64		       *wrid;  	/* protect SRQ hanlding @@ -521,8 +533,19 @@ enum mlx5_ib_mtt_access_flags {  	MLX5_IB_MTT_WRITE = (1 << 1),  }; +struct mlx5_ib_dm { +	struct ib_dm		ibdm; +	phys_addr_t		dev_addr; +}; +  #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE) +#define MLX5_IB_DM_ALLOWED_ACCESS (IB_ACCESS_LOCAL_WRITE   |\ +				   IB_ACCESS_REMOTE_WRITE  |\ +				   IB_ACCESS_REMOTE_READ   |\ +				   IB_ACCESS_REMOTE_ATOMIC |\ +				   IB_ZERO_BASED) +  struct mlx5_ib_mr {  	struct ib_mr		ibmr;  	void			*descs; @@ -731,7 +754,9 @@ struct mlx5_ib_delay_drop {  enum mlx5_ib_stages {  	MLX5_IB_STAGE_INIT, +	MLX5_IB_STAGE_FLOW_DB,  	MLX5_IB_STAGE_CAPS, +	MLX5_IB_STAGE_NON_DEFAULT_CB,  	MLX5_IB_STAGE_ROCE,  	MLX5_IB_STAGE_DEVICE_RESOURCES,  	MLX5_IB_STAGE_ODP, @@ -740,10 +765,12 @@ enum mlx5_ib_stages {  	MLX5_IB_STAGE_UAR,  	MLX5_IB_STAGE_BFREG,  	MLX5_IB_STAGE_PRE_IB_REG_UMR, +	MLX5_IB_STAGE_SPECS,  	MLX5_IB_STAGE_IB_REG,  	MLX5_IB_STAGE_POST_IB_REG_UMR,  	MLX5_IB_STAGE_DELAY_DROP,  	MLX5_IB_STAGE_CLASS_ATTR, +	MLX5_IB_STAGE_REP_REG,  	MLX5_IB_STAGE_MAX,  }; @@ -770,6 +797,22 @@ struct mlx5_ib_multiport_info {  	bool unaffiliate;  }; +struct mlx5_ib_flow_action { +	struct ib_flow_action		ib_action; +	union { +		struct { +			u64			    ib_flags; +			struct mlx5_accel_esp_xfrm *ctx; +		} esp_aes_gcm; +	}; +}; + +struct mlx5_memic { +	struct mlx5_core_dev *dev; +	spinlock_t		memic_lock; +	DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES); +}; +  struct mlx5_ib_dev {  	struct ib_device		ib_dev;  	struct mlx5_core_dev		*mdev; @@ -798,7 +841,7 @@ struct mlx5_ib_dev {  	struct srcu_struct      mr_srcu;  	u32			null_mkey;  #endif -	struct mlx5_ib_flow_db	flow_db; +	struct mlx5_ib_flow_db	*flow_db;  	/* protect resources needed as part of reset flow */  	spinlock_t		reset_flow_resource_lock;  	struct list_head	qp_list; @@ -808,6 +851,7 @@ struct mlx5_ib_dev {  	struct mlx5_sq_bfreg	fp_bfreg;  	struct mlx5_ib_delay_drop	delay_drop;  	const struct mlx5_ib_profile	*profile; +	struct mlx5_eswitch_rep		*rep;  	/* protect the user_td */  	struct mutex		lb_mutex; @@ -815,6 +859,7 @@ struct mlx5_ib_dev {  	u8			umr_fence;  	struct list_head	ib_dev_list;  	u64			sys_image_guid; +	struct mlx5_memic	memic;  };  static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -882,6 +927,11 @@ static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq)  	return container_of(msrq, struct mlx5_ib_srq, 
msrq);  } +static inline struct mlx5_ib_dm *to_mdm(struct ib_dm *ibdm) +{ +	return container_of(ibdm, struct mlx5_ib_dm, ibdm); +} +  static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr)  {  	return container_of(ibmr, struct mlx5_ib_mr, ibmr); @@ -892,6 +942,12 @@ static inline struct mlx5_ib_mw *to_mmw(struct ib_mw *ibmw)  	return container_of(ibmw, struct mlx5_ib_mw, ibmw);  } +static inline struct mlx5_ib_flow_action * +to_mflow_act(struct ib_flow_action *ibact) +{ +	return container_of(ibact, struct mlx5_ib_flow_action, ib_action); +} +  int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,  			struct mlx5_db *db);  void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db); @@ -1020,7 +1076,14 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,  						      struct ib_udata *udata);  int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);  bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev); - +struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, +			       struct ib_ucontext *context, +			       struct ib_dm_alloc_attr *attr, +			       struct uverbs_attr_bundle *attrs); +int mlx5_ib_dealloc_dm(struct ib_dm *ibdm); +struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, +				struct ib_dm_mr_attr *attr, +				struct uverbs_attr_bundle *attrs);  #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING  void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev); @@ -1050,6 +1113,31 @@ static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,  #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ +/* Needed for rep profile */ +int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev); +void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev); +int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev); +int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev); +int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev); +int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev); +void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev); +int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev); +void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev); +int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev); +void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev); +int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev); +void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev); +void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev); +int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev); +void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev); +int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev); +int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev); +void __mlx5_ib_remove(struct mlx5_ib_dev *dev, +		      const struct mlx5_ib_profile *profile, +		      int stage); +void *__mlx5_ib_add(struct mlx5_ib_dev *dev, +		    const struct mlx5_ib_profile *profile); +  int mlx5_ib_get_vf_config(struct ib_device *device, int vf,  			  u8 port, struct ifla_vf_info *info);  int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf, @@ -1191,4 +1279,7 @@ static inline int get_num_static_uars(struct mlx5_ib_dev *dev,  	return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_static_sys_pages;  } +unsigned long mlx5_ib_get_xlt_emergency_page(void); +void mlx5_ib_put_xlt_emergency_page(void); +  #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 
c51c602f06d6..1520a2f20f98 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -47,10 +47,25 @@ enum {  #define MLX5_UMR_ALIGN 2048 -static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); -static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); +static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); +static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);  static int mr_cache_max_order(struct mlx5_ib_dev *dev);  static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); +static bool umr_can_modify_entity_size(struct mlx5_ib_dev *dev) +{ +	return !MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled); +} + +static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) +{ +	return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled); +} + +static bool use_umr(struct mlx5_ib_dev *dev, int order) +{ +	return order <= mr_cache_max_order(dev) && +		umr_can_modify_entity_size(dev); +}  static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)  { @@ -189,7 +204,9 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)  		MLX5_SET(mkc, mkc, free, 1);  		MLX5_SET(mkc, mkc, umr_en, 1); -		MLX5_SET(mkc, mkc, access_mode, ent->access_mode); +		MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3); +		MLX5_SET(mkc, mkc, access_mode_4_2, +			 (ent->access_mode >> 2) & 0x7);  		MLX5_SET(mkc, mkc, qpn, 0xffffff);  		MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt); @@ -220,26 +237,32 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)  {  	struct mlx5_mr_cache *cache = &dev->cache;  	struct mlx5_cache_ent *ent = &cache->ent[c]; +	struct mlx5_ib_mr *tmp_mr;  	struct mlx5_ib_mr *mr; -	int err; +	LIST_HEAD(del_list);  	int i;  	for (i = 0; i < num; i++) {  		spin_lock_irq(&ent->lock);  		if (list_empty(&ent->head)) {  			spin_unlock_irq(&ent->lock); -			return; +			break;  		}  		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); -		list_del(&mr->list); +		list_move(&mr->list, &del_list);  		ent->cur--;  		ent->size--;  		spin_unlock_irq(&ent->lock); -		err = destroy_mkey(dev, mr); -		if (err) -			mlx5_ib_warn(dev, "failed destroy mkey\n"); -		else -			kfree(mr); +		mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); +	} + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING +	synchronize_srcu(&dev->mr_srcu); +#endif + +	list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { +		list_del(&mr->list); +		kfree(mr);  	}  } @@ -562,32 +585,38 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)  {  	struct mlx5_mr_cache *cache = &dev->cache;  	struct mlx5_cache_ent *ent = &cache->ent[c]; +	struct mlx5_ib_mr *tmp_mr;  	struct mlx5_ib_mr *mr; -	int err; +	LIST_HEAD(del_list);  	cancel_delayed_work(&ent->dwork);  	while (1) {  		spin_lock_irq(&ent->lock);  		if (list_empty(&ent->head)) {  			spin_unlock_irq(&ent->lock); -			return; +			break;  		}  		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); -		list_del(&mr->list); +		list_move(&mr->list, &del_list);  		ent->cur--;  		ent->size--;  		spin_unlock_irq(&ent->lock); -		err = destroy_mkey(dev, mr); -		if (err) -			mlx5_ib_warn(dev, "failed destroy mkey\n"); -		else -			kfree(mr); +		mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); +	} + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING +	synchronize_srcu(&dev->mr_srcu); +#endif + +	list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { +		list_del(&mr->list); +		kfree(mr);  	}  }  static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)  { -	if 
(!mlx5_debugfs_root) +	if (!mlx5_debugfs_root || dev->rep)  		return;  	debugfs_remove_recursive(dev->cache.root); @@ -600,7 +629,7 @@ static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)  	struct mlx5_cache_ent *ent;  	int i; -	if (!mlx5_debugfs_root) +	if (!mlx5_debugfs_root || dev->rep)  		return 0;  	cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root); @@ -690,6 +719,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)  			   MLX5_IB_UMR_OCTOWORD;  		ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;  		if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) && +		    !dev->rep &&  		    mlx5_core_is_pf(dev->mdev))  			ent->limit = dev->mdev->profile->mr_cache[i].limit;  		else @@ -739,6 +769,9 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)  {  	int i; +	if (!dev->cache.wq) +		return 0; +  	dev->cache.stopped = 1;  	flush_workqueue(dev->cache.wq); @@ -776,7 +809,7 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)  	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); -	MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA); +	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);  	MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));  	MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));  	MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); @@ -943,7 +976,10 @@ static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,  {  	struct mlx5_ib_dev *dev = mr->dev;  	struct ib_umem *umem = mr->umem; +  	if (flags & MLX5_IB_UPD_XLT_INDIRECT) { +		if (!umr_can_use_indirect_mkey(dev)) +			return -EPERM;  		mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);  		return npages;  	} @@ -973,7 +1009,6 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,  {  	struct mlx5_ib_dev *dev = mr->dev;  	struct device *ddev = dev->ib_dev.dev.parent; -	struct mlx5_ib_ucontext *uctx = NULL;  	int size;  	void *xlt;  	dma_addr_t dma; @@ -989,6 +1024,11 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,  	size_t pages_to_map = 0;  	size_t pages_iter = 0;  	gfp_t gfp; +	bool use_emergency_page = false; + +	if ((flags & MLX5_IB_UPD_XLT_INDIRECT) && +	    !umr_can_use_indirect_mkey(dev)) +		return -EPERM;  	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,  	 * so we need to align the offset and length accordingly @@ -1015,12 +1055,11 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,  	}  	if (!xlt) { -		uctx = to_mucontext(mr->ibmr.pd->uobject->context);  		mlx5_ib_warn(dev, "Using XLT emergency buffer\n"); +		xlt = (void *)mlx5_ib_get_xlt_emergency_page();  		size = PAGE_SIZE; -		xlt = (void *)uctx->upd_xlt_page; -		mutex_lock(&uctx->upd_xlt_page_mutex);  		memset(xlt, 0, size); +		use_emergency_page = true;  	}  	pages_iter = size / desc_size;  	dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE); @@ -1084,8 +1123,8 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,  	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);  free_xlt: -	if (uctx) -		mutex_unlock(&uctx->upd_xlt_page_mutex); +	if (use_emergency_page) +		mlx5_ib_put_xlt_emergency_page();  	else  		free_pages((unsigned long)xlt, get_order(size)); @@ -1137,7 +1176,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,  	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);  	MLX5_SET(mkc, mkc, free, !populate); -	MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT); +	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);  	
MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));  	MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));  	MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); @@ -1193,22 +1232,96 @@ static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,  	mr->access_flags = access_flags;  } +static struct ib_mr *mlx5_ib_get_memic_mr(struct ib_pd *pd, u64 memic_addr, +					  u64 length, int acc) +{ +	struct mlx5_ib_dev *dev = to_mdev(pd->device); +	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); +	struct mlx5_core_dev *mdev = dev->mdev; +	struct mlx5_ib_mr *mr; +	void *mkc; +	u32 *in; +	int err; + +	mr = kzalloc(sizeof(*mr), GFP_KERNEL); +	if (!mr) +		return ERR_PTR(-ENOMEM); + +	in = kzalloc(inlen, GFP_KERNEL); +	if (!in) { +		err = -ENOMEM; +		goto err_free; +	} + +	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + +	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MEMIC & 0x3); +	MLX5_SET(mkc, mkc, access_mode_4_2, +		 (MLX5_MKC_ACCESS_MODE_MEMIC >> 2) & 0x7); +	MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); +	MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); +	MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); +	MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); +	MLX5_SET(mkc, mkc, lr, 1); + +	MLX5_SET64(mkc, mkc, len, length); +	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); +	MLX5_SET(mkc, mkc, qpn, 0xffffff); +	MLX5_SET64(mkc, mkc, start_addr, +		   memic_addr - pci_resource_start(dev->mdev->pdev, 0)); + +	err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); +	if (err) +		goto err_in; + +	kfree(in); + +	mr->umem = NULL; +	set_mr_fileds(dev, mr, 0, length, acc); + +	return &mr->ibmr; + +err_in: +	kfree(in); + +err_free: +	kfree(mr); + +	return ERR_PTR(err); +} + +struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, +				struct ib_dm_mr_attr *attr, +				struct uverbs_attr_bundle *attrs) +{ +	struct mlx5_ib_dm *mdm = to_mdm(dm); +	u64 memic_addr; + +	if (attr->access_flags & ~MLX5_IB_DM_ALLOWED_ACCESS) +		return ERR_PTR(-EINVAL); + +	memic_addr = mdm->dev_addr + attr->offset; + +	return mlx5_ib_get_memic_mr(pd, memic_addr, attr->length, +				    attr->access_flags); +} +  struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,  				  u64 virt_addr, int access_flags,  				  struct ib_udata *udata)  {  	struct mlx5_ib_dev *dev = to_mdev(pd->device);  	struct mlx5_ib_mr *mr = NULL; +	bool populate_mtts = false;  	struct ib_umem *umem;  	int page_shift;  	int npages;  	int ncont;  	int order;  	int err; -	bool use_umr = true;  	if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) -		return ERR_PTR(-EINVAL); +		return ERR_PTR(-EOPNOTSUPP);  	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",  		    start, virt_addr, length, access_flags); @@ -1220,6 +1333,8 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,  			return ERR_PTR(-EINVAL);  		mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags); +		if (IS_ERR(mr)) +			return ERR_CAST(mr);  		return &mr->ibmr;  	}  #endif @@ -1230,26 +1345,29 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,  	if (err < 0)  		return ERR_PTR(err); -	if (order <= mr_cache_max_order(dev)) { +	if (use_umr(dev, order)) {  		mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont,  					 page_shift, order, access_flags);  		if (PTR_ERR(mr) == -EAGAIN) {  			mlx5_ib_dbg(dev, "cache empty for order %d\n", order);  			mr = NULL;  		} +		populate_mtts 
= false;  	} else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) {  		if (access_flags & IB_ACCESS_ON_DEMAND) {  			err = -EINVAL;  			pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");  			goto error;  		} -		use_umr = false; +		populate_mtts = true;  	}  	if (!mr) { +		if (!umr_can_modify_entity_size(dev)) +			populate_mtts = true;  		mutex_lock(&dev->slow_path_mutex);  		mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, -				page_shift, access_flags, !use_umr); +				page_shift, access_flags, populate_mtts);  		mutex_unlock(&dev->slow_path_mutex);  	} @@ -1267,7 +1385,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,  	update_odp_mr(mr);  #endif -	if (use_umr) { +	if (!populate_mtts) {  		int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;  		if (access_flags & IB_ACCESS_ON_DEMAND) @@ -1282,7 +1400,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,  		}  	} +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING  	mr->live = 1; +#endif  	return &mr->ibmr;  error:  	ib_umem_release(umem); @@ -1361,36 +1481,34 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,  		ib_umem_release(mr->umem);  		err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,  				  &npages, &page_shift, &ncont, &order); -		if (err < 0) { -			clean_mr(dev, mr); -			return err; -		} +		if (err) +			goto err;  	}  	if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) {  		/*  		 * UMR can't be used - MKey needs to be replaced.  		 */ -		if (mr->allocated_from_cache) { +		if (mr->allocated_from_cache)  			err = unreg_umr(dev, mr); -			if (err) -				mlx5_ib_warn(dev, "Failed to unregister MR\n"); -		} else { +		else  			err = destroy_mkey(dev, mr); -			if (err) -				mlx5_ib_warn(dev, "Failed to destroy MKey\n"); -		}  		if (err) -			return err; +			goto err;  		mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,  				page_shift, access_flags, true); -		if (IS_ERR(mr)) -			return PTR_ERR(mr); +		if (IS_ERR(mr)) { +			err = PTR_ERR(mr); +			mr = to_mmr(ib_mr); +			goto err; +		}  		mr->allocated_from_cache = 0; +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING  		mr->live = 1; +#endif  	} else {  		/*  		 * Send a UMR WQE @@ -1413,13 +1531,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,  			err = rereg_umr(pd, mr, access_flags, flags);  		} -		if (err) { -			mlx5_ib_warn(dev, "Failed to rereg UMR\n"); -			ib_umem_release(mr->umem); -			mr->umem = NULL; -			clean_mr(dev, mr); -			return err; -		} +		if (err) +			goto err;  	}  	set_mr_fileds(dev, mr, npages, len, access_flags); @@ -1428,6 +1541,14 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,  	update_odp_mr(mr);  #endif  	return 0; + +err: +	if (mr->umem) { +		ib_umem_release(mr->umem); +		mr->umem = NULL; +	} +	clean_mr(dev, mr); +	return err;  }  static int @@ -1476,10 +1597,9 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)  	}  } -static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) +static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)  {  	int allocated_from_cache = mr->allocated_from_cache; -	int err;  	if (mr->sig) {  		if (mlx5_core_destroy_psv(dev->mdev, @@ -1496,21 +1616,11 @@ static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)  	mlx5_free_priv_descs(mr); -	if (!allocated_from_cache) { -		u32 key = mr->mmkey.key; - -		err = destroy_mkey(dev, mr); -		if (err) { -			mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", -				     key, err); 
-			return err; -		} -	} - -	return 0; +	if (!allocated_from_cache) +		destroy_mkey(dev, mr);  } -static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) +static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)  {  	int npages = mr->npages;  	struct ib_umem *umem = mr->umem; @@ -1551,16 +1661,12 @@ static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)  		kfree(mr);  	else  		mlx5_mr_cache_free(dev, mr); - -	return 0;  }  int mlx5_ib_dereg_mr(struct ib_mr *ibmr)  { -	struct mlx5_ib_dev *dev = to_mdev(ibmr->device); -	struct mlx5_ib_mr *mr = to_mmr(ibmr); - -	return dereg_mr(dev, mr); +	dereg_mr(to_mdev(ibmr->device), to_mmr(ibmr)); +	return 0;  }  struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, @@ -1641,7 +1747,8 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,  		goto err_free_in;  	} -	MLX5_SET(mkc, mkc, access_mode, mr->access_mode); +	MLX5_SET(mkc, mkc, access_mode_1_0, mr->access_mode & 0x3); +	MLX5_SET(mkc, mkc, access_mode_4_2, (mr->access_mode >> 2) & 0x7);  	MLX5_SET(mkc, mkc, umr_en, 1);  	mr->ibmr.device = pd->device; @@ -1722,7 +1829,7 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,  	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);  	MLX5_SET(mkc, mkc, umr_en, 1);  	MLX5_SET(mkc, mkc, lr, 1); -	MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_KLMS); +	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);  	MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2)));  	MLX5_SET(mkc, mkc, qpn, 0xffffff); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index a2e1aa86e133..7ed4b70f6447 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -36,6 +36,7 @@  #include <rdma/ib_user_verbs.h>  #include <linux/mlx5/fs.h>  #include "mlx5_ib.h" +#include "ib_rep.h"  /* not supported currently */  static int wq_signature; @@ -85,7 +86,9 @@ struct mlx5_modify_raw_qp_param {  	u16 operation;  	u32 set_mask; /* raw_qp_set_mask_map */ -	u32 rate_limit; + +	struct mlx5_rate_limit rl; +  	u8 rq_q_ctr_id;  }; @@ -877,7 +880,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,  		goto err_free;  	} -	err = ib_copy_to_udata(udata, resp, sizeof(*resp)); +	err = ib_copy_to_udata(udata, resp, min(udata->outlen, sizeof(*resp)));  	if (err) {  		mlx5_ib_dbg(dev, "copy failed\n");  		goto err_unmap; @@ -1082,6 +1085,13 @@ static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,  	mlx5_core_destroy_tis(dev->mdev, sq->tisn);  } +static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev, +				       struct mlx5_ib_sq *sq) +{ +	if (sq->flow_rule) +		mlx5_del_flow_rules(sq->flow_rule); +} +  static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,  				   struct mlx5_ib_sq *sq, void *qpin,  				   struct ib_pd *pd) @@ -1145,8 +1155,15 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,  	if (err)  		goto err_umem; +	err = create_flow_rule_vport_sq(dev, sq); +	if (err) +		goto err_flow; +  	return 0; +err_flow: +	mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp); +  err_umem:  	ib_umem_release(sq->ubuffer.umem);  	sq->ubuffer.umem = NULL; @@ -1157,6 +1174,7 @@ err_umem:  static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,  				     struct mlx5_ib_sq *sq)  { +	destroy_flow_rule_vport_sq(dev, sq);  	mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);  	ib_umem_release(sq->ubuffer.umem);  } @@ -1267,6 +1285,10 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,  	if (tunnel_offload_en)  		MLX5_SET(tirc, 
tirc, tunneled_offload_en, 1); +	if (dev->rep) +		MLX5_SET(tirc, tirc, self_lb_block, +			 MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_); +  	err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);  	kvfree(in); @@ -1391,6 +1413,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,  	void *tirc;  	void *hfso;  	u32 selected_fields = 0; +	u32 outer_l4;  	size_t min_resp_len;  	u32 tdn = mucontext->tdn;  	struct mlx5_ib_create_qp_rss ucmd = {}; @@ -1446,7 +1469,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,  		return -EOPNOTSUPP;  	} -	err = ib_copy_to_udata(udata, &resp, min_resp_len); +	err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));  	if (err) {  		mlx5_ib_dbg(dev, "copy failed\n");  		return -EINVAL; @@ -1521,10 +1544,14 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,  		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,  			 MLX5_L3_PROT_TYPE_IPV6); -	if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || -	     (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) && -	     ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) || -	     (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))) { +	outer_l4 = ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || +		    (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) << 0 | +		   ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) || +		    (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) << 1 | +		   (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) << 2; + +	/* Check that only one l4 protocol is set */ +	if (outer_l4 & (outer_l4 - 1)) {  		err = -EINVAL;  		goto err;  	} @@ -1555,9 +1582,16 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,  	    (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))  		selected_fields |= MLX5_HASH_FIELD_SEL_L4_DPORT; +	if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) +		selected_fields |= MLX5_HASH_FIELD_SEL_IPSEC_SPI; +  	MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);  create_tir: +	if (dev->rep) +		MLX5_SET(tirc, tirc, self_lb_block, +			 MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_); +  	err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn);  	if (err) @@ -2143,7 +2177,6 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd,  					struct ib_qp_init_attr *attr,  					struct mlx5_ib_create_qp *ucmd)  { -	struct mlx5_ib_dev *dev;  	struct mlx5_ib_qp *qp;  	int err = 0;  	u32 uidx = MLX5_IB_DEFAULT_UIDX; @@ -2152,8 +2185,6 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd,  	if (!attr->srq || !attr->recv_cq)  		return ERR_PTR(-EINVAL); -	dev = to_mdev(pd->device); -  	err = get_qp_user_index(to_mucontext(pd->uobject->context),  				ucmd, sizeof(*ucmd), &uidx);  	if (err) @@ -2753,8 +2784,9 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,  				   const struct mlx5_modify_raw_qp_param *raw_qp_param)  {  	struct mlx5_ib_qp *ibqp = sq->base.container_mibqp; -	u32 old_rate = ibqp->rate_limit; -	u32 new_rate = old_rate; +	struct mlx5_rate_limit old_rl = ibqp->rl; +	struct mlx5_rate_limit new_rl = old_rl; +	bool new_rate_added = false;  	u16 rl_index = 0;  	void *in;  	void *sqc; @@ -2776,39 +2808,43 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,  			pr_warn("%s: Rate limit can only be changed when SQ is moving to RDY\n",  				__func__);  		else -			new_rate = raw_qp_param->rate_limit; +			new_rl = raw_qp_param->rl;  	} -	if 
(old_rate != new_rate) { -		if (new_rate) { -			err = mlx5_rl_add_rate(dev, new_rate, &rl_index); +	if (!mlx5_rl_are_equal(&old_rl, &new_rl)) { +		if (new_rl.rate) { +			err = mlx5_rl_add_rate(dev, &rl_index, &new_rl);  			if (err) { -				pr_err("Failed configuring rate %u: %d\n", -				       new_rate, err); +				pr_err("Failed configuring rate limit(err %d): \ +				       rate %u, max_burst_sz %u, typical_pkt_sz %u\n", +				       err, new_rl.rate, new_rl.max_burst_sz, +				       new_rl.typical_pkt_sz); +  				goto out;  			} +			new_rate_added = true;  		}  		MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); +		/* index 0 means no limit */  		MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index);  	}  	err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen);  	if (err) {  		/* Remove new rate from table if failed */ -		if (new_rate && -		    old_rate != new_rate) -			mlx5_rl_remove_rate(dev, new_rate); +		if (new_rate_added) +			mlx5_rl_remove_rate(dev, &new_rl);  		goto out;  	}  	/* Only remove the old rate after new rate was set */ -	if ((old_rate && -	    (old_rate != new_rate)) || +	if ((old_rl.rate && +	     !mlx5_rl_are_equal(&old_rl, &new_rl)) ||  	    (new_state != MLX5_SQC_STATE_RDY)) -		mlx5_rl_remove_rate(dev, old_rate); +		mlx5_rl_remove_rate(dev, &old_rl); -	ibqp->rate_limit = new_rate; +	ibqp->rl = new_rl;  	sq->state = new_state;  out: @@ -2885,7 +2921,8 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,  static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,  			       const struct ib_qp_attr *attr, int attr_mask, -			       enum ib_qp_state cur_state, enum ib_qp_state new_state) +			       enum ib_qp_state cur_state, enum ib_qp_state new_state, +			       const struct mlx5_ib_modify_qp *ucmd)  {  	static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {  		[MLX5_QP_STATE_RST] = { @@ -2938,18 +2975,16 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,  	u16 op;  	u8 tx_affinity = 0; +	mlx5_st = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ? +			     qp->qp_sub_type : ibqp->qp_type); +	if (mlx5_st < 0) +		return -EINVAL; +  	context = kzalloc(sizeof(*context), GFP_KERNEL);  	if (!context)  		return -ENOMEM; -	err = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ? -			 qp->qp_sub_type : ibqp->qp_type); -	if (err < 0) { -		mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type); -		goto out; -	} - -	context->flags = cpu_to_be32(err << 16); +	context->flags = cpu_to_be32(mlx5_st << 16);  	if (!(attr_mask & IB_QP_PATH_MIG_STATE)) {  		context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11); @@ -3103,10 +3138,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,  	mlx5_cur = to_mlx5_state(cur_state);  	mlx5_new = to_mlx5_state(new_state); -	mlx5_st = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ? 
-			     qp->qp_sub_type : ibqp->qp_type); -	if (mlx5_st < 0) -		goto out;  	if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||  	    !optab[mlx5_cur][mlx5_new]) { @@ -3129,7 +3160,30 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,  		}  		if (attr_mask & IB_QP_RATE_LIMIT) { -			raw_qp_param.rate_limit = attr->rate_limit; +			raw_qp_param.rl.rate = attr->rate_limit; + +			if (ucmd->burst_info.max_burst_sz) { +				if (attr->rate_limit && +				    MLX5_CAP_QOS(dev->mdev, packet_pacing_burst_bound)) { +					raw_qp_param.rl.max_burst_sz = +						ucmd->burst_info.max_burst_sz; +				} else { +					err = -EINVAL; +					goto out; +				} +			} + +			if (ucmd->burst_info.typical_pkt_sz) { +				if (attr->rate_limit && +				    MLX5_CAP_QOS(dev->mdev, packet_pacing_typical_size)) { +					raw_qp_param.rl.typical_pkt_sz = +						ucmd->burst_info.typical_pkt_sz; +				} else { +					err = -EINVAL; +					goto out; +				} +			} +  			raw_qp_param.set_mask |= MLX5_RAW_QP_RATE_LIMIT;  		} @@ -3157,7 +3211,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,  	 * If we moved a kernel QP to RESET, clean up all old CQ  	 * entries and reinitialize the QP.  	 */ -	if (new_state == IB_QPS_RESET && !ibqp->uobject) { +	if (new_state == IB_QPS_RESET && +	    !ibqp->uobject && ibqp->qp_type != IB_QPT_XRC_TGT) {  		mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,  				 ibqp->srq ? to_msrq(ibqp->srq) : NULL);  		if (send_cq != recv_cq) @@ -3316,8 +3371,10 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,  {  	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);  	struct mlx5_ib_qp *qp = to_mqp(ibqp); +	struct mlx5_ib_modify_qp ucmd = {};  	enum ib_qp_type qp_type;  	enum ib_qp_state cur_state, new_state; +	size_t required_cmd_sz;  	int err = -EINVAL;  	int port;  	enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED; @@ -3325,6 +3382,28 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,  	if (ibqp->rwq_ind_tbl)  		return -ENOSYS; +	if (udata && udata->inlen) { +		required_cmd_sz = offsetof(typeof(ucmd), reserved) + +			sizeof(ucmd.reserved); +		if (udata->inlen < required_cmd_sz) +			return -EINVAL; + +		if (udata->inlen > sizeof(ucmd) && +		    !ib_is_udata_cleared(udata, sizeof(ucmd), +					 udata->inlen - sizeof(ucmd))) +			return -EOPNOTSUPP; + +		if (ib_copy_from_udata(&ucmd, udata, +				       min(udata->inlen, sizeof(ucmd)))) +			return -EFAULT; + +		if (ucmd.comp_mask || +		    memchr_inv(&ucmd.reserved, 0, sizeof(ucmd.reserved)) || +		    memchr_inv(&ucmd.burst_info.reserved, 0, +			       sizeof(ucmd.burst_info.reserved))) +			return -EOPNOTSUPP; +	} +  	if (unlikely(ibqp->qp_type == IB_QPT_GSI))  		return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask); @@ -3405,7 +3484,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,  		goto out;  	} -	err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); +	err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, +				  new_state, &ucmd);  out:  	mutex_unlock(&qp->mutex); @@ -3625,8 +3705,19 @@ static __be64 get_umr_update_pd_mask(void)  	return cpu_to_be64(result);  } -static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, -				struct ib_send_wr *wr, int atomic) +static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask) +{ +	if ((mask & MLX5_MKEY_MASK_PAGE_SIZE && +	     MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) || +	    (mask & MLX5_MKEY_MASK_A && +	     MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))) +		return -EPERM; +	return 0; +} + 
+static int set_reg_umr_segment(struct mlx5_ib_dev *dev, +			       struct mlx5_wqe_umr_ctrl_seg *umr, +			       struct ib_send_wr *wr, int atomic)  {  	struct mlx5_umr_wr *umrwr = umr_wr(wr); @@ -3658,6 +3749,8 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,  	if (!wr->num_sge)  		umr->flags |= MLX5_UMR_INLINE; + +	return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask));  }  static u8 get_umr_flags(int acc) @@ -4480,7 +4573,9 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,  			}  			qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;  			ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey); -			set_reg_umr_segment(seg, wr, !!(MLX5_CAP_GEN(mdev, atomic))); +			err = set_reg_umr_segment(dev, seg, wr, !!(MLX5_CAP_GEN(mdev, atomic))); +			if (unlikely(err)) +				goto out;  			seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);  			size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;  			if (unlikely((seg == qend))) @@ -4718,26 +4813,14 @@ static int query_raw_packet_qp_sq_state(struct mlx5_ib_dev *dev,  					struct mlx5_ib_sq *sq,  					u8 *sq_state)  { -	void *out; -	void *sqc; -	int inlen;  	int err; -	inlen = MLX5_ST_SZ_BYTES(query_sq_out); -	out = kvzalloc(inlen, GFP_KERNEL); -	if (!out) -		return -ENOMEM; - -	err = mlx5_core_query_sq(dev->mdev, sq->base.mqp.qpn, out); +	err = mlx5_core_query_sq_state(dev->mdev, sq->base.mqp.qpn, sq_state);  	if (err)  		goto out; - -	sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context); -	*sq_state = MLX5_GET(sqc, sqc, state);  	sq->state = *sq_state;  out: -	kvfree(out);  	return err;  }  | 
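
Note on the profile/stage mechanism introduced above: pf_profile and nic_rep_profile are ordered tables of init/cleanup pairs that __mlx5_ib_add() walks forward and __mlx5_ib_remove() unwinds in reverse, so a failed stage only tears down what already succeeded. The following is a minimal standalone sketch of that pattern, not kernel code; the demo_* names and stages are hypothetical stand-ins for the driver's real stages.

#include <stdio.h>

struct demo_dev {
	int flow_db_ready;
	int caps_ready;
};

struct demo_stage {
	const char *name;
	int (*init)(struct demo_dev *dev);
	void (*cleanup)(struct demo_dev *dev);
};

static int demo_flow_db_init(struct demo_dev *dev)
{
	dev->flow_db_ready = 1;
	printf("init: flow_db\n");
	return 0;
}

static void demo_flow_db_cleanup(struct demo_dev *dev)
{
	dev->flow_db_ready = 0;
	printf("cleanup: flow_db\n");
}

static int demo_caps_init(struct demo_dev *dev)
{
	dev->caps_ready = 1;
	printf("init: caps\n");
	return 0;
}

/* A "profile" is just an ordered array of stages; cleanup may be NULL. */
static const struct demo_stage demo_profile[] = {
	{ "flow_db", demo_flow_db_init, demo_flow_db_cleanup },
	{ "caps",    demo_caps_init,    NULL },
};

#define DEMO_STAGE_MAX ((int)(sizeof(demo_profile) / sizeof(demo_profile[0])))

/* Mirrors __mlx5_ib_remove(): unwind the first 'stage' stages in reverse. */
static void demo_remove(struct demo_dev *dev, int stage)
{
	while (stage) {
		stage--;
		if (demo_profile[stage].cleanup)
			demo_profile[stage].cleanup(dev);
	}
}

/* Mirrors __mlx5_ib_add(): run stages in order, unwind on first failure. */
static int demo_add(struct demo_dev *dev)
{
	int err;
	int i;

	for (i = 0; i < DEMO_STAGE_MAX; i++) {
		if (demo_profile[i].init) {
			err = demo_profile[i].init(dev);
			if (err) {
				demo_remove(dev, i);
				return err;
			}
		}
	}
	return 0;
}

int main(void)
{
	struct demo_dev dev = { 0 };

	if (demo_add(&dev))
		return 1;
	demo_remove(&dev, DEMO_STAGE_MAX);
	return 0;
}

This is why nic_rep_profile can reuse most pf_profile stages while swapping only the callbacks that differ for a representor (flow_db, non-default callbacks, RoCE, and the extra REP_REG stage).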
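
Note on the RSS hash change in create_rss_raw_qp_tir(): the new `outer_l4 & (outer_l4 - 1)` test is the usual "at most one bit set" check, used here to reject selecting more than one outer L4 hash input (TCP, UDP, or IPSEC SPI). A tiny standalone illustration with arbitrary example bitmasks, again plain userspace C rather than driver code:

#include <assert.h>
#include <stdio.h>

/* Returns 1 when at most one bit of x is set (x == 0 also passes). */
static int at_most_one_bit(unsigned int x)
{
	return (x & (x - 1)) == 0;
}

int main(void)
{
	assert(at_most_one_bit(0x0));	/* no L4 selection            */
	assert(at_most_one_bit(0x2));	/* a single protocol selected */
	assert(!at_most_one_bit(0x3));	/* two protocols selected     */
	printf("at-most-one-bit check behaves as expected\n");
	return 0;
}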

