diff options
Diffstat (limited to 'include/rdma')
-rw-r--r-- | include/rdma/ib.h | 2 | ||||
-rw-r--r-- | include/rdma/ib_cm.h | 66 | ||||
-rw-r--r-- | include/rdma/ib_mad.h | 40 | ||||
-rw-r--r-- | include/rdma/ib_umem.h | 10 | ||||
-rw-r--r-- | include/rdma/ib_umem_odp.h | 98 | ||||
-rw-r--r-- | include/rdma/ib_verbs.h | 216 | ||||
-rw-r--r-- | include/rdma/iba.h | 146 | ||||
-rw-r--r-- | include/rdma/ibta_vol1_c12.h | 213 | ||||
-rw-r--r-- | include/rdma/iw_portmap.h | 3 | ||||
-rw-r--r-- | include/rdma/opa_port_info.h | 2 | ||||
-rw-r--r-- | include/rdma/rdma_netlink.h | 10 | ||||
-rw-r--r-- | include/rdma/rdma_vt.h | 1 | ||||
-rw-r--r-- | include/rdma/rdmavt_cq.h | 1 | ||||
-rw-r--r-- | include/rdma/rdmavt_qp.h | 57 | ||||
-rw-r--r-- | include/rdma/restrack.h | 8 | ||||
-rw-r--r-- | include/rdma/signature.h | 2 | ||||
-rw-r--r-- | include/rdma/uverbs_named_ioctl.h | 6 | ||||
-rw-r--r-- | include/rdma/uverbs_std_types.h | 13 | ||||
-rw-r--r-- | include/rdma/uverbs_types.h | 34 |
19 files changed, 653 insertions, 275 deletions
diff --git a/include/rdma/ib.h b/include/rdma/ib.h index 4f385ec54f80..fe2fc9e91588 100644 --- a/include/rdma/ib.h +++ b/include/rdma/ib.h @@ -36,6 +36,8 @@ #include <linux/types.h> #include <linux/sched.h> #include <linux/cred.h> +#include <linux/uaccess.h> +#include <linux/fs.h> struct ib_addr { union { diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 49f4f75499b3..8ec482e391aa 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -1,38 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. */ -#if !defined(IB_CM_H) +#ifndef IB_CM_H #define IB_CM_H #include <rdma/ib_mad.h> @@ -526,21 +500,6 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, u8 private_data_len); /** - * ib_send_cm_lap - Sends a load alternate path request. - * @cm_id: Connection identifier associated with the load alternate path - * message. - * @alternate_path: A path record that identifies the alternate path to - * load. - * @private_data: Optional user-defined private data sent with the - * load alternate path message. - * @private_data_len: Size of the private data buffer, in bytes. - */ -int ib_send_cm_lap(struct ib_cm_id *cm_id, - struct sa_path_rec *alternate_path, - const void *private_data, - u8 private_data_len); - -/** * ib_cm_init_qp_attr - Initializes the QP attributes for use in transitioning * to a specified QP state. * @cm_id: Communication identifier associated with the QP attributes to @@ -560,25 +519,6 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id, struct ib_qp_attr *qp_attr, int *qp_attr_mask); -/** - * ib_send_cm_apr - Sends an alternate path response message in response to - * a load alternate path request. - * @cm_id: Connection identifier associated with the alternate path response. - * @status: Reply status sent with the alternate path response. - * @info: Optional additional information sent with the alternate path - * response. - * @info_length: Size of the additional information, in bytes. - * @private_data: Optional user-defined private data sent with the - * alternate path response message. - * @private_data_len: Size of the private data buffer, in bytes. - */ -int ib_send_cm_apr(struct ib_cm_id *cm_id, - enum ib_cm_apr_status status, - void *info, - u8 info_length, - const void *private_data, - u8 private_data_len); - struct ib_cm_sidr_req_param { struct sa_path_rec *path; const struct ib_gid_attr *sgid_attr; diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index eea946fcc819..4e62650e2127 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -815,46 +815,6 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, struct ib_mad_send_buf *send_buf, u32 timeout_ms); /** - * ib_redirect_mad_qp - Registers a QP for MAD services. - * @qp: Reference to a QP that requires MAD services. - * @rmpp_version: If set, indicates that the client will send - * and receive MADs that contain the RMPP header for the given version. - * If set to 0, indicates that RMPP is not used by this client. - * @send_handler: The completion callback routine invoked after a send - * request has completed. - * @recv_handler: The completion callback routine invoked for a received - * MAD. - * @context: User specified context associated with the registration. - * - * Use of this call allows clients to use MAD services, such as RMPP, - * on user-owned QPs. After calling this routine, users may send - * MADs on the specified QP by calling ib_mad_post_send. - */ -struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp, - u8 rmpp_version, - ib_mad_send_handler send_handler, - ib_mad_recv_handler recv_handler, - void *context); - -/** - * ib_process_mad_wc - Processes a work completion associated with a - * MAD sent or received on a redirected QP. - * @mad_agent: Specifies the registered MAD service using the redirected QP. - * @wc: References a work completion associated with a sent or received - * MAD segment. - * - * This routine is used to complete or continue processing on a MAD request. - * If the work completion is associated with a send operation, calling - * this routine is required to continue an RMPP transfer or to wait for a - * corresponding response, if it is a request. If the work completion is - * associated with a receive operation, calling this routine is required to - * process an inbound or outbound RMPP transfer, or to match a response MAD - * with its corresponding request. - */ -int ib_process_mad_wc(struct ib_mad_agent *mad_agent, - struct ib_wc *wc); - -/** * ib_create_send_mad - Allocate and initialize a data buffer and work request * for sending a MAD. * @mad_agent: Specifies the registered MAD service to associate with the MAD. diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 1052d0d62be7..e3518fd6b95b 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -42,7 +42,7 @@ struct ib_ucontext; struct ib_umem_odp; struct ib_umem { - struct ib_ucontext *context; + struct ib_device *ibdev; struct mm_struct *owning_mm; size_t length; unsigned long address; @@ -69,8 +69,8 @@ static inline size_t ib_umem_num_pages(struct ib_umem *umem) #ifdef CONFIG_INFINIBAND_USER_MEM -struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, - size_t size, int access, int dmasync); +struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, + size_t size, int access); void ib_umem_release(struct ib_umem *umem); int ib_umem_page_count(struct ib_umem *umem); int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, @@ -83,9 +83,9 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, #include <linux/err.h> -static inline struct ib_umem *ib_umem_get(struct ib_udata *udata, +static inline struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, size_t size, - int access, int dmasync) + int access) { return ERR_PTR(-EINVAL); } diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index 479db5c98ff6..64314ff76612 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -35,16 +35,11 @@ #include <rdma/ib_umem.h> #include <rdma/ib_verbs.h> -#include <linux/interval_tree.h> - -struct umem_odp_node { - u64 __subtree_last; - struct rb_node rb; -}; struct ib_umem_odp { struct ib_umem umem; - struct ib_ucontext_per_mm *per_mm; + struct mmu_interval_notifier notifier; + struct pid *tgid; /* * An array of the pages included in the on-demand paging umem. @@ -67,17 +62,17 @@ struct ib_umem_odp { struct mutex umem_mutex; void *private; /* for the HW driver to use. */ - int notifiers_seq; - int notifiers_count; int npages; - /* Tree tracking */ - struct umem_odp_node interval_tree; + /* + * An implicit odp umem cannot be DMA mapped, has 0 length, and serves + * only as an anchor for the driver to hold onto the per_mm. FIXME: + * This should be removed and drivers should work with the per_mm + * directly. + */ + bool is_implicit_odp; - struct completion notifier_completion; - int dying; unsigned int page_shift; - struct work_struct work; }; static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem) @@ -88,14 +83,13 @@ static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem) /* Returns the first page of an ODP umem. */ static inline unsigned long ib_umem_start(struct ib_umem_odp *umem_odp) { - return ALIGN_DOWN(umem_odp->umem.address, 1UL << umem_odp->page_shift); + return umem_odp->notifier.interval_tree.start; } /* Returns the address of the page after the last one of an ODP umem. */ static inline unsigned long ib_umem_end(struct ib_umem_odp *umem_odp) { - return ALIGN(umem_odp->umem.address + umem_odp->umem.length, - 1UL << umem_odp->page_shift); + return umem_odp->notifier.interval_tree.last + 1; } static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp) @@ -119,26 +113,15 @@ static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp) #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -struct ib_ucontext_per_mm { - struct ib_ucontext *context; - struct mm_struct *mm; - struct pid *tgid; - bool active; - - struct rb_root_cached umem_tree; - /* Protects umem_tree */ - struct rw_semaphore umem_rwsem; - - struct mmu_notifier mn; - unsigned int odp_mrs_count; - - struct list_head ucontext_list; - struct rcu_head rcu; -}; - -int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access); -struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root_umem, - unsigned long addr, size_t size); +struct ib_umem_odp * +ib_umem_odp_get(struct ib_device *device, unsigned long addr, size_t size, + int access, const struct mmu_interval_notifier_ops *ops); +struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_device *device, + int access); +struct ib_umem_odp * +ib_umem_odp_alloc_child(struct ib_umem_odp *root_umem, unsigned long addr, + size_t size, + const struct mmu_interval_notifier_ops *ops); void ib_umem_odp_release(struct ib_umem_odp *umem_odp); int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset, @@ -148,46 +131,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset, void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset, u64 bound); -typedef int (*umem_call_back)(struct ib_umem_odp *item, u64 start, u64 end, - void *cookie); -/* - * Call the callback on each ib_umem in the range. Returns the logical or of - * the return values of the functions called. - */ -int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, - u64 start, u64 end, - umem_call_back cb, - bool blockable, void *cookie); - -/* - * Find first region intersecting with address range. - * Return NULL if not found - */ -struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root, - u64 addr, u64 length); - -static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp, - unsigned long mmu_seq) -{ - /* - * This code is strongly based on the KVM code from - * mmu_notifier_retry. Should be called with - * the relevant locks taken (umem_odp->umem_mutex - * and the ucontext umem_mutex semaphore locked for read). - */ - - if (unlikely(umem_odp->notifiers_count)) - return 1; - if (umem_odp->notifiers_seq != mmu_seq) - return 1; - return 0; -} - #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ -static inline int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access) +static inline struct ib_umem_odp * +ib_umem_odp_get(struct ib_device *device, unsigned long addr, size_t size, + int access, const struct mmu_interval_notifier_ops *ops) { - return -EINVAL; + return ERR_PTR(-EINVAL); } static inline void ib_umem_odp_release(struct ib_umem_odp *umem_odp) {} diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4f225175cb91..1f779fad3a1e 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -72,11 +72,16 @@ #define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN struct ib_umem_odp; +struct ib_uqp_object; +struct ib_usrq_object; +struct ib_uwq_object; extern struct workqueue_struct *ib_wq; extern struct workqueue_struct *ib_comp_wq; extern struct workqueue_struct *ib_comp_unbound_wq; +struct ib_ucq_object; + __printf(3, 4) __cold void ibdev_printk(const char *level, const struct ib_device *ibdev, const char *format, ...); @@ -98,15 +103,54 @@ void ibdev_info(const struct ib_device *ibdev, const char *format, ...); #if defined(CONFIG_DYNAMIC_DEBUG) #define ibdev_dbg(__dev, format, args...) \ dynamic_ibdev_dbg(__dev, format, ##args) -#elif defined(DEBUG) -#define ibdev_dbg(__dev, format, args...) \ - ibdev_printk(KERN_DEBUG, __dev, format, ##args) #else __printf(2, 3) __cold static inline void ibdev_dbg(const struct ib_device *ibdev, const char *format, ...) {} #endif +#define ibdev_level_ratelimited(ibdev_level, ibdev, fmt, ...) \ +do { \ + static DEFINE_RATELIMIT_STATE(_rs, \ + DEFAULT_RATELIMIT_INTERVAL, \ + DEFAULT_RATELIMIT_BURST); \ + if (__ratelimit(&_rs)) \ + ibdev_level(ibdev, fmt, ##__VA_ARGS__); \ +} while (0) + +#define ibdev_emerg_ratelimited(ibdev, fmt, ...) \ + ibdev_level_ratelimited(ibdev_emerg, ibdev, fmt, ##__VA_ARGS__) +#define ibdev_alert_ratelimited(ibdev, fmt, ...) \ + ibdev_level_ratelimited(ibdev_alert, ibdev, fmt, ##__VA_ARGS__) +#define ibdev_crit_ratelimited(ibdev, fmt, ...) \ + ibdev_level_ratelimited(ibdev_crit, ibdev, fmt, ##__VA_ARGS__) +#define ibdev_err_ratelimited(ibdev, fmt, ...) \ + ibdev_level_ratelimited(ibdev_err, ibdev, fmt, ##__VA_ARGS__) +#define ibdev_warn_ratelimited(ibdev, fmt, ...) \ + ibdev_level_ratelimited(ibdev_warn, ibdev, fmt, ##__VA_ARGS__) +#define ibdev_notice_ratelimited(ibdev, fmt, ...) \ + ibdev_level_ratelimited(ibdev_notice, ibdev, fmt, ##__VA_ARGS__) +#define ibdev_info_ratelimited(ibdev, fmt, ...) \ + ibdev_level_ratelimited(ibdev_info, ibdev, fmt, ##__VA_ARGS__) + +#if defined(CONFIG_DYNAMIC_DEBUG) +/* descriptor check is first to prevent flooding with "callbacks suppressed" */ +#define ibdev_dbg_ratelimited(ibdev, fmt, ...) \ +do { \ + static DEFINE_RATELIMIT_STATE(_rs, \ + DEFAULT_RATELIMIT_INTERVAL, \ + DEFAULT_RATELIMIT_BURST); \ + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ + if (DYNAMIC_DEBUG_BRANCH(descriptor) && __ratelimit(&_rs)) \ + __dynamic_ibdev_dbg(&descriptor, ibdev, fmt, \ + ##__VA_ARGS__); \ +} while (0) +#else +__printf(2, 3) __cold +static inline +void ibdev_dbg_ratelimited(const struct ib_device *ibdev, const char *format, ...) {} +#endif + union ib_gid { u8 raw[16]; struct { @@ -327,7 +371,7 @@ struct ib_tm_caps { struct ib_cq_init_attr { unsigned int cqe; - int comp_vector; + u32 comp_vector; u32 flags; }; @@ -406,6 +450,8 @@ struct ib_device_attr { struct ib_tm_caps tm_caps; struct ib_cq_caps cq_caps; u64 max_dm_size; + /* Max entries for sgl for optimized performance per READ */ + u32 max_sgl_rd; }; enum ib_mtu { @@ -451,6 +497,16 @@ enum ib_port_state { IB_PORT_ACTIVE_DEFER = 5 }; +enum ib_port_phys_state { + IB_PORT_PHYS_STATE_SLEEP = 1, + IB_PORT_PHYS_STATE_POLLING = 2, + IB_PORT_PHYS_STATE_DISABLED = 3, + IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING = 4, + IB_PORT_PHYS_STATE_LINK_UP = 5, + IB_PORT_PHYS_STATE_LINK_ERROR_RECOVERY = 6, + IB_PORT_PHYS_STATE_PHY_TEST = 7, +}; + enum ib_port_width { IB_WIDTH_1X = 1, IB_WIDTH_2X = 16, @@ -1362,8 +1418,11 @@ enum ib_access_flags { IB_ZERO_BASED = IB_UVERBS_ACCESS_ZERO_BASED, IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND, IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB, + IB_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_RELAXED_ORDERING, - IB_ACCESS_SUPPORTED = ((IB_ACCESS_HUGETLB << 1) - 1) + IB_ACCESS_OPTIONAL = IB_UVERBS_ACCESS_OPTIONAL_RANGE, + IB_ACCESS_SUPPORTED = + ((IB_ACCESS_HUGETLB << 1) - 1) | IB_ACCESS_OPTIONAL, }; /* @@ -1417,16 +1476,12 @@ struct ib_ucontext { bool cleanup_retryable; - void (*invalidate_range)(struct ib_umem_odp *umem_odp, - unsigned long start, unsigned long end); - struct mutex per_mm_list_lock; - struct list_head per_mm_list; - struct ib_rdmacg_object cg_obj; /* * Implementation details of the RDMA core, don't use in drivers: */ struct rdma_restrack_entry res; + struct xarray mmap_xa; }; struct ib_uobject { @@ -1497,7 +1552,7 @@ enum ib_poll_context { struct ib_cq { struct ib_device *device; - struct ib_uobject *uobject; + struct ib_ucq_object *uobject; ib_comp_handler comp_handler; void (*event_handler)(struct ib_event *, void *); void *cq_context; @@ -1511,6 +1566,11 @@ struct ib_cq { }; struct workqueue_struct *comp_wq; struct dim *dim; + + /* updated only by trace points */ + ktime_t timestamp; + bool interrupt; + /* * Implementation details of the RDMA core, don't use in drivers: */ @@ -1520,7 +1580,7 @@ struct ib_cq { struct ib_srq { struct ib_device *device; struct ib_pd *pd; - struct ib_uobject *uobject; + struct ib_usrq_object *uobject; void (*event_handler)(struct ib_event *, void *); void *srq_context; enum ib_srq_type srq_type; @@ -1565,7 +1625,7 @@ enum ib_wq_state { struct ib_wq { struct ib_device *device; - struct ib_uobject *uobject; + struct ib_uwq_object *uobject; void *wq_context; void (*event_handler)(struct ib_event *, void *); struct ib_pd *pd; @@ -1681,7 +1741,7 @@ struct ib_qp { atomic_t usecnt; struct list_head open_list; struct ib_qp *real_qp; - struct ib_uobject *uobject; + struct ib_uqp_object *uobject; void (*event_handler)(struct ib_event *, void *); void *qp_context; /* sgid_attrs associated with the AV's */ @@ -2076,7 +2136,7 @@ struct ib_flow_action { atomic_t usecnt; }; -struct ib_mad_hdr; +struct ib_mad; struct ib_grh; enum ib_process_mad_flags { @@ -2100,11 +2160,6 @@ struct ib_port_cache { enum ib_port_state port_state; }; -struct ib_cache { - rwlock_t lock; - struct ib_event_handler event_handler; -}; - struct ib_port_immutable { int pkey_tbl_len; int gid_tbl_len; @@ -2174,6 +2229,11 @@ struct rdma_netdev_alloc_params { struct net_device *netdev, void *param); }; +struct ib_odp_counters { + atomic64_t faults; + atomic64_t invalidations; +}; + struct ib_counters { struct ib_device *device; struct ib_uobject *uobject; @@ -2207,6 +2267,21 @@ struct iw_cm_conn_param; #define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct +struct rdma_user_mmap_entry { + struct kref ref; + struct ib_ucontext *ucontext; + unsigned long start_pgoff; + size_t npages; + bool driver_removed; +}; + +/* Return the offset (in bytes) the user should pass to libc's mmap() */ +static inline u64 +rdma_user_mmap_get_offset(const struct rdma_user_mmap_entry *entry) +{ + return (u64)entry->start_pgoff << PAGE_SHIFT; +} + /** * struct ib_device_ops - InfiniBand device operations * This structure defines all the InfiniBand device operations, providers will @@ -2234,9 +2309,8 @@ struct ib_device_ops { int (*process_mad)(struct ib_device *device, int process_mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, size_t in_mad_size, - struct ib_mad_hdr *out_mad, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in_mad, struct ib_mad *out_mad, + size_t *out_mad_size, u16 *out_mad_pkey_index); int (*query_device)(struct ib_device *device, struct ib_device_attr *device_attr, struct ib_udata *udata); @@ -2319,6 +2393,13 @@ struct ib_device_ops { struct ib_udata *udata); void (*dealloc_ucontext)(struct ib_ucontext *context); int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma); + /** + * This will be called once refcount of an entry in mmap_xa reaches + * zero. The type of the memory that was mapped may differ between + * entries and is opaque to the rdma_user_mmap interface. + * Therefore needs to be implemented by the driver in mmap_free. + */ + void (*mmap_free)(struct rdma_user_mmap_entry *entry); void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata); void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); @@ -2402,6 +2483,9 @@ struct ib_device_ops { struct ifla_vf_info *ivf); int (*get_vf_stats)(struct ib_device *device, int vf, u8 port, struct ifla_vf_stats *stats); + int (*get_vf_guid)(struct ib_device *device, int vf, u8 port, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid); int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid, int type); struct ib_wq *(*create_wq)(struct ib_pd *pd, @@ -2517,6 +2601,13 @@ struct ib_device_ops { */ int (*counter_update_stats)(struct rdma_counter *counter); + /** + * Allows rdma drivers to add their own restrack attributes + * dumped via 'rdma stat' iproute2 command. + */ + int (*fill_stat_entry)(struct sk_buff *msg, + struct rdma_restrack_entry *entry); + DECLARE_RDMA_OBJ_SIZE(ib_ah); DECLARE_RDMA_OBJ_SIZE(ib_cq); DECLARE_RDMA_OBJ_SIZE(ib_pd); @@ -2544,13 +2635,18 @@ struct ib_device { struct rcu_head rcu_head; struct list_head event_handler_list; - spinlock_t event_handler_lock; + /* Protects event_handler_list */ + struct rw_semaphore event_handler_rwsem; + + /* Protects QP's event_handler calls and open_qp list */ + spinlock_t qp_open_list_lock; struct rw_semaphore client_data_rwsem; struct xarray client_data; struct mutex unregistration_lock; - struct ib_cache cache; + /* Synchronize GID, Pkey cache entries, subnet prefix, LMC */ + rwlock_t cache_lock; /** * port_data is indexed by port number */ @@ -2743,18 +2839,26 @@ void ib_set_client_data(struct ib_device *device, struct ib_client *client, void ib_set_device_ops(struct ib_device *device, const struct ib_device_ops *ops); -#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, - unsigned long pfn, unsigned long size, pgprot_t prot); -#else -static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext, - struct vm_area_struct *vma, - unsigned long pfn, unsigned long size, - pgprot_t prot) -{ - return -EINVAL; -} -#endif + unsigned long pfn, unsigned long size, pgprot_t prot, + struct rdma_user_mmap_entry *entry); +int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, + struct rdma_user_mmap_entry *entry, + size_t length); +int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext, + struct rdma_user_mmap_entry *entry, + size_t length, u32 min_pgoff, + u32 max_pgoff); + +struct rdma_user_mmap_entry * +rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext, + unsigned long pgoff); +struct rdma_user_mmap_entry * +rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, + struct vm_area_struct *vma); +void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry); + +void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry); static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len) { @@ -2851,7 +2955,7 @@ bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, void ib_register_event_handler(struct ib_event_handler *event_handler); void ib_unregister_event_handler(struct ib_event_handler *event_handler); -void ib_dispatch_event(struct ib_event *event); +void ib_dispatch_event(const struct ib_event *event); int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr); @@ -3257,6 +3361,9 @@ int ib_get_vf_config(struct ib_device *device, int vf, u8 port, struct ifla_vf_info *info); int ib_get_vf_stats(struct ib_device *device, int vf, u8 port, struct ifla_vf_stats *stats); +int ib_get_vf_guid(struct ib_device *device, int vf, u8 port, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid); int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, int type); @@ -3713,6 +3820,25 @@ static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private, NULL); } +struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private, + int nr_cqe, enum ib_poll_context poll_ctx, + const char *caller); + +/** + * ib_alloc_cq_any: Allocate kernel CQ + * @dev: The IB device + * @private: Private data attached to the CQE + * @nr_cqe: Number of CQEs in the CQ + * @poll_ctx: Context used for polling the CQ + */ +static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev, + void *private, int nr_cqe, + enum ib_poll_context poll_ctx) +{ + return __ib_alloc_cq_any(dev, private, nr_cqe, poll_ctx, + KBUILD_MODNAME); +} + /** * ib_free_cq_user - Free kernel/user CQ * @cq: The CQ to free @@ -3978,9 +4104,7 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, */ static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev) { - struct device_dma_parameters *p = dev->dma_device->dma_parms; - - return p ? p->max_segment_size : UINT_MAX; + return dma_get_max_seg_size(dev->dma_device); } /** @@ -4042,6 +4166,15 @@ static inline void ib_dma_free_coherent(struct ib_device *dev, dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle); } +/* ib_reg_user_mr - register a memory region for virtual addresses from kernel + * space. This function should be called when 'current' is the owning MM. + */ +struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int mr_access_flags); + +/* ib_advise_mr - give an advice about an address range in a memory region */ +int ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, + u32 flags, struct ib_sge *sg_list, u32 num_sge); /** * ib_dereg_mr_user - Deregisters a memory region and removes it from the * HCA translation table. @@ -4189,6 +4322,9 @@ static inline int ib_check_mr_access(int flags) !(flags & IB_ACCESS_LOCAL_WRITE)) return -EINVAL; + if (flags & ~IB_ACCESS_SUPPORTED) + return -EINVAL; + return 0; } diff --git a/include/rdma/iba.h b/include/rdma/iba.h new file mode 100644 index 000000000000..6a1115b02a0d --- /dev/null +++ b/include/rdma/iba.h @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. + */ +#ifndef _IBA_DEFS_H_ +#define _IBA_DEFS_H_ + +#include <linux/kernel.h> +#include <linux/bitfield.h> +#include <asm/unaligned.h> + +static inline u32 _iba_get8(const u8 *ptr) +{ + return *ptr; +} + +static inline void _iba_set8(u8 *ptr, u32 mask, u32 prep_value) +{ + *ptr = (*ptr & ~mask) | prep_value; +} + +static inline u16 _iba_get16(const __be16 *ptr) +{ + return be16_to_cpu(*ptr); +} + +static inline void _iba_set16(__be16 *ptr, u16 mask, u16 prep_value) +{ + *ptr = cpu_to_be16((be16_to_cpu(*ptr) & ~mask) | prep_value); +} + +static inline u32 _iba_get32(const __be32 *ptr) +{ + return be32_to_cpu(*ptr); +} + +static inline void _iba_set32(__be32 *ptr, u32 mask, u32 prep_value) +{ + *ptr = cpu_to_be32((be32_to_cpu(*ptr) & ~mask) | prep_value); +} + +static inline u64 _iba_get64(const __be64 *ptr) +{ + /* + * The mads are constructed so that 32 bit and smaller are naturally + * aligned, everything larger has a max alignment of 4 bytes. + */ + return be64_to_cpu(get_unaligned(ptr)); +} + +static inline void _iba_set64(__be64 *ptr, u64 mask, u64 prep_value) +{ + put_unaligned(cpu_to_be64((_iba_get64(ptr) & ~mask) | prep_value), ptr); +} + +#define _IBA_SET(field_struct, field_offset, field_mask, num_bits, ptr, value) \ + ({ \ + field_struct *_ptr = ptr; \ + _iba_set##num_bits((void *)_ptr + (field_offset), field_mask, \ + FIELD_PREP(field_mask, value)); \ + }) +#define IBA_SET(field, ptr, value) _IBA_SET(field, ptr, value) + +#define _IBA_GET_MEM_PTR(field_struct, field_offset, type, num_bits, ptr) \ + ({ \ + field_struct *_ptr = ptr; \ + (type *)((void *)_ptr + (field_offset)); \ + }) +#define IBA_GET_MEM_PTR(field, ptr) _IBA_GET_MEM_PTR(field, ptr) + +/* FIXME: A set should always set the entire field, meaning we should zero the trailing bytes */ +#define _IBA_SET_MEM(field_struct, field_offset, type, num_bits, ptr, in, \ + bytes) \ + ({ \ + const type *_in_ptr = in; \ + WARN_ON(bytes * 8 > num_bits); \ + if (in && bytes) \ + memcpy(_IBA_GET_MEM_PTR(field_struct, field_offset, \ + type, num_bits, ptr), \ + _in_ptr, bytes); \ + }) +#define IBA_SET_MEM(field, ptr, in, bytes) _IBA_SET_MEM(field, ptr, in, bytes) + +#define _IBA_GET(field_struct, field_offset, field_mask, num_bits, ptr) \ + ({ \ + const field_struct *_ptr = ptr; \ + (u##num_bits) FIELD_GET( \ + field_mask, _iba_get##num_bits((const void *)_ptr + \ + (field_offset))); \ + }) +#define IBA_GET(field, ptr) _IBA_GET(field, ptr) + +#define _IBA_GET_MEM(field_struct, field_offset, type, num_bits, ptr, out, \ + bytes) \ + ({ \ + type *_out_ptr = out; \ + WARN_ON(bytes * 8 > num_bits); \ + if (out && bytes) \ + memcpy(_out_ptr, \ + _IBA_GET_MEM_PTR(field_struct, field_offset, \ + type, num_bits, ptr), \ + bytes); \ + }) +#define IBA_GET_MEM(field, ptr, out, bytes) _IBA_GET_MEM(field, ptr, out, bytes) + +/* + * The generated list becomes the parameters to the macros, the order is: + * - struct this applies to + * - starting offset of the max + * - GENMASK or GENMASK_ULL in CPU order + * - The width of data the mask operations should work on, in bits + */ + +/* + * Extraction using a tabular description like table 106. bit_offset is from + * the Byte[Bit] notation. + */ +#define IBA_FIELD_BLOC(field_struct, byte_offset, bit_offset, num_bits) \ + field_struct, byte_offset, \ + GENMASK(7 - (bit_offset), 7 - (bit_offset) - (num_bits - 1)), \ + 8 +#define IBA_FIELD8_LOC(field_struct, byte_offset, num_bits) \ + IBA_FIELD_BLOC(field_struct, byte_offset, 0, num_bits) + +#define IBA_FIELD16_LOC(field_struct, byte_offset, num_bits) \ + field_struct, (byte_offset)&0xFFFE, \ + GENMASK(15 - (((byte_offset) % 2) * 8), \ + 15 - (((byte_offset) % 2) * 8) - (num_bits - 1)), \ + 16 + +#define IBA_FIELD32_LOC(field_struct, byte_offset, num_bits) \ + field_struct, (byte_offset)&0xFFFC, \ + GENMASK(31 - (((byte_offset) % 4) * 8), \ + 31 - (((byte_offset) % 4) * 8) - (num_bits - 1)), \ + 32 + +#define IBA_FIELD64_LOC(field_struct, byte_offset) \ + field_struct, byte_offset, GENMASK_ULL(63, 0), 64 +/* + * In IBTA spec, everything that is more than 64bits is multiple + * of bytes without leftover bits. + */ +#define IBA_FIELD_MLOC(field_struct, byte_offset, num_bits, type) \ + field_struct, byte_offset, type, num_bits + +#endif /* _IBA_DEFS_H_ */ diff --git a/include/rdma/ibta_vol1_c12.h b/include/rdma/ibta_vol1_c12.h new file mode 100644 index 000000000000..269904425d3f --- /dev/null +++ b/include/rdma/ibta_vol1_c12.h @@ -0,0 +1,213 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. + * + * This file is IBTA volume 1, chapter 12 declarations: + * CHAPTER 12: COMMUNICATION MANAGEMENT + */ +#ifndef _IBTA_VOL1_C12_H_ +#define _IBTA_VOL1_C12_H_ + +#include <rdma/iba.h> + +#define CM_FIELD_BLOC(field_struct, byte_offset, bits_offset, width) \ + IBA_FIELD_BLOC(field_struct, \ + (byte_offset + sizeof(struct ib_mad_hdr)), bits_offset, \ + width) +#define CM_FIELD8_LOC(field_struct, byte_offset, width) \ + IBA_FIELD8_LOC(field_struct, \ + (byte_offset + sizeof(struct ib_mad_hdr)), width) +#define CM_FIELD16_LOC(field_struct, byte_offset, width) \ + IBA_FIELD16_LOC(field_struct, \ + (byte_offset + sizeof(struct ib_mad_hdr)), width) +#define CM_FIELD32_LOC(field_struct, byte_offset, width) \ + IBA_FIELD32_LOC(field_struct, \ + (byte_offset + sizeof(struct ib_mad_hdr)), width) +#define CM_FIELD64_LOC(field_struct, byte_offset) \ + IBA_FIELD64_LOC(field_struct, (byte_offset + sizeof(struct ib_mad_hdr))) +#define CM_FIELD_MLOC(field_struct, byte_offset, width, type) \ + IBA_FIELD_MLOC(field_struct, \ + (byte_offset + sizeof(struct ib_mad_hdr)), width, type) +#define CM_STRUCT(field_struct, total_len) \ + field_struct \ + { \ + struct ib_mad_hdr hdr; \ + u32 _data[(total_len) / 32 + \ + BUILD_BUG_ON_ZERO((total_len) % 32 != 0)]; \ + } + +/* Table 106 REQ Message Contents */ +#define CM_REQ_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_req_msg, 0, 32) +#define CM_REQ_SERVICE_ID CM_FIELD64_LOC(struct cm_req_msg, 8) +#define CM_REQ_LOCAL_CA_GUID CM_FIELD64_LOC(struct cm_req_msg, 16) +#define CM_REQ_LOCAL_Q_KEY CM_FIELD32_LOC(struct cm_req_msg, 28, 32) +#define CM_REQ_LOCAL_QPN CM_FIELD32_LOC(struct cm_req_msg, 32, 24) +#define CM_REQ_RESPONDER_RESOURCES CM_FIELD8_LOC(struct cm_req_msg, 35, 8) +#define CM_REQ_LOCAL_EECN CM_FIELD32_LOC(struct cm_req_msg, 36, 24) +#define CM_REQ_INITIATOR_DEPTH CM_FIELD8_LOC(struct cm_req_msg, 39, 8) +#define CM_REQ_REMOTE_EECN CM_FIELD32_LOC(struct cm_req_msg, 40, 24) +#define CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT \ + CM_FIELD8_LOC(struct cm_req_msg, 43, 5) +#define CM_REQ_TRANSPORT_SERVICE_TYPE CM_FIELD_BLOC(struct cm_req_msg, 43, 5, 2) +#define CM_REQ_END_TO_END_FLOW_CONTROL \ + CM_FIELD_BLOC(struct cm_req_msg, 43, 7, 1) +#define CM_REQ_STARTING_PSN CM_FIELD32_LOC(struct cm_req_msg, 44, 24) +#define CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT CM_FIELD8_LOC(struct cm_req_msg, 47, 5) +#define CM_REQ_RETRY_COUNT CM_FIELD_BLOC(struct cm_req_msg, 47, 5, 3) +#define CM_REQ_PARTITION_KEY CM_FIELD16_LOC(struct cm_req_msg, 48, 16) +#define CM_REQ_PATH_PACKET_PAYLOAD_MTU CM_FIELD8_LOC(struct cm_req_msg, 50, 4) +#define CM_REQ_RDC_EXISTS CM_FIELD_BLOC(struct cm_req_msg, 50, 4, 1) +#define CM_REQ_RNR_RETRY_COUNT CM_FIELD_BLOC(struct cm_req_msg, 50, 5, 3) +#define CM_REQ_MAX_CM_RETRIES CM_FIELD8_LOC(struct cm_req_msg, 51, 4) +#define CM_REQ_SRQ CM_FIELD_BLOC(struct cm_req_msg, 51, 4, 1) +#define CM_REQ_EXTENDED_TRANSPORT_TYPE \ + CM_FIELD_BLOC(struct cm_req_msg, 51, 5, 3) +#define CM_REQ_PRIMARY_LOCAL_PORT_LID CM_FIELD16_LOC(struct cm_req_msg, 52, 16) +#define CM_REQ_PRIMARY_REMOTE_PORT_LID CM_FIELD16_LOC(struct cm_req_msg, 54, 16) +#define CM_REQ_PRIMARY_LOCAL_PORT_GID \ + CM_FIELD_MLOC(struct cm_req_msg, 56, 128, union ib_gid) +#define CM_REQ_PRIMARY_REMOTE_PORT_GID \ + CM_FIELD_MLOC(struct cm_req_msg, 72, 128, union ib_gid) +#define CM_REQ_PRIMARY_FLOW_LABEL CM_FIELD32_LOC(struct cm_req_msg, 88, 20) +#define CM_REQ_PRIMARY_PACKET_RATE CM_FIELD_BLOC(struct cm_req_msg, 91, 2, 6) +#define CM_REQ_PRIMARY_TRAFFIC_CLASS CM_FIELD8_LOC(struct cm_req_msg, 92, 8) +#define CM_REQ_PRIMARY_HOP_LIMIT CM_FIELD8_LOC(struct cm_req_msg, 93, 8) +#define CM_REQ_PRIMARY_SL CM_FIELD8_LOC(struct cm_req_msg, 94, 4) +#define CM_REQ_PRIMARY_SUBNET_LOCAL CM_FIELD_BLOC(struct cm_req_msg, 94, 4, 1) +#define CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT CM_FIELD8_LOC(struct cm_req_msg, 95, 5) +#define CM_REQ_ALTERNATE_LOCAL_PORT_LID \ + CM_FIELD16_LOC(struct cm_req_msg, 96, 16) +#define CM_REQ_ALTERNATE_REMOTE_PORT_LID \ + CM_FIELD16_LOC(struct cm_req_msg, 98, 16) +#define CM_REQ_ALTERNATE_LOCAL_PORT_GID \ + CM_FIELD_MLOC(struct cm_req_msg, 100, 128, union ib_gid) +#define CM_REQ_ALTERNATE_REMOTE_PORT_GID \ + CM_FIELD_MLOC(struct cm_req_msg, 116, 128, union ib_gid) +#define CM_REQ_ALTERNATE_FLOW_LABEL CM_FIELD32_LOC(struct cm_req_msg, 132, 20) +#define CM_REQ_ALTERNATE_PACKET_RATE CM_FIELD_BLOC(struct cm_req_msg, 135, 2, 6) +#define CM_REQ_ALTERNATE_TRAFFIC_CLASS CM_FIELD8_LOC(struct cm_req_msg, 136, 8) +#define CM_REQ_ALTERNATE_HOP_LIMIT CM_FIELD8_LOC(struct cm_req_msg, 137, 8) +#define CM_REQ_ALTERNATE_SL CM_FIELD8_LOC(struct cm_req_msg, 138, 4) +#define CM_REQ_ALTERNATE_SUBNET_LOCAL \ + CM_FIELD_BLOC(struct cm_req_msg, 138, 4, 1) +#define CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT \ + CM_FIELD8_LOC(struct cm_req_msg, 139, 5) +#define CM_REQ_SAP_SUPPORTED CM_FIELD_BLOC(struct cm_req_msg, 139, 5, 1) +#define CM_REQ_PRIVATE_DATA CM_FIELD_MLOC(struct cm_req_msg, 140, 736, void) +CM_STRUCT(struct cm_req_msg, 140 * 8 + 736); + +/* Table 107 MRA Message Contents */ +#define CM_MRA_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_mra_msg, 0, 32) +#define CM_MRA_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_mra_msg, 4, 32) +#define CM_MRA_MESSAGE_MRAED CM_FIELD8_LOC(struct cm_mra_msg, 8, 2) +#define CM_MRA_SERVICE_TIMEOUT CM_FIELD8_LOC(struct cm_mra_msg, 9, 5) +#define CM_MRA_PRIVATE_DATA CM_FIELD_MLOC(struct cm_mra_msg, 10, 1776, void) +CM_STRUCT(struct cm_mra_msg, 10 * 8 + 1776); + +/* Table 108 REJ Message Contents */ +#define CM_REJ_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_rej_msg, 0, 32) +#define CM_REJ_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_rej_msg, 4, 32) +#define CM_REJ_MESSAGE_REJECTED CM_FIELD8_LOC(struct cm_rej_msg, 8, 2) +#define CM_REJ_REJECTED_INFO_LENGTH CM_FIELD8_LOC(struct cm_rej_msg, 9, 7) +#define CM_REJ_REASON CM_FIELD16_LOC(struct cm_rej_msg, 10, 16) +#define CM_REJ_ARI CM_FIELD_MLOC(struct cm_rej_msg, 12, 576, void) +#define CM_REJ_PRIVATE_DATA CM_FIELD_MLOC(struct cm_rej_msg, 84, 1184, void) +CM_STRUCT(struct cm_rej_msg, 84 * 8 + 1184); + +/* Table 110 REP Message Contents */ +#define CM_REP_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_rep_msg, 0, 32) +#define CM_REP_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_rep_msg, 4, 32) +#define CM_REP_LOCAL_Q_KEY CM_FIELD32_LOC(struct cm_rep_msg, 8, 32) +#define CM_REP_LOCAL_QPN CM_FIELD32_LOC(struct cm_rep_msg, 12, 24) +#define CM_REP_LOCAL_EE_CONTEXT_NUMBER CM_FIELD32_LOC(struct cm_rep_msg, 16, 24) +#define CM_REP_STARTING_PSN CM_FIELD32_LOC(struct cm_rep_msg, 20, 24) +#define CM_REP_RESPONDER_RESOURCES CM_FIELD8_LOC(struct cm_rep_msg, 24, 8) +#define CM_REP_INITIATOR_DEPTH CM_FIELD8_LOC(struct cm_rep_msg, 25, 8) +#define CM_REP_TARGET_ACK_DELAY CM_FIELD8_LOC(struct cm_rep_msg, 26, 5) +#define CM_REP_FAILOVER_ACCEPTED CM_FIELD_BLOC(struct cm_rep_msg, 26, 5, 2) +#define CM_REP_END_TO_END_FLOW_CONTROL \ + CM_FIELD_BLOC(struct cm_rep_msg, 26, 7, 1) +#define CM_REP_RNR_RETRY_COUNT CM_FIELD8_LOC(struct cm_rep_msg, 27, 3) +#define CM_REP_SRQ CM_FIELD_BLOC(struct cm_rep_msg, 27, 3, 1) +#define CM_REP_LOCAL_CA_GUID CM_FIELD64_LOC(struct cm_rep_msg, 28) +#define CM_REP_PRIVATE_DATA CM_FIELD_MLOC(struct cm_rep_msg, 36, 1568, void) +CM_STRUCT(struct cm_rep_msg, 36 * 8 + 1568); + +/* Table 111 RTU Message Contents */ +#define CM_RTU_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_rtu_msg, 0, 32) +#define CM_RTU_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_rtu_msg, 4, 32) +#define CM_RTU_PRIVATE_DATA CM_FIELD_MLOC(struct cm_rtu_msg, 8, 1792, void) +CM_STRUCT(struct cm_rtu_msg, 8 * 8 + 1792); + +/* Table 112 DREQ Message Contents */ +#define CM_DREQ_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_dreq_msg, 0, 32) +#define CM_DREQ_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_dreq_msg, 4, 32) +#define CM_DREQ_REMOTE_QPN_EECN CM_FIELD32_LOC(struct cm_dreq_msg, 8, 24) +#define CM_DREQ_PRIVATE_DATA CM_FIELD_MLOC(struct cm_dreq_msg, 12, 1760, void) +CM_STRUCT(struct cm_dreq_msg, 12 * 8 + 1760); + +/* Table 113 DREP Message Contents */ +#define CM_DREP_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_drep_msg, 0, 32) +#define CM_DREP_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_drep_msg, 4, 32) +#define CM_DREP_PRIVATE_DATA CM_FIELD_MLOC(struct cm_drep_msg, 8, 1792, void) +CM_STRUCT(struct cm_drep_msg, 8 * 8 + 1792); + +/* Table 115 LAP Message Contents */ +#define CM_LAP_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_lap_msg, 0, 32) +#define CM_LAP_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_lap_msg, 4, 32) +#define CM_LAP_REMOTE_QPN_EECN CM_FIELD32_LOC(struct cm_lap_msg, 12, 24) +#define CM_LAP_REMOTE_CM_RESPONSE_TIMEOUT \ + CM_FIELD8_LOC(struct cm_lap_msg, 15, 5) +#define CM_LAP_ALTERNATE_LOCAL_PORT_LID \ + CM_FIELD16_LOC(struct cm_lap_msg, 20, 16) +#define CM_LAP_ALTERNATE_REMOTE_PORT_LID \ + CM_FIELD16_LOC(struct cm_lap_msg, 22, 16) +#define CM_LAP_ALTERNATE_LOCAL_PORT_GID \ + CM_FIELD_MLOC(struct cm_lap_msg, 24, 128, union ib_gid) +#define CM_LAP_ALTERNATE_REMOTE_PORT_GID \ + CM_FIELD_MLOC(struct cm_lap_msg, 40, 128, union ib_gid) +#define CM_LAP_ALTERNATE_FLOW_LABEL CM_FIELD32_LOC(struct cm_lap_msg, 56, 20) +#define CM_LAP_ALTERNATE_TRAFFIC_CLASS CM_FIELD8_LOC(struct cm_lap_msg, 59, 8) +#define CM_LAP_ALTERNATE_HOP_LIMIT CM_FIELD8_LOC(struct cm_lap_msg, 60, 8) +#define CM_LAP_ALTERNATE_PACKET_RATE CM_FIELD_BLOC(struct cm_lap_msg, 61, 2, 6) +#define CM_LAP_ALTERNATE_SL CM_FIELD8_LOC(struct cm_lap_msg, 62, 4) +#define CM_LAP_ALTERNATE_SUBNET_LOCAL CM_FIELD_BLOC(struct cm_lap_msg, 62, 4, 1) +#define CM_LAP_ALTERNATE_LOCAL_ACK_TIMEOUT \ + CM_FIELD8_LOC(struct cm_lap_msg, 63, 5) +#define CM_LAP_PRIVATE_DATA CM_FIELD_MLOC(struct cm_lap_msg, 64, 1344, void) +CM_STRUCT(struct cm_lap_msg, 64 * 8 + 1344); + +/* Table 116 APR Message Contents */ +#define CM_APR_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_apr_msg, 0, 32) +#define CM_APR_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_apr_msg, 4, 32) +#define CM_APR_ADDITIONAL_INFORMATION_LENGTH \ + CM_FIELD8_LOC(struct cm_apr_msg, 8, 8) +#define CM_APR_AR_STATUS CM_FIELD8_LOC(struct cm_apr_msg, 9, 8) +#define CM_APR_ADDITIONAL_INFORMATION \ + CM_FIELD_MLOC(struct cm_apr_msg, 12, 576, void) +#define CM_APR_PRIVATE_DATA CM_FIELD_MLOC(struct cm_apr_msg, 84, 1184, void) +CM_STRUCT(struct cm_apr_msg, 84 * 8 + 1184); + +/* Table 119 SIDR_REQ Message Contents */ +#define CM_SIDR_REQ_REQUESTID CM_FIELD32_LOC(struct cm_sidr_req_msg, 0, 32) +#define CM_SIDR_REQ_PARTITION_KEY CM_FIELD16_LOC(struct cm_sidr_req_msg, 4, 16) +#define CM_SIDR_REQ_SERVICEID CM_FIELD64_LOC(struct cm_sidr_req_msg, 8) +#define CM_SIDR_REQ_PRIVATE_DATA \ + CM_FIELD_MLOC(struct cm_sidr_req_msg, 16, 1728, void) +CM_STRUCT(struct cm_sidr_req_msg, 16 * 8 + 1728); + +/* Table 120 SIDR_REP Message Contents */ +#define CM_SIDR_REP_REQUESTID CM_FIELD32_LOC(struct cm_sidr_rep_msg, 0, 32) +#define CM_SIDR_REP_STATUS CM_FIELD8_LOC(struct cm_sidr_rep_msg, 4, 8) +#define CM_SIDR_REP_ADDITIONAL_INFORMATION_LENGTH \ + CM_FIELD8_LOC(struct cm_sidr_rep_msg, 5, 8) +#define CM_SIDR_REP_QPN CM_FIELD32_LOC(struct cm_sidr_rep_msg, 8, 24) +#define CM_SIDR_REP_SERVICEID CM_FIELD64_LOC(struct cm_sidr_rep_msg, 12) +#define CM_SIDR_REP_Q_KEY CM_FIELD32_LOC(struct cm_sidr_rep_msg, 20, 32) +#define CM_SIDR_REP_ADDITIONAL_INFORMATION \ + CM_FIELD_MLOC(struct cm_sidr_rep_msg, 24, 576, void) +#define CM_SIDR_REP_PRIVATE_DATA \ + CM_FIELD_MLOC(struct cm_sidr_rep_msg, 96, 1088, void) +CM_STRUCT(struct cm_sidr_rep_msg, 96 * 8 + 1088); + +#endif /* _IBTA_VOL1_C12_H_ */ diff --git a/include/rdma/iw_portmap.h b/include/rdma/iw_portmap.h index b9fee7feeeb5..c89535047c42 100644 --- a/include/rdma/iw_portmap.h +++ b/include/rdma/iw_portmap.h @@ -33,6 +33,9 @@ #ifndef _IW_PORTMAP_H #define _IW_PORTMAP_H +#include <linux/socket.h> +#include <linux/netlink.h> + #define IWPM_ULIBNAME_SIZE 32 #define IWPM_DEVNAME_SIZE 32 #define IWPM_IFNAME_SIZE 16 diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h index 7147a9263011..bdbfe25d3854 100644 --- a/include/rdma/opa_port_info.h +++ b/include/rdma/opa_port_info.h @@ -33,6 +33,8 @@ #if !defined(OPA_PORT_INFO_H) #define OPA_PORT_INFO_H +#include <rdma/opa_smi.h> + #define OPA_PORT_LINK_MODE_NOP 0 /* No change */ #define OPA_PORT_LINK_MODE_OPA 4 /* Port mode is OPA */ diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 6631624e4d7c..ab22759de7ea 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -76,28 +76,32 @@ int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh, /** * Send the supplied skb to a specific userspace PID. + * @net: Net namespace in which to send the skb * @skb: The netlink skb * @pid: Userspace netlink process ID * Returns 0 on success or a negative error code. */ -int rdma_nl_unicast(struct sk_buff *skb, u32 pid); +int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid); /** * Send, with wait/1 retry, the supplied skb to a specific userspace PID. + * @net: Net namespace in which to send the skb * @skb: The netlink skb * @pid: Userspace netlink process ID * Returns 0 on success or a negative error code. */ -int rdma_nl_unicast_wait(struct sk_buff *skb, __u32 pid); +int rdma_nl_unicast_wait(struct net *net, struct sk_buff *skb, __u32 pid); /** * Send the supplied skb to a netlink group. + * @net: Net namespace in which to send the skb * @skb: The netlink skb * @group: Netlink group ID * @flags: allocation flags * Returns 0 on success or a negative error code. */ -int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags); +int rdma_nl_multicast(struct net *net, struct sk_buff *skb, + unsigned int group, gfp_t flags); /** * Check if there are any listeners to the netlink group diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 525848e227dc..ac5a9430abb6 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -116,6 +116,7 @@ struct rvt_ibport { u64 n_unaligned; u64 n_rc_dupreq; u64 n_rc_seqnak; + u64 n_rc_crwaits; u16 pkey_violations; u16 qkey_violations; u16 mkey_violations; diff --git a/include/rdma/rdmavt_cq.h b/include/rdma/rdmavt_cq.h index 04c519ef6d71..574eb7278f46 100644 --- a/include/rdma/rdmavt_cq.h +++ b/include/rdma/rdmavt_cq.h @@ -53,6 +53,7 @@ #include <linux/kthread.h> #include <rdma/ib_user_verbs.h> +#include <rdma/ib_verbs.h> /* * Define an ib_cq_notify value that is not valid so we know when CQ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index e06c77d76463..0d5c70e2d8ab 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -640,34 +640,14 @@ static inline int rvt_cmp_msn(u32 a, u32 b) return (((int)a) - ((int)b)) << 8; } -/** - * rvt_compute_aeth - compute the AETH (syndrome + MSN) - * @qp: the queue pair to compute the AETH for - * - * Returns the AETH. - */ __be32 rvt_compute_aeth(struct rvt_qp *qp); -/** - * rvt_get_credit - flush the send work queue of a QP - * @qp: the qp who's send work queue to flush - * @aeth: the Acknowledge Extended Transport Header - * - * The QP s_lock should be held. - */ void rvt_get_credit(struct rvt_qp *qp, u32 aeth); -/** - * rvt_restart_sge - rewind the sge state for a wqe - * @ss: the sge state pointer - * @wqe: the wqe to rewind - * @len: the data length from the start of the wqe in bytes - * - * Returns the remaining data length. - */ u32 rvt_restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 len); /** + * rvt_div_round_up_mtu - round up divide * @qp - the qp pair * @len - the length * @@ -973,6 +953,41 @@ static inline void rvt_free_rq(struct rvt_rq *rq) rq->wq = NULL; } +/** + * rvt_to_iport - Get the ibport pointer + * @qp: the qp pointer + * + * This function returns the ibport pointer from the qp pointer. + */ +static inline struct rvt_ibport *rvt_to_iport(struct rvt_qp *qp) +{ + struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + + return rdi->ports[qp->port_num - 1]; +} + +/** + * rvt_rc_credit_avail - Check if there are enough RC credits for the request + * @qp: the qp + * @wqe: the request + * + * This function returns false when there are not enough credits for the given + * request and true otherwise. + */ +static inline bool rvt_rc_credit_avail(struct rvt_qp *qp, struct rvt_swqe *wqe) +{ + lockdep_assert_held(&qp->s_lock); + if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && + rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) { + struct rvt_ibport *rvp = rvt_to_iport(qp); + + qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; + rvp->n_rc_crwaits++; + return false; + } + return true; +} + struct rvt_qp_iter *rvt_qp_iter_init(struct rvt_dev_info *rdi, u64 v, void (*cb)(struct rvt_qp *qp, u64 v)); diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index b0fc6b26bdf5..7682d1bcf789 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -105,8 +105,7 @@ struct rdma_restrack_entry { }; int rdma_restrack_count(struct ib_device *dev, - enum rdma_restrack_type type, - struct pid_namespace *ns); + enum rdma_restrack_type type); void rdma_restrack_kadd(struct rdma_restrack_entry *res); void rdma_restrack_uadd(struct rdma_restrack_entry *res); @@ -157,6 +156,11 @@ int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name, int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value); int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value); +int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name, + const char *str); +int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name, + u64 value); + struct rdma_restrack_entry *rdma_restrack_get_byid(struct ib_device *dev, enum rdma_restrack_type type, u32 id); diff --git a/include/rdma/signature.h b/include/rdma/signature.h index f24cc2a1d3c5..d16b0fcc8344 100644 --- a/include/rdma/signature.h +++ b/include/rdma/signature.h @@ -6,6 +6,8 @@ #ifndef _RDMA_SIGNATURE_H_ #define _RDMA_SIGNATURE_H_ +#include <linux/types.h> + enum ib_signature_prot_cap { IB_PROT_T10DIF_TYPE_1 = 1, IB_PROT_T10DIF_TYPE_2 = 1 << 1, diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h index 3447bfe356d6..6ae6cf8e4c2e 100644 --- a/include/rdma/uverbs_named_ioctl.h +++ b/include/rdma/uverbs_named_ioctl.h @@ -76,7 +76,7 @@ #define DECLARE_UVERBS_NAMED_OBJECT(_object_id, _type_attrs, ...) \ static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \ _object_id)[] = { __VA_ARGS__ }; \ - const struct uverbs_object_def UVERBS_OBJECT(_object_id) = { \ + static const struct uverbs_object_def UVERBS_OBJECT(_object_id) = { \ .id = _object_id, \ .type_attrs = &_type_attrs, \ .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \ @@ -88,10 +88,10 @@ * identify all uapi methods with a (object,method) tuple. However, they have * no type pointer. */ -#define DECLARE_UVERBS_GLOBAL_METHODS(_object_id, ...) \ +#define DECLARE_UVERBS_GLOBAL_METHODS(_object_id, ...) \ static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \ _object_id)[] = { __VA_ARGS__ }; \ - const struct uverbs_object_def UVERBS_OBJECT(_object_id) = { \ + static const struct uverbs_object_def UVERBS_OBJECT(_object_id) = { \ .id = _object_id, \ .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \ .methods = &UVERBS_OBJECT_METHODS(_object_id) \ diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 05eabfd5d0d3..1b28ce1aba07 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -104,16 +104,6 @@ static inline void uobj_put_write(struct ib_uobject *uobj) rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); } -static inline int __must_check -uobj_alloc_commit(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs) -{ - int ret = rdma_alloc_commit_uobject(uobj, attrs); - - if (ret) - return ret; - return 0; -} - static inline void uobj_alloc_abort(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs) { @@ -124,8 +114,7 @@ static inline struct ib_uobject * __uobj_alloc(const struct uverbs_api_object *obj, struct uverbs_attr_bundle *attrs, struct ib_device **ib_dev) { - struct ib_uobject *uobj = - rdma_alloc_begin_uobject(obj, attrs->ufile, attrs); + struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, attrs); if (!IS_ERR(uobj)) *ib_dev = attrs->context->device; diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index d57a5ba00c74..f1cbdae67250 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -83,9 +83,9 @@ enum rdma_lookup_mode { */ struct uverbs_obj_type_class { struct ib_uobject *(*alloc_begin)(const struct uverbs_api_object *obj, - struct ib_uverbs_file *ufile); + struct uverbs_attr_bundle *attrs); /* This consumes the kref on uobj */ - int (*alloc_commit)(struct ib_uobject *uobj); + void (*alloc_commit)(struct ib_uobject *uobj); /* This does not consume the kref on uobj */ void (*alloc_abort)(struct ib_uobject *uobj); @@ -98,7 +98,6 @@ struct uverbs_obj_type_class { enum rdma_remove_reason why, struct uverbs_attr_bundle *attrs); void (*remove_handle)(struct ib_uobject *uobj); - u8 needs_kfree_rcu; }; struct uverbs_obj_type { @@ -138,23 +137,34 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj, void rdma_lookup_put_uobject(struct ib_uobject *uobj, enum rdma_lookup_mode mode); struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, - struct ib_uverbs_file *ufile, struct uverbs_attr_bundle *attrs); void rdma_alloc_abort_uobject(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs); -int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj, - struct uverbs_attr_bundle *attrs); +void rdma_alloc_commit_uobject(struct ib_uobject *uobj, + struct uverbs_attr_bundle *attrs); + +/* + * uverbs_uobject_get is called in order to increase the reference count on + * an uobject. This is useful when a handler wants to keep the uobject's memory + * alive, regardless if this uobject is still alive in the context's objects + * repository. Objects are put via uverbs_uobject_put. + */ +static inline void uverbs_uobject_get(struct ib_uobject *uobject) +{ + kref_get(&uobject->ref); +} +void uverbs_uobject_put(struct ib_uobject *uobject); struct uverbs_obj_fd_type { /* * In fd based objects, uverbs_obj_type_ops points to generic * fd operations. In order to specialize the underlying types (e.g. * completion_channel), we use fops, name and flags for fd creation. - * context_closed is called when the context is closed either when - * the driver is removed or the process terminated. + * destroy_object is called when the uobject is to be destroyed, + * because the driver is removed or the FD is closed. */ struct uverbs_obj_type type; - int (*context_closed)(struct ib_uobject *uobj, + int (*destroy_object)(struct ib_uobject *uobj, enum rdma_remove_reason why); const struct file_operations *fops; const char *name; @@ -163,11 +173,11 @@ struct uverbs_obj_fd_type { extern const struct uverbs_obj_type_class uverbs_idr_class; extern const struct uverbs_obj_type_class uverbs_fd_class; -void uverbs_close_fd(struct file *f); +int uverbs_uobject_fd_release(struct inode *inode, struct file *filp); #define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) - \ sizeof(char)) -#define UVERBS_TYPE_ALLOC_FD(_obj_size, _context_closed, _fops, _name, _flags)\ +#define UVERBS_TYPE_ALLOC_FD(_obj_size, _destroy_object, _fops, _name, _flags) \ ((&((const struct uverbs_obj_fd_type) \ {.type = { \ .type_class = &uverbs_fd_class, \ @@ -175,7 +185,7 @@ void uverbs_close_fd(struct file *f); UVERBS_BUILD_BUG_ON((_obj_size) < \ sizeof(struct ib_uobject)), \ }, \ - .context_closed = _context_closed, \ + .destroy_object = _destroy_object, \ .fops = _fops, \ .name = _name, \ .flags = _flags}))->type) |