diff options
Diffstat (limited to 'include/rdma/ib_verbs.h')
-rw-r--r-- | include/rdma/ib_verbs.h | 216 |
1 files changed, 176 insertions, 40 deletions
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4f225175cb91..1f779fad3a1e 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -72,11 +72,16 @@ #define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN struct ib_umem_odp; +struct ib_uqp_object; +struct ib_usrq_object; +struct ib_uwq_object; extern struct workqueue_struct *ib_wq; extern struct workqueue_struct *ib_comp_wq; extern struct workqueue_struct *ib_comp_unbound_wq; +struct ib_ucq_object; + __printf(3, 4) __cold void ibdev_printk(const char *level, const struct ib_device *ibdev, const char *format, ...); @@ -98,15 +103,54 @@ void ibdev_info(const struct ib_device *ibdev, const char *format, ...); #if defined(CONFIG_DYNAMIC_DEBUG) #define ibdev_dbg(__dev, format, args...) \ dynamic_ibdev_dbg(__dev, format, ##args) -#elif defined(DEBUG) -#define ibdev_dbg(__dev, format, args...) \ - ibdev_printk(KERN_DEBUG, __dev, format, ##args) #else __printf(2, 3) __cold static inline void ibdev_dbg(const struct ib_device *ibdev, const char *format, ...) {} #endif +#define ibdev_level_ratelimited(ibdev_level, ibdev, fmt, ...) \ +do { \ + static DEFINE_RATELIMIT_STATE(_rs, \ + DEFAULT_RATELIMIT_INTERVAL, \ + DEFAULT_RATELIMIT_BURST); \ + if (__ratelimit(&_rs)) \ + ibdev_level(ibdev, fmt, ##__VA_ARGS__); \ +} while (0) + +#define ibdev_emerg_ratelimited(ibdev, fmt, ...) \ + ibdev_level_ratelimited(ibdev_emerg, ibdev, fmt, ##__VA_ARGS__) +#define ibdev_alert_ratelimited(ibdev, fmt, ...) \ + ibdev_level_ratelimited(ibdev_alert, ibdev, fmt, ##__VA_ARGS__) +#define ibdev_crit_ratelimited(ibdev, fmt, ...) \ + ibdev_level_ratelimited(ibdev_crit, ibdev, fmt, ##__VA_ARGS__) +#define ibdev_err_ratelimited(ibdev, fmt, ...) \ + ibdev_level_ratelimited(ibdev_err, ibdev, fmt, ##__VA_ARGS__) +#define ibdev_warn_ratelimited(ibdev, fmt, ...) \ + ibdev_level_ratelimited(ibdev_warn, ibdev, fmt, ##__VA_ARGS__) +#define ibdev_notice_ratelimited(ibdev, fmt, ...) \ + ibdev_level_ratelimited(ibdev_notice, ibdev, fmt, ##__VA_ARGS__) +#define ibdev_info_ratelimited(ibdev, fmt, ...) \ + ibdev_level_ratelimited(ibdev_info, ibdev, fmt, ##__VA_ARGS__) + +#if defined(CONFIG_DYNAMIC_DEBUG) +/* descriptor check is first to prevent flooding with "callbacks suppressed" */ +#define ibdev_dbg_ratelimited(ibdev, fmt, ...) \ +do { \ + static DEFINE_RATELIMIT_STATE(_rs, \ + DEFAULT_RATELIMIT_INTERVAL, \ + DEFAULT_RATELIMIT_BURST); \ + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ + if (DYNAMIC_DEBUG_BRANCH(descriptor) && __ratelimit(&_rs)) \ + __dynamic_ibdev_dbg(&descriptor, ibdev, fmt, \ + ##__VA_ARGS__); \ +} while (0) +#else +__printf(2, 3) __cold +static inline +void ibdev_dbg_ratelimited(const struct ib_device *ibdev, const char *format, ...) {} +#endif + union ib_gid { u8 raw[16]; struct { @@ -327,7 +371,7 @@ struct ib_tm_caps { struct ib_cq_init_attr { unsigned int cqe; - int comp_vector; + u32 comp_vector; u32 flags; }; @@ -406,6 +450,8 @@ struct ib_device_attr { struct ib_tm_caps tm_caps; struct ib_cq_caps cq_caps; u64 max_dm_size; + /* Max entries for sgl for optimized performance per READ */ + u32 max_sgl_rd; }; enum ib_mtu { @@ -451,6 +497,16 @@ enum ib_port_state { IB_PORT_ACTIVE_DEFER = 5 }; +enum ib_port_phys_state { + IB_PORT_PHYS_STATE_SLEEP = 1, + IB_PORT_PHYS_STATE_POLLING = 2, + IB_PORT_PHYS_STATE_DISABLED = 3, + IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING = 4, + IB_PORT_PHYS_STATE_LINK_UP = 5, + IB_PORT_PHYS_STATE_LINK_ERROR_RECOVERY = 6, + IB_PORT_PHYS_STATE_PHY_TEST = 7, +}; + enum ib_port_width { IB_WIDTH_1X = 1, IB_WIDTH_2X = 16, @@ -1362,8 +1418,11 @@ enum ib_access_flags { IB_ZERO_BASED = IB_UVERBS_ACCESS_ZERO_BASED, IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND, IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB, + IB_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_RELAXED_ORDERING, - IB_ACCESS_SUPPORTED = ((IB_ACCESS_HUGETLB << 1) - 1) + IB_ACCESS_OPTIONAL = IB_UVERBS_ACCESS_OPTIONAL_RANGE, + IB_ACCESS_SUPPORTED = + ((IB_ACCESS_HUGETLB << 1) - 1) | IB_ACCESS_OPTIONAL, }; /* @@ -1417,16 +1476,12 @@ struct ib_ucontext { bool cleanup_retryable; - void (*invalidate_range)(struct ib_umem_odp *umem_odp, - unsigned long start, unsigned long end); - struct mutex per_mm_list_lock; - struct list_head per_mm_list; - struct ib_rdmacg_object cg_obj; /* * Implementation details of the RDMA core, don't use in drivers: */ struct rdma_restrack_entry res; + struct xarray mmap_xa; }; struct ib_uobject { @@ -1497,7 +1552,7 @@ enum ib_poll_context { struct ib_cq { struct ib_device *device; - struct ib_uobject *uobject; + struct ib_ucq_object *uobject; ib_comp_handler comp_handler; void (*event_handler)(struct ib_event *, void *); void *cq_context; @@ -1511,6 +1566,11 @@ struct ib_cq { }; struct workqueue_struct *comp_wq; struct dim *dim; + + /* updated only by trace points */ + ktime_t timestamp; + bool interrupt; + /* * Implementation details of the RDMA core, don't use in drivers: */ @@ -1520,7 +1580,7 @@ struct ib_cq { struct ib_srq { struct ib_device *device; struct ib_pd *pd; - struct ib_uobject *uobject; + struct ib_usrq_object *uobject; void (*event_handler)(struct ib_event *, void *); void *srq_context; enum ib_srq_type srq_type; @@ -1565,7 +1625,7 @@ enum ib_wq_state { struct ib_wq { struct ib_device *device; - struct ib_uobject *uobject; + struct ib_uwq_object *uobject; void *wq_context; void (*event_handler)(struct ib_event *, void *); struct ib_pd *pd; @@ -1681,7 +1741,7 @@ struct ib_qp { atomic_t usecnt; struct list_head open_list; struct ib_qp *real_qp; - struct ib_uobject *uobject; + struct ib_uqp_object *uobject; void (*event_handler)(struct ib_event *, void *); void *qp_context; /* sgid_attrs associated with the AV's */ @@ -2076,7 +2136,7 @@ struct ib_flow_action { atomic_t usecnt; }; -struct ib_mad_hdr; +struct ib_mad; struct ib_grh; enum ib_process_mad_flags { @@ -2100,11 +2160,6 @@ struct ib_port_cache { enum ib_port_state port_state; }; -struct ib_cache { - rwlock_t lock; - struct ib_event_handler event_handler; -}; - struct ib_port_immutable { int pkey_tbl_len; int gid_tbl_len; @@ -2174,6 +2229,11 @@ struct rdma_netdev_alloc_params { struct net_device *netdev, void *param); }; +struct ib_odp_counters { + atomic64_t faults; + atomic64_t invalidations; +}; + struct ib_counters { struct ib_device *device; struct ib_uobject *uobject; @@ -2207,6 +2267,21 @@ struct iw_cm_conn_param; #define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct +struct rdma_user_mmap_entry { + struct kref ref; + struct ib_ucontext *ucontext; + unsigned long start_pgoff; + size_t npages; + bool driver_removed; +}; + +/* Return the offset (in bytes) the user should pass to libc's mmap() */ +static inline u64 +rdma_user_mmap_get_offset(const struct rdma_user_mmap_entry *entry) +{ + return (u64)entry->start_pgoff << PAGE_SHIFT; +} + /** * struct ib_device_ops - InfiniBand device operations * This structure defines all the InfiniBand device operations, providers will @@ -2234,9 +2309,8 @@ struct ib_device_ops { int (*process_mad)(struct ib_device *device, int process_mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, size_t in_mad_size, - struct ib_mad_hdr *out_mad, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in_mad, struct ib_mad *out_mad, + size_t *out_mad_size, u16 *out_mad_pkey_index); int (*query_device)(struct ib_device *device, struct ib_device_attr *device_attr, struct ib_udata *udata); @@ -2319,6 +2393,13 @@ struct ib_device_ops { struct ib_udata *udata); void (*dealloc_ucontext)(struct ib_ucontext *context); int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma); + /** + * This will be called once refcount of an entry in mmap_xa reaches + * zero. The type of the memory that was mapped may differ between + * entries and is opaque to the rdma_user_mmap interface. + * Therefore needs to be implemented by the driver in mmap_free. + */ + void (*mmap_free)(struct rdma_user_mmap_entry *entry); void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata); void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); @@ -2402,6 +2483,9 @@ struct ib_device_ops { struct ifla_vf_info *ivf); int (*get_vf_stats)(struct ib_device *device, int vf, u8 port, struct ifla_vf_stats *stats); + int (*get_vf_guid)(struct ib_device *device, int vf, u8 port, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid); int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid, int type); struct ib_wq *(*create_wq)(struct ib_pd *pd, @@ -2517,6 +2601,13 @@ struct ib_device_ops { */ int (*counter_update_stats)(struct rdma_counter *counter); + /** + * Allows rdma drivers to add their own restrack attributes + * dumped via 'rdma stat' iproute2 command. + */ + int (*fill_stat_entry)(struct sk_buff *msg, + struct rdma_restrack_entry *entry); + DECLARE_RDMA_OBJ_SIZE(ib_ah); DECLARE_RDMA_OBJ_SIZE(ib_cq); DECLARE_RDMA_OBJ_SIZE(ib_pd); @@ -2544,13 +2635,18 @@ struct ib_device { struct rcu_head rcu_head; struct list_head event_handler_list; - spinlock_t event_handler_lock; + /* Protects event_handler_list */ + struct rw_semaphore event_handler_rwsem; + + /* Protects QP's event_handler calls and open_qp list */ + spinlock_t qp_open_list_lock; struct rw_semaphore client_data_rwsem; struct xarray client_data; struct mutex unregistration_lock; - struct ib_cache cache; + /* Synchronize GID, Pkey cache entries, subnet prefix, LMC */ + rwlock_t cache_lock; /** * port_data is indexed by port number */ @@ -2743,18 +2839,26 @@ void ib_set_client_data(struct ib_device *device, struct ib_client *client, void ib_set_device_ops(struct ib_device *device, const struct ib_device_ops *ops); -#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, - unsigned long pfn, unsigned long size, pgprot_t prot); -#else -static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext, - struct vm_area_struct *vma, - unsigned long pfn, unsigned long size, - pgprot_t prot) -{ - return -EINVAL; -} -#endif + unsigned long pfn, unsigned long size, pgprot_t prot, + struct rdma_user_mmap_entry *entry); +int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, + struct rdma_user_mmap_entry *entry, + size_t length); +int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext, + struct rdma_user_mmap_entry *entry, + size_t length, u32 min_pgoff, + u32 max_pgoff); + +struct rdma_user_mmap_entry * +rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext, + unsigned long pgoff); +struct rdma_user_mmap_entry * +rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, + struct vm_area_struct *vma); +void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry); + +void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry); static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len) { @@ -2851,7 +2955,7 @@ bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, void ib_register_event_handler(struct ib_event_handler *event_handler); void ib_unregister_event_handler(struct ib_event_handler *event_handler); -void ib_dispatch_event(struct ib_event *event); +void ib_dispatch_event(const struct ib_event *event); int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr); @@ -3257,6 +3361,9 @@ int ib_get_vf_config(struct ib_device *device, int vf, u8 port, struct ifla_vf_info *info); int ib_get_vf_stats(struct ib_device *device, int vf, u8 port, struct ifla_vf_stats *stats); +int ib_get_vf_guid(struct ib_device *device, int vf, u8 port, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid); int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, int type); @@ -3713,6 +3820,25 @@ static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private, NULL); } +struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private, + int nr_cqe, enum ib_poll_context poll_ctx, + const char *caller); + +/** + * ib_alloc_cq_any: Allocate kernel CQ + * @dev: The IB device + * @private: Private data attached to the CQE + * @nr_cqe: Number of CQEs in the CQ + * @poll_ctx: Context used for polling the CQ + */ +static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev, + void *private, int nr_cqe, + enum ib_poll_context poll_ctx) +{ + return __ib_alloc_cq_any(dev, private, nr_cqe, poll_ctx, + KBUILD_MODNAME); +} + /** * ib_free_cq_user - Free kernel/user CQ * @cq: The CQ to free @@ -3978,9 +4104,7 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, */ static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev) { - struct device_dma_parameters *p = dev->dma_device->dma_parms; - - return p ? p->max_segment_size : UINT_MAX; + return dma_get_max_seg_size(dev->dma_device); } /** @@ -4042,6 +4166,15 @@ static inline void ib_dma_free_coherent(struct ib_device *dev, dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle); } +/* ib_reg_user_mr - register a memory region for virtual addresses from kernel + * space. This function should be called when 'current' is the owning MM. + */ +struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int mr_access_flags); + +/* ib_advise_mr - give an advice about an address range in a memory region */ +int ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, + u32 flags, struct ib_sge *sg_list, u32 num_sge); /** * ib_dereg_mr_user - Deregisters a memory region and removes it from the * HCA translation table. @@ -4189,6 +4322,9 @@ static inline int ib_check_mr_access(int flags) !(flags & IB_ACCESS_LOCAL_WRITE)) return -EINVAL; + if (flags & ~IB_ACCESS_SUPPORTED) + return -EINVAL; + return 0; } |