summaryrefslogtreecommitdiffstats
path: root/include/rdma/ib_verbs.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/rdma/ib_verbs.h')
-rw-r--r--include/rdma/ib_verbs.h216
1 files changed, 176 insertions, 40 deletions
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 4f225175cb91..1f779fad3a1e 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -72,11 +72,16 @@
#define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN
struct ib_umem_odp;
+struct ib_uqp_object;
+struct ib_usrq_object;
+struct ib_uwq_object;
extern struct workqueue_struct *ib_wq;
extern struct workqueue_struct *ib_comp_wq;
extern struct workqueue_struct *ib_comp_unbound_wq;
+struct ib_ucq_object;
+
__printf(3, 4) __cold
void ibdev_printk(const char *level, const struct ib_device *ibdev,
const char *format, ...);
@@ -98,15 +103,54 @@ void ibdev_info(const struct ib_device *ibdev, const char *format, ...);
#if defined(CONFIG_DYNAMIC_DEBUG)
#define ibdev_dbg(__dev, format, args...) \
dynamic_ibdev_dbg(__dev, format, ##args)
-#elif defined(DEBUG)
-#define ibdev_dbg(__dev, format, args...) \
- ibdev_printk(KERN_DEBUG, __dev, format, ##args)
#else
__printf(2, 3) __cold
static inline
void ibdev_dbg(const struct ib_device *ibdev, const char *format, ...) {}
#endif
+#define ibdev_level_ratelimited(ibdev_level, ibdev, fmt, ...) \
+do { \
+ static DEFINE_RATELIMIT_STATE(_rs, \
+ DEFAULT_RATELIMIT_INTERVAL, \
+ DEFAULT_RATELIMIT_BURST); \
+ if (__ratelimit(&_rs)) \
+ ibdev_level(ibdev, fmt, ##__VA_ARGS__); \
+} while (0)
+
+#define ibdev_emerg_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_emerg, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_alert_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_alert, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_crit_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_crit, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_err_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_err, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_warn_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_warn, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_notice_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_notice, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_info_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_info, ibdev, fmt, ##__VA_ARGS__)
+
+#if defined(CONFIG_DYNAMIC_DEBUG)
+/* descriptor check is first to prevent flooding with "callbacks suppressed" */
+#define ibdev_dbg_ratelimited(ibdev, fmt, ...) \
+do { \
+ static DEFINE_RATELIMIT_STATE(_rs, \
+ DEFAULT_RATELIMIT_INTERVAL, \
+ DEFAULT_RATELIMIT_BURST); \
+ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \
+ if (DYNAMIC_DEBUG_BRANCH(descriptor) && __ratelimit(&_rs)) \
+ __dynamic_ibdev_dbg(&descriptor, ibdev, fmt, \
+ ##__VA_ARGS__); \
+} while (0)
+#else
+__printf(2, 3) __cold
+static inline
+void ibdev_dbg_ratelimited(const struct ib_device *ibdev, const char *format, ...) {}
+#endif
+
union ib_gid {
u8 raw[16];
struct {
@@ -327,7 +371,7 @@ struct ib_tm_caps {
struct ib_cq_init_attr {
unsigned int cqe;
- int comp_vector;
+ u32 comp_vector;
u32 flags;
};
@@ -406,6 +450,8 @@ struct ib_device_attr {
struct ib_tm_caps tm_caps;
struct ib_cq_caps cq_caps;
u64 max_dm_size;
+ /* Max entries for sgl for optimized performance per READ */
+ u32 max_sgl_rd;
};
enum ib_mtu {
@@ -451,6 +497,16 @@ enum ib_port_state {
IB_PORT_ACTIVE_DEFER = 5
};
+enum ib_port_phys_state {
+ IB_PORT_PHYS_STATE_SLEEP = 1,
+ IB_PORT_PHYS_STATE_POLLING = 2,
+ IB_PORT_PHYS_STATE_DISABLED = 3,
+ IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING = 4,
+ IB_PORT_PHYS_STATE_LINK_UP = 5,
+ IB_PORT_PHYS_STATE_LINK_ERROR_RECOVERY = 6,
+ IB_PORT_PHYS_STATE_PHY_TEST = 7,
+};
+
enum ib_port_width {
IB_WIDTH_1X = 1,
IB_WIDTH_2X = 16,
@@ -1362,8 +1418,11 @@ enum ib_access_flags {
IB_ZERO_BASED = IB_UVERBS_ACCESS_ZERO_BASED,
IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND,
IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB,
+ IB_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_RELAXED_ORDERING,
- IB_ACCESS_SUPPORTED = ((IB_ACCESS_HUGETLB << 1) - 1)
+ IB_ACCESS_OPTIONAL = IB_UVERBS_ACCESS_OPTIONAL_RANGE,
+ IB_ACCESS_SUPPORTED =
+ ((IB_ACCESS_HUGETLB << 1) - 1) | IB_ACCESS_OPTIONAL,
};
/*
@@ -1417,16 +1476,12 @@ struct ib_ucontext {
bool cleanup_retryable;
- void (*invalidate_range)(struct ib_umem_odp *umem_odp,
- unsigned long start, unsigned long end);
- struct mutex per_mm_list_lock;
- struct list_head per_mm_list;
-
struct ib_rdmacg_object cg_obj;
/*
* Implementation details of the RDMA core, don't use in drivers:
*/
struct rdma_restrack_entry res;
+ struct xarray mmap_xa;
};
struct ib_uobject {
@@ -1497,7 +1552,7 @@ enum ib_poll_context {
struct ib_cq {
struct ib_device *device;
- struct ib_uobject *uobject;
+ struct ib_ucq_object *uobject;
ib_comp_handler comp_handler;
void (*event_handler)(struct ib_event *, void *);
void *cq_context;
@@ -1511,6 +1566,11 @@ struct ib_cq {
};
struct workqueue_struct *comp_wq;
struct dim *dim;
+
+ /* updated only by trace points */
+ ktime_t timestamp;
+ bool interrupt;
+
/*
* Implementation details of the RDMA core, don't use in drivers:
*/
@@ -1520,7 +1580,7 @@ struct ib_cq {
struct ib_srq {
struct ib_device *device;
struct ib_pd *pd;
- struct ib_uobject *uobject;
+ struct ib_usrq_object *uobject;
void (*event_handler)(struct ib_event *, void *);
void *srq_context;
enum ib_srq_type srq_type;
@@ -1565,7 +1625,7 @@ enum ib_wq_state {
struct ib_wq {
struct ib_device *device;
- struct ib_uobject *uobject;
+ struct ib_uwq_object *uobject;
void *wq_context;
void (*event_handler)(struct ib_event *, void *);
struct ib_pd *pd;
@@ -1681,7 +1741,7 @@ struct ib_qp {
atomic_t usecnt;
struct list_head open_list;
struct ib_qp *real_qp;
- struct ib_uobject *uobject;
+ struct ib_uqp_object *uobject;
void (*event_handler)(struct ib_event *, void *);
void *qp_context;
/* sgid_attrs associated with the AV's */
@@ -2076,7 +2136,7 @@ struct ib_flow_action {
atomic_t usecnt;
};
-struct ib_mad_hdr;
+struct ib_mad;
struct ib_grh;
enum ib_process_mad_flags {
@@ -2100,11 +2160,6 @@ struct ib_port_cache {
enum ib_port_state port_state;
};
-struct ib_cache {
- rwlock_t lock;
- struct ib_event_handler event_handler;
-};
-
struct ib_port_immutable {
int pkey_tbl_len;
int gid_tbl_len;
@@ -2174,6 +2229,11 @@ struct rdma_netdev_alloc_params {
struct net_device *netdev, void *param);
};
+struct ib_odp_counters {
+ atomic64_t faults;
+ atomic64_t invalidations;
+};
+
struct ib_counters {
struct ib_device *device;
struct ib_uobject *uobject;
@@ -2207,6 +2267,21 @@ struct iw_cm_conn_param;
#define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct
+struct rdma_user_mmap_entry {
+ struct kref ref;
+ struct ib_ucontext *ucontext;
+ unsigned long start_pgoff;
+ size_t npages;
+ bool driver_removed;
+};
+
+/* Return the offset (in bytes) the user should pass to libc's mmap() */
+static inline u64
+rdma_user_mmap_get_offset(const struct rdma_user_mmap_entry *entry)
+{
+ return (u64)entry->start_pgoff << PAGE_SHIFT;
+}
+
/**
* struct ib_device_ops - InfiniBand device operations
* This structure defines all the InfiniBand device operations, providers will
@@ -2234,9 +2309,8 @@ struct ib_device_ops {
int (*process_mad)(struct ib_device *device, int process_mad_flags,
u8 port_num, const struct ib_wc *in_wc,
const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in_mad, size_t in_mad_size,
- struct ib_mad_hdr *out_mad, size_t *out_mad_size,
- u16 *out_mad_pkey_index);
+ const struct ib_mad *in_mad, struct ib_mad *out_mad,
+ size_t *out_mad_size, u16 *out_mad_pkey_index);
int (*query_device)(struct ib_device *device,
struct ib_device_attr *device_attr,
struct ib_udata *udata);
@@ -2319,6 +2393,13 @@ struct ib_device_ops {
struct ib_udata *udata);
void (*dealloc_ucontext)(struct ib_ucontext *context);
int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
+ /**
+ * This will be called once refcount of an entry in mmap_xa reaches
+ * zero. The type of the memory that was mapped may differ between
+ * entries and is opaque to the rdma_user_mmap interface.
+ * Therefore needs to be implemented by the driver in mmap_free.
+ */
+ void (*mmap_free)(struct rdma_user_mmap_entry *entry);
void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
@@ -2402,6 +2483,9 @@ struct ib_device_ops {
struct ifla_vf_info *ivf);
int (*get_vf_stats)(struct ib_device *device, int vf, u8 port,
struct ifla_vf_stats *stats);
+ int (*get_vf_guid)(struct ib_device *device, int vf, u8 port,
+ struct ifla_vf_guid *node_guid,
+ struct ifla_vf_guid *port_guid);
int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid,
int type);
struct ib_wq *(*create_wq)(struct ib_pd *pd,
@@ -2517,6 +2601,13 @@ struct ib_device_ops {
*/
int (*counter_update_stats)(struct rdma_counter *counter);
+ /**
+ * Allows rdma drivers to add their own restrack attributes
+ * dumped via 'rdma stat' iproute2 command.
+ */
+ int (*fill_stat_entry)(struct sk_buff *msg,
+ struct rdma_restrack_entry *entry);
+
DECLARE_RDMA_OBJ_SIZE(ib_ah);
DECLARE_RDMA_OBJ_SIZE(ib_cq);
DECLARE_RDMA_OBJ_SIZE(ib_pd);
@@ -2544,13 +2635,18 @@ struct ib_device {
struct rcu_head rcu_head;
struct list_head event_handler_list;
- spinlock_t event_handler_lock;
+ /* Protects event_handler_list */
+ struct rw_semaphore event_handler_rwsem;
+
+ /* Protects QP's event_handler calls and open_qp list */
+ spinlock_t qp_open_list_lock;
struct rw_semaphore client_data_rwsem;
struct xarray client_data;
struct mutex unregistration_lock;
- struct ib_cache cache;
+ /* Synchronize GID, Pkey cache entries, subnet prefix, LMC */
+ rwlock_t cache_lock;
/**
* port_data is indexed by port number
*/
@@ -2743,18 +2839,26 @@ void ib_set_client_data(struct ib_device *device, struct ib_client *client,
void ib_set_device_ops(struct ib_device *device,
const struct ib_device_ops *ops);
-#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
- unsigned long pfn, unsigned long size, pgprot_t prot);
-#else
-static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext,
- struct vm_area_struct *vma,
- unsigned long pfn, unsigned long size,
- pgprot_t prot)
-{
- return -EINVAL;
-}
-#endif
+ unsigned long pfn, unsigned long size, pgprot_t prot,
+ struct rdma_user_mmap_entry *entry);
+int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
+ struct rdma_user_mmap_entry *entry,
+ size_t length);
+int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
+ struct rdma_user_mmap_entry *entry,
+ size_t length, u32 min_pgoff,
+ u32 max_pgoff);
+
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
+ unsigned long pgoff);
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
+ struct vm_area_struct *vma);
+void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry);
+
+void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry);
static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
{
@@ -2851,7 +2955,7 @@ bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
void ib_register_event_handler(struct ib_event_handler *event_handler);
void ib_unregister_event_handler(struct ib_event_handler *event_handler);
-void ib_dispatch_event(struct ib_event *event);
+void ib_dispatch_event(const struct ib_event *event);
int ib_query_port(struct ib_device *device,
u8 port_num, struct ib_port_attr *port_attr);
@@ -3257,6 +3361,9 @@ int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
struct ifla_vf_info *info);
int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
struct ifla_vf_stats *stats);
+int ib_get_vf_guid(struct ib_device *device, int vf, u8 port,
+ struct ifla_vf_guid *node_guid,
+ struct ifla_vf_guid *port_guid);
int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
int type);
@@ -3713,6 +3820,25 @@ static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
NULL);
}
+struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
+ int nr_cqe, enum ib_poll_context poll_ctx,
+ const char *caller);
+
+/**
+ * ib_alloc_cq_any: Allocate kernel CQ
+ * @dev: The IB device
+ * @private: Private data attached to the CQE
+ * @nr_cqe: Number of CQEs in the CQ
+ * @poll_ctx: Context used for polling the CQ
+ */
+static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev,
+ void *private, int nr_cqe,
+ enum ib_poll_context poll_ctx)
+{
+ return __ib_alloc_cq_any(dev, private, nr_cqe, poll_ctx,
+ KBUILD_MODNAME);
+}
+
/**
* ib_free_cq_user - Free kernel/user CQ
* @cq: The CQ to free
@@ -3978,9 +4104,7 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
*/
static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev)
{
- struct device_dma_parameters *p = dev->dma_device->dma_parms;
-
- return p ? p->max_segment_size : UINT_MAX;
+ return dma_get_max_seg_size(dev->dma_device);
}
/**
@@ -4042,6 +4166,15 @@ static inline void ib_dma_free_coherent(struct ib_device *dev,
dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle);
}
+/* ib_reg_user_mr - register a memory region for virtual addresses from kernel
+ * space. This function should be called when 'current' is the owning MM.
+ */
+struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags);
+
+/* ib_advise_mr - give an advice about an address range in a memory region */
+int ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
+ u32 flags, struct ib_sge *sg_list, u32 num_sge);
/**
* ib_dereg_mr_user - Deregisters a memory region and removes it from the
* HCA translation table.
@@ -4189,6 +4322,9 @@ static inline int ib_check_mr_access(int flags)
!(flags & IB_ACCESS_LOCAL_WRITE))
return -EINVAL;
+ if (flags & ~IB_ACCESS_SUPPORTED)
+ return -EINVAL;
+
return 0;
}
OpenPOWER on IntegriCloud