summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/core
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--drivers/infiniband/core/Makefile9
-rw-r--r--drivers/infiniband/core/addr.c13
-rw-r--r--drivers/infiniband/core/cache.c160
-rw-r--r--drivers/infiniband/core/cm.c1065
-rw-r--r--drivers/infiniband/core/cm_msgs.h787
-rw-r--r--drivers/infiniband/core/cma.c209
-rw-r--r--drivers/infiniband/core/cma_configfs.c8
-rw-r--r--drivers/infiniband/core/cma_trace.c16
-rw-r--r--drivers/infiniband/core/cma_trace.h391
-rw-r--r--drivers/infiniband/core/core_priv.h39
-rw-r--r--drivers/infiniband/core/counters.c55
-rw-r--r--drivers/infiniband/core/cq.c55
-rw-r--r--drivers/infiniband/core/device.c211
-rw-r--r--drivers/infiniband/core/fmr_pool.c13
-rw-r--r--drivers/infiniband/core/ib_core_uverbs.c367
-rw-r--r--drivers/infiniband/core/iwcm.c52
-rw-r--r--drivers/infiniband/core/iwpm_msg.c17
-rw-r--r--drivers/infiniband/core/iwpm_util.c15
-rw-r--r--drivers/infiniband/core/iwpm_util.h5
-rw-r--r--drivers/infiniband/core/mad.c31
-rw-r--r--drivers/infiniband/core/netlink.c158
-rw-r--r--drivers/infiniband/core/nldev.c181
-rw-r--r--drivers/infiniband/core/rdma_core.c236
-rw-r--r--drivers/infiniband/core/rdma_core.h45
-rw-r--r--drivers/infiniband/core/restrack.c25
-rw-r--r--drivers/infiniband/core/restrack.h1
-rw-r--r--drivers/infiniband/core/rw.c31
-rw-r--r--drivers/infiniband/core/sa_query.c8
-rw-r--r--drivers/infiniband/core/security.c26
-rw-r--r--drivers/infiniband/core/sysfs.c42
-rw-r--r--drivers/infiniband/core/trace.c14
-rw-r--r--drivers/infiniband/core/umem.c125
-rw-r--r--drivers/infiniband/core/umem_odp.c645
-rw-r--r--drivers/infiniband/core/user_mad.c7
-rw-r--r--drivers/infiniband/core/uverbs.h50
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c343
-rw-r--r--drivers/infiniband/core/uverbs_ioctl.c48
-rw-r--r--drivers/infiniband/core/uverbs_main.c394
-rw-r--r--drivers/infiniband/core/uverbs_std_types.c45
-rw-r--r--drivers/infiniband/core/uverbs_std_types_async_fd.c52
-rw-r--r--drivers/infiniband/core/uverbs_std_types_cq.c19
-rw-r--r--drivers/infiniband/core/uverbs_std_types_device.c38
-rw-r--r--drivers/infiniband/core/uverbs_uapi.c7
-rw-r--r--drivers/infiniband/core/verbs.c118
44 files changed, 3031 insertions, 3145 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 09881bd5f12d..d1b14887960e 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -11,7 +11,8 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
device.o fmr_pool.o cache.o netlink.o \
roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
multicast.o mad.o smi.o agent.o mad_rmpp.o \
- nldev.o restrack.o counters.o
+ nldev.o restrack.o counters.o ib_core_uverbs.o \
+ trace.o
ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
@@ -20,7 +21,8 @@ ib_cm-y := cm.o
iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o
-rdma_cm-y := cma.o
+CFLAGS_cma_trace.o += -I$(src)
+rdma_cm-y := cma.o cma_trace.o
rdma_cm-$(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) += cma_configfs.o
@@ -33,6 +35,7 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
uverbs_std_types_cq.o \
uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
uverbs_std_types_mr.o uverbs_std_types_counters.o \
- uverbs_uapi.o uverbs_std_types_device.o
+ uverbs_uapi.o uverbs_std_types_device.o \
+ uverbs_std_types_async_fd.o
ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 9b76a8fcdd24..1753a9801b70 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -139,7 +139,7 @@ int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
if (ib_nl_is_good_ip_resp(nlh))
ib_nl_process_good_ip_rsep(nlh);
- return skb->len;
+ return 0;
}
static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
@@ -183,7 +183,7 @@ static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
/* Repair the nlmsg header length */
nlmsg_end(skb, nlh);
- rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, GFP_KERNEL);
+ rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, GFP_KERNEL);
/* Make the request retry, so when we get the response from userspace
* we will have something.
@@ -352,7 +352,7 @@ static bool has_gateway(const struct dst_entry *dst, sa_family_t family)
if (family == AF_INET) {
rt = container_of(dst, struct rtable, dst);
- return rt->rt_gw_family == AF_INET;
+ return rt->rt_uses_gateway;
}
rt6 = container_of(dst, struct rt6_info, dst);
@@ -421,16 +421,15 @@ static int addr6_resolve(struct sockaddr *src_sock,
(const struct sockaddr_in6 *)dst_sock;
struct flowi6 fl6;
struct dst_entry *dst;
- int ret;
memset(&fl6, 0, sizeof fl6);
fl6.daddr = dst_in->sin6_addr;
fl6.saddr = src_in->sin6_addr;
fl6.flowi6_oif = addr->bound_dev_if;
- ret = ipv6_stub->ipv6_dst_lookup(addr->net, NULL, &dst, &fl6);
- if (ret < 0)
- return ret;
+ dst = ipv6_stub->ipv6_dst_lookup_flow(addr->net, NULL, &fl6, NULL);
+ if (IS_ERR(dst))
+ return PTR_ERR(dst);
if (ipv6_addr_any(&src_in->sin6_addr))
src_in->sin6_addr = fl6.saddr;
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 18e476b3ced0..17bfedd24cc3 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -51,9 +51,8 @@ struct ib_pkey_cache {
struct ib_update_work {
struct work_struct work;
- struct ib_device *device;
- u8 port_num;
- bool enforce_security;
+ struct ib_event event;
+ bool enforce_security;
};
union ib_gid zgid;
@@ -130,7 +129,7 @@ static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
event.element.port_num = port;
event.event = IB_EVENT_GID_CHANGE;
- ib_dispatch_event(&event);
+ ib_dispatch_event_clients(&event);
}
static const char * const gid_type_str[] = {
@@ -810,6 +809,7 @@ static void release_gid_table(struct ib_device *device,
if (leak)
return;
+ mutex_destroy(&table->lock);
kfree(table->data_vec);
kfree(table);
}
@@ -818,22 +818,16 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
struct ib_gid_table *table)
{
int i;
- bool deleted = false;
if (!table)
return;
mutex_lock(&table->lock);
for (i = 0; i < table->sz; ++i) {
- if (is_gid_entry_valid(table->data_vec[i])) {
+ if (is_gid_entry_valid(table->data_vec[i]))
del_gid(ib_dev, port, table, i);
- deleted = true;
- }
}
mutex_unlock(&table->lock);
-
- if (deleted)
- dispatch_gid_change_event(ib_dev, port);
}
void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
@@ -1039,7 +1033,7 @@ int ib_get_cached_pkey(struct ib_device *device,
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
- read_lock_irqsave(&device->cache.lock, flags);
+ read_lock_irqsave(&device->cache_lock, flags);
cache = device->port_data[port_num].cache.pkey;
@@ -1048,7 +1042,7 @@ int ib_get_cached_pkey(struct ib_device *device,
else
*pkey = cache->table[index];
- read_unlock_irqrestore(&device->cache.lock, flags);
+ read_unlock_irqrestore(&device->cache_lock, flags);
return ret;
}
@@ -1063,9 +1057,9 @@ int ib_get_cached_subnet_prefix(struct ib_device *device,
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
- read_lock_irqsave(&device->cache.lock, flags);
+ read_lock_irqsave(&device->cache_lock, flags);
*sn_pfx = device->port_data[port_num].cache.subnet_prefix;
- read_unlock_irqrestore(&device->cache.lock, flags);
+ read_unlock_irqrestore(&device->cache_lock, flags);
return 0;
}
@@ -1085,7 +1079,7 @@ int ib_find_cached_pkey(struct ib_device *device,
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
- read_lock_irqsave(&device->cache.lock, flags);
+ read_lock_irqsave(&device->cache_lock, flags);
cache = device->port_data[port_num].cache.pkey;
@@ -1106,7 +1100,7 @@ int ib_find_cached_pkey(struct ib_device *device,
ret = 0;
}
- read_unlock_irqrestore(&device->cache.lock, flags);
+ read_unlock_irqrestore(&device->cache_lock, flags);
return ret;
}
@@ -1125,7 +1119,7 @@ int ib_find_exact_cached_pkey(struct ib_device *device,
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
- read_lock_irqsave(&device->cache.lock, flags);
+ read_lock_irqsave(&device->cache_lock, flags);
cache = device->port_data[port_num].cache.pkey;
@@ -1138,7 +1132,7 @@ int ib_find_exact_cached_pkey(struct ib_device *device,
break;
}
- read_unlock_irqrestore(&device->cache.lock, flags);
+ read_unlock_irqrestore(&device->cache_lock, flags);
return ret;
}
@@ -1154,9 +1148,9 @@ int ib_get_cached_lmc(struct ib_device *device,
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
- read_lock_irqsave(&device->cache.lock, flags);
+ read_lock_irqsave(&device->cache_lock, flags);
*lmc = device->port_data[port_num].cache.lmc;
- read_unlock_irqrestore(&device->cache.lock, flags);
+ read_unlock_irqrestore(&device->cache_lock, flags);
return ret;
}
@@ -1172,9 +1166,9 @@ int ib_get_cached_port_state(struct ib_device *device,
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
- read_lock_irqsave(&device->cache.lock, flags);
+ read_lock_irqsave(&device->cache_lock, flags);
*port_state = device->port_data[port_num].cache.port_state;
- read_unlock_irqrestore(&device->cache.lock, flags);
+ read_unlock_irqrestore(&device->cache_lock, flags);
return ret;
}
@@ -1386,9 +1380,8 @@ err:
return ret;
}
-static void ib_cache_update(struct ib_device *device,
- u8 port,
- bool enforce_security)
+static int
+ib_cache_update(struct ib_device *device, u8 port, bool enforce_security)
{
struct ib_port_attr *tprops = NULL;
struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
@@ -1396,11 +1389,11 @@ static void ib_cache_update(struct ib_device *device,
int ret;
if (!rdma_is_port_valid(device, port))
- return;
+ return -EINVAL;
tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
if (!tprops)
- return;
+ return -ENOMEM;
ret = ib_query_port(device, port, tprops);
if (ret) {
@@ -1418,8 +1411,10 @@ static void ib_cache_update(struct ib_device *device,
pkey_cache = kmalloc(struct_size(pkey_cache, table,
tprops->pkey_tbl_len),
GFP_KERNEL);
- if (!pkey_cache)
+ if (!pkey_cache) {
+ ret = -ENOMEM;
goto err;
+ }
pkey_cache->table_len = tprops->pkey_tbl_len;
@@ -1433,7 +1428,7 @@ static void ib_cache_update(struct ib_device *device,
}
}
- write_lock_irq(&device->cache.lock);
+ write_lock_irq(&device->cache_lock);
old_pkey_cache = device->port_data[port].cache.pkey;
@@ -1442,7 +1437,7 @@ static void ib_cache_update(struct ib_device *device,
device->port_data[port].cache.port_state = tprops->state;
device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix;
- write_unlock_irq(&device->cache.lock);
+ write_unlock_irq(&device->cache_lock);
if (enforce_security)
ib_security_cache_change(device,
@@ -1451,57 +1446,91 @@ static void ib_cache_update(struct ib_device *device,
kfree(old_pkey_cache);
kfree(tprops);
- return;
+ return 0;
err:
kfree(pkey_cache);
kfree(tprops);
+ return ret;
+}
+
+static void ib_cache_event_task(struct work_struct *_work)
+{
+ struct ib_update_work *work =
+ container_of(_work, struct ib_update_work, work);
+ int ret;
+
+ /* Before distributing the cache update event, first sync
+ * the cache.
+ */
+ ret = ib_cache_update(work->event.device, work->event.element.port_num,
+ work->enforce_security);
+
+ /* GID event is notified already for individual GID entries by
+ * dispatch_gid_change_event(). Hence, notifiy for rest of the
+ * events.
+ */
+ if (!ret && work->event.event != IB_EVENT_GID_CHANGE)
+ ib_dispatch_event_clients(&work->event);
+
+ kfree(work);
}
-static void ib_cache_task(struct work_struct *_work)
+static void ib_generic_event_task(struct work_struct *_work)
{
struct ib_update_work *work =
container_of(_work, struct ib_update_work, work);
- ib_cache_update(work->device,
- work->port_num,
- work->enforce_security);
+ ib_dispatch_event_clients(&work->event);
kfree(work);
}
-static void ib_cache_event(struct ib_event_handler *handler,
- struct ib_event *event)
+static bool is_cache_update_event(const struct ib_event *event)
+{
+ return (event->event == IB_EVENT_PORT_ERR ||
+ event->event == IB_EVENT_PORT_ACTIVE ||
+ event->event == IB_EVENT_LID_CHANGE ||
+ event->event == IB_EVENT_PKEY_CHANGE ||
+ event->event == IB_EVENT_CLIENT_REREGISTER ||
+ event->event == IB_EVENT_GID_CHANGE);
+}
+
+/**
+ * ib_dispatch_event - Dispatch an asynchronous event
+ * @event:Event to dispatch
+ *
+ * Low-level drivers must call ib_dispatch_event() to dispatch the
+ * event to all registered event handlers when an asynchronous event
+ * occurs.
+ */
+void ib_dispatch_event(const struct ib_event *event)
{
struct ib_update_work *work;
- if (event->event == IB_EVENT_PORT_ERR ||
- event->event == IB_EVENT_PORT_ACTIVE ||
- event->event == IB_EVENT_LID_CHANGE ||
- event->event == IB_EVENT_PKEY_CHANGE ||
- event->event == IB_EVENT_CLIENT_REREGISTER ||
- event->event == IB_EVENT_GID_CHANGE) {
- work = kmalloc(sizeof *work, GFP_ATOMIC);
- if (work) {
- INIT_WORK(&work->work, ib_cache_task);
- work->device = event->device;
- work->port_num = event->element.port_num;
- if (event->event == IB_EVENT_PKEY_CHANGE ||
- event->event == IB_EVENT_GID_CHANGE)
- work->enforce_security = true;
- else
- work->enforce_security = false;
-
- queue_work(ib_wq, &work->work);
- }
- }
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return;
+
+ if (is_cache_update_event(event))
+ INIT_WORK(&work->work, ib_cache_event_task);
+ else
+ INIT_WORK(&work->work, ib_generic_event_task);
+
+ work->event = *event;
+ if (event->event == IB_EVENT_PKEY_CHANGE ||
+ event->event == IB_EVENT_GID_CHANGE)
+ work->enforce_security = true;
+
+ queue_work(ib_wq, &work->work);
}
+EXPORT_SYMBOL(ib_dispatch_event);
int ib_cache_setup_one(struct ib_device *device)
{
unsigned int p;
int err;
- rwlock_init(&device->cache.lock);
+ rwlock_init(&device->cache_lock);
err = gid_table_setup_one(device);
if (err)
@@ -1510,9 +1539,6 @@ int ib_cache_setup_one(struct ib_device *device)
rdma_for_each_port (device, p)
ib_cache_update(device, p, true);
- INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
- device, ib_cache_event);
- ib_register_event_handler(&device->cache.event_handler);
return 0;
}
@@ -1534,14 +1560,12 @@ void ib_cache_release_one(struct ib_device *device)
void ib_cache_cleanup_one(struct ib_device *device)
{
- /* The cleanup function unregisters the event handler,
- * waits for all in-progress workqueue elements and cleans
- * up the GID cache. This function should be called after
- * the device was removed from the devices list and all
- * clients were removed, so the cache exists but is
+ /* The cleanup function waits for all in-progress workqueue
+ * elements and cleans up the GID cache. This function should be
+ * called after the device was removed from the devices list and
+ * all clients were removed, so the cache exists but is
* non-functional and shouldn't be updated anymore.
*/
- ib_unregister_event_handler(&device->cache.event_handler);
flush_workqueue(ib_wq);
gid_table_cleanup_one(device);
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index da10e6ccb43c..68cc1b2d6824 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1,36 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2004-2007 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved.
*/
#include <linux/completion.h>
@@ -246,7 +220,7 @@ struct cm_work {
};
struct cm_timewait_info {
- struct cm_work work; /* Must be first. */
+ struct cm_work work;
struct list_head list;
struct rb_node remote_qp_node;
struct rb_node remote_id_node;
@@ -263,10 +237,11 @@ struct cm_id_private {
struct rb_node sidr_id_node;
spinlock_t lock; /* Do not acquire inside cm.lock */
struct completion comp;
- atomic_t refcount;
+ refcount_t refcount;
/* Number of clients sharing this ib_cm_id. Only valid for listeners.
* Protected by the cm.lock spinlock. */
int listen_sharecount;
+ struct rcu_head rcu;
struct ib_mad_send_buf *msg;
struct cm_timewait_info *timewait_info;
@@ -308,7 +283,7 @@ static void cm_work_handler(struct work_struct *work);
static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
{
- if (atomic_dec_and_test(&cm_id_priv->refcount))
+ if (refcount_dec_and_test(&cm_id_priv->refcount))
complete(&cm_id_priv->comp);
}
@@ -365,7 +340,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
m->ah = ah;
m->retries = cm_id_priv->max_cm_retries;
- atomic_inc(&cm_id_priv->refcount);
+ refcount_inc(&cm_id_priv->refcount);
m->context[0] = cm_id_priv;
*msg = m;
@@ -619,28 +594,16 @@ static void cm_free_id(__be32 local_id)
xa_erase_irq(&cm.local_id_table, cm_local_id(local_id));
}
-static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
+static struct cm_id_private *cm_acquire_id(__be32 local_id, __be32 remote_id)
{
struct cm_id_private *cm_id_priv;
+ rcu_read_lock();
cm_id_priv = xa_load(&cm.local_id_table, cm_local_id(local_id));
- if (cm_id_priv) {
- if (cm_id_priv->id.remote_id == remote_id)
- atomic_inc(&cm_id_priv->refcount);
- else
- cm_id_priv = NULL;
- }
-
- return cm_id_priv;
-}
-
-static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
-{
- struct cm_id_private *cm_id_priv;
-
- spin_lock_irq(&cm.lock);
- cm_id_priv = cm_get_id(local_id, remote_id);
- spin_unlock_irq(&cm.lock);
+ if (!cm_id_priv || cm_id_priv->id.remote_id != remote_id ||
+ !refcount_inc_not_zero(&cm_id_priv->refcount))
+ cm_id_priv = NULL;
+ rcu_read_unlock();
return cm_id_priv;
}
@@ -883,7 +846,7 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
INIT_LIST_HEAD(&cm_id_priv->prim_list);
INIT_LIST_HEAD(&cm_id_priv->altr_list);
atomic_set(&cm_id_priv->work_count, -1);
- atomic_set(&cm_id_priv->refcount, 1);
+ refcount_set(&cm_id_priv->refcount, 1);
return &cm_id_priv->id;
error:
@@ -1115,7 +1078,7 @@ retest:
rdma_destroy_ah_attr(&cm_id_priv->av.ah_attr);
rdma_destroy_ah_attr(&cm_id_priv->alt_av.ah_attr);
kfree(cm_id_priv->private_data);
- kfree(cm_id_priv);
+ kfree_rcu(cm_id_priv, rcu);
}
void ib_destroy_cm_id(struct ib_cm_id *cm_id)
@@ -1230,7 +1193,7 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
spin_unlock_irqrestore(&cm.lock, flags);
return ERR_PTR(-EINVAL);
}
- atomic_inc(&cm_id_priv->refcount);
+ refcount_inc(&cm_id_priv->refcount);
++cm_id_priv->listen_sharecount;
spin_unlock_irqrestore(&cm.lock, flags);
@@ -1288,54 +1251,72 @@ static void cm_format_req(struct cm_req_msg *req_msg,
cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
cm_form_tid(cm_id_priv));
- req_msg->local_comm_id = cm_id_priv->id.local_id;
- req_msg->service_id = param->service_id;
- req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
- cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
- cm_req_set_init_depth(req_msg, param->initiator_depth);
- cm_req_set_remote_resp_timeout(req_msg,
- param->remote_cm_response_timeout);
+ IBA_SET(CM_REQ_LOCAL_COMM_ID, req_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_REQ_SERVICE_ID, req_msg, be64_to_cpu(param->service_id));
+ IBA_SET(CM_REQ_LOCAL_CA_GUID, req_msg,
+ be64_to_cpu(cm_id_priv->id.device->node_guid));
+ IBA_SET(CM_REQ_LOCAL_QPN, req_msg, param->qp_num);
+ IBA_SET(CM_REQ_INITIATOR_DEPTH, req_msg, param->initiator_depth);
+ IBA_SET(CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT, req_msg,
+ param->remote_cm_response_timeout);
cm_req_set_qp_type(req_msg, param->qp_type);
- cm_req_set_flow_ctrl(req_msg, param->flow_control);
- cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
- cm_req_set_local_resp_timeout(req_msg,
- param->local_cm_response_timeout);
- req_msg->pkey = param->primary_path->pkey;
- cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
- cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);
+ IBA_SET(CM_REQ_END_TO_END_FLOW_CONTROL, req_msg, param->flow_control);
+ IBA_SET(CM_REQ_STARTING_PSN, req_msg, param->starting_psn);
+ IBA_SET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg,
+ param->local_cm_response_timeout);
+ IBA_SET(CM_REQ_PARTITION_KEY, req_msg,
+ be16_to_cpu(param->primary_path->pkey));
+ IBA_SET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg,
+ param->primary_path->mtu);
+ IBA_SET(CM_REQ_MAX_CM_RETRIES, req_msg, param->max_cm_retries);
if (param->qp_type != IB_QPT_XRC_INI) {
- cm_req_set_resp_res(req_msg, param->responder_resources);
- cm_req_set_retry_count(req_msg, param->retry_count);
- cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
- cm_req_set_srq(req_msg, param->srq);
+ IBA_SET(CM_REQ_RESPONDER_RESOURCES, req_msg,
+ param->responder_resources);
+ IBA_SET(CM_REQ_RETRY_COUNT, req_msg, param->retry_count);
+ IBA_SET(CM_REQ_RNR_RETRY_COUNT, req_msg,
+ param->rnr_retry_count);
+ IBA_SET(CM_REQ_SRQ, req_msg, param->srq);
}
- req_msg->primary_local_gid = pri_path->sgid;
- req_msg->primary_remote_gid = pri_path->dgid;
+ *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg) =
+ pri_path->sgid;
+ *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg) =
+ pri_path->dgid;
if (pri_ext) {
- req_msg->primary_local_gid.global.interface_id
- = OPA_MAKE_ID(be32_to_cpu(pri_path->opa.slid));
- req_msg->primary_remote_gid.global.interface_id
- = OPA_MAKE_ID(be32_to_cpu(pri_path->opa.dlid));
+ IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg)
+ ->global.interface_id =
+ OPA_MAKE_ID(be32_to_cpu(pri_path->opa.slid));
+ IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg)
+ ->global.interface_id =
+ OPA_MAKE_ID(be32_to_cpu(pri_path->opa.dlid));
}
if (pri_path->hop_limit <= 1) {
- req_msg->primary_local_lid = pri_ext ? 0 :
- htons(ntohl(sa_path_get_slid(pri_path)));
- req_msg->primary_remote_lid = pri_ext ? 0 :
- htons(ntohl(sa_path_get_dlid(pri_path)));
+ IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(pri_ext ? 0 :
+ htons(ntohl(sa_path_get_slid(
+ pri_path)))));
+ IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
+ be16_to_cpu(pri_ext ? 0 :
+ htons(ntohl(sa_path_get_dlid(
+ pri_path)))));
} else {
/* Work-around until there's a way to obtain remote LID info */
- req_msg->primary_local_lid = IB_LID_PERMISSIVE;
- req_msg->primary_remote_lid = IB_LID_PERMISSIVE;
+ IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(IB_LID_PERMISSIVE));
+ IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
+ be16_to_cpu(IB_LID_PERMISSIVE));
}
- cm_req_set_primary_flow_label(req_msg, pri_path->flow_label);
- cm_req_set_primary_packet_rate(req_msg, pri_path->rate);
- req_msg->primary_traffic_class = pri_path->traffic_class;
- req_msg->primary_hop_limit = pri_path->hop_limit;
- cm_req_set_primary_sl(req_msg, pri_path->sl);
- cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1));
- cm_req_set_primary_local_ack_timeout(req_msg,
+ IBA_SET(CM_REQ_PRIMARY_FLOW_LABEL, req_msg,
+ be32_to_cpu(pri_path->flow_label));
+ IBA_SET(CM_REQ_PRIMARY_PACKET_RATE, req_msg, pri_path->rate);
+ IBA_SET(CM_REQ_PRIMARY_TRAFFIC_CLASS, req_msg, pri_path->traffic_class);
+ IBA_SET(CM_REQ_PRIMARY_HOP_LIMIT, req_msg, pri_path->hop_limit);
+ IBA_SET(CM_REQ_PRIMARY_SL, req_msg, pri_path->sl);
+ IBA_SET(CM_REQ_PRIMARY_SUBNET_LOCAL, req_msg,
+ (pri_path->hop_limit <= 1));
+ IBA_SET(CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT, req_msg,
cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
pri_path->packet_life_time));
@@ -1346,38 +1327,55 @@ static void cm_format_req(struct cm_req_msg *req_msg,
alt_ext = opa_is_extended_lid(alt_path->opa.dlid,
alt_path->opa.slid);
- req_msg->alt_local_gid = alt_path->sgid;
- req_msg->alt_remote_gid = alt_path->dgid;
+ *IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg) =
+ alt_path->sgid;
+ *IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg) =
+ alt_path->dgid;
if (alt_ext) {
- req_msg->alt_local_gid.global.interface_id
- = OPA_MAKE_ID(be32_to_cpu(alt_path->opa.slid));
- req_msg->alt_remote_gid.global.interface_id
- = OPA_MAKE_ID(be32_to_cpu(alt_path->opa.dlid));
+ IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID,
+ req_msg)
+ ->global.interface_id =
+ OPA_MAKE_ID(be32_to_cpu(alt_path->opa.slid));
+ IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_REMOTE_PORT_GID,
+ req_msg)
+ ->global.interface_id =
+ OPA_MAKE_ID(be32_to_cpu(alt_path->opa.dlid));
}
if (alt_path->hop_limit <= 1) {
- req_msg->alt_local_lid = alt_ext ? 0 :
- htons(ntohl(sa_path_get_slid(alt_path)));
- req_msg->alt_remote_lid = alt_ext ? 0 :
- htons(ntohl(sa_path_get_dlid(alt_path)));
+ IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(
+ alt_ext ? 0 :
+ htons(ntohl(sa_path_get_slid(
+ alt_path)))));
+ IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg,
+ be16_to_cpu(
+ alt_ext ? 0 :
+ htons(ntohl(sa_path_get_dlid(
+ alt_path)))));
} else {
- req_msg->alt_local_lid = IB_LID_PERMISSIVE;
- req_msg->alt_remote_lid = IB_LID_PERMISSIVE;
+ IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(IB_LID_PERMISSIVE));
+ IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg,
+ be16_to_cpu(IB_LID_PERMISSIVE));
}
- cm_req_set_alt_flow_label(req_msg,
- alt_path->flow_label);
- cm_req_set_alt_packet_rate(req_msg, alt_path->rate);
- req_msg->alt_traffic_class = alt_path->traffic_class;
- req_msg->alt_hop_limit = alt_path->hop_limit;
- cm_req_set_alt_sl(req_msg, alt_path->sl);
- cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1));
- cm_req_set_alt_local_ack_timeout(req_msg,
+ IBA_SET(CM_REQ_ALTERNATE_FLOW_LABEL, req_msg,
+ be32_to_cpu(alt_path->flow_label));
+ IBA_SET(CM_REQ_ALTERNATE_PACKET_RATE, req_msg, alt_path->rate);
+ IBA_SET(CM_REQ_ALTERNATE_TRAFFIC_CLASS, req_msg,
+ alt_path->traffic_class);
+ IBA_SET(CM_REQ_ALTERNATE_HOP_LIMIT, req_msg,
+ alt_path->hop_limit);
+ IBA_SET(CM_REQ_ALTERNATE_SL, req_msg, alt_path->sl);
+ IBA_SET(CM_REQ_ALTERNATE_SUBNET_LOCAL, req_msg,
+ (alt_path->hop_limit <= 1));
+ IBA_SET(CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT, req_msg,
cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
alt_path->packet_life_time));
}
if (param->private_data && param->private_data_len)
- memcpy(req_msg->private_data, param->private_data,
- param->private_data_len);
+ IBA_SET_MEM(CM_REQ_PRIVATE_DATA, req_msg, param->private_data,
+ param->private_data_len);
}
static int cm_validate_req_param(struct ib_cm_req_param *param)
@@ -1469,8 +1467,8 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;
- cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
- cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg);
+ cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
+ cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
spin_lock_irqsave(&cm_id_priv->lock, flags);
ret = ib_post_send_mad(cm_id_priv->msg, NULL);
@@ -1508,14 +1506,16 @@ static int cm_issue_rej(struct cm_port *port,
rej_msg = (struct cm_rej_msg *) msg->mad;
cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid);
- rej_msg->remote_comm_id = rcv_msg->local_comm_id;
- rej_msg->local_comm_id = rcv_msg->remote_comm_id;
- cm_rej_set_msg_rejected(rej_msg, msg_rejected);
- rej_msg->reason = cpu_to_be16(reason);
+ IBA_SET(CM_REJ_REMOTE_COMM_ID, rej_msg,
+ IBA_GET(CM_REJ_LOCAL_COMM_ID, rcv_msg));
+ IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
+ IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg));
+ IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, msg_rejected);
+ IBA_SET(CM_REJ_REASON, rej_msg, reason);
if (ari && ari_length) {
- cm_rej_set_reject_info_len(rej_msg, ari_length);
- memcpy(rej_msg->ari, ari, ari_length);
+ IBA_SET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg, ari_length);
+ IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length);
}
ret = ib_post_send_mad(msg, NULL);
@@ -1525,18 +1525,12 @@ static int cm_issue_rej(struct cm_port *port,
return ret;
}
-static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid,
- __be32 local_qpn, __be32 remote_qpn)
-{
- return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) ||
- ((local_ca_guid == remote_ca_guid) &&
- (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn))));
-}
-
static bool cm_req_has_alt_path(struct cm_req_msg *req_msg)
{
- return ((req_msg->alt_local_lid) ||
- (ib_is_opa_gid(&req_msg->alt_local_gid)));
+ return ((cpu_to_be16(
+ IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg))) ||
+ (ib_is_opa_gid(IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID,
+ req_msg))));
}
static void cm_path_set_rec_type(struct ib_device *ib_device, u8 port_num,
@@ -1556,14 +1550,18 @@ static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg,
if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) {
sa_path_set_dlid(primary_path,
- ntohs(req_msg->primary_local_lid));
+ IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID,
+ req_msg));
sa_path_set_slid(primary_path,
- ntohs(req_msg->primary_remote_lid));
+ IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID,
+ req_msg));
} else {
- lid = opa_get_lid_from_gid(&req_msg->primary_local_gid);
+ lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
+ CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg));
sa_path_set_dlid(primary_path, lid);
- lid = opa_get_lid_from_gid(&req_msg->primary_remote_gid);
+ lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
+ CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg));
sa_path_set_slid(primary_path, lid);
}
@@ -1571,13 +1569,19 @@ static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg,
return;
if (alt_path->rec_type != SA_PATH_REC_TYPE_OPA) {
- sa_path_set_dlid(alt_path, ntohs(req_msg->alt_local_lid));
- sa_path_set_slid(alt_path, ntohs(req_msg->alt_remote_lid));
+ sa_path_set_dlid(alt_path,
+ IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID,
+ req_msg));
+ sa_path_set_slid(alt_path,
+ IBA_GET(CM_REQ_ALTERNATE_REMOTE_PORT_LID,
+ req_msg));
} else {
- lid = opa_get_lid_from_gid(&req_msg->alt_local_gid);
+ lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
+ CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg));
sa_path_set_dlid(alt_path, lid);
- lid = opa_get_lid_from_gid(&req_msg->alt_remote_gid);
+ lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
+ CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg));
sa_path_set_slid(alt_path, lid);
}
}
@@ -1586,44 +1590,58 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
struct sa_path_rec *primary_path,
struct sa_path_rec *alt_path)
{
- primary_path->dgid = req_msg->primary_local_gid;
- primary_path->sgid = req_msg->primary_remote_gid;
- primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
- primary_path->hop_limit = req_msg->primary_hop_limit;
- primary_path->traffic_class = req_msg->primary_traffic_class;
+ primary_path->dgid =
+ *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg);
+ primary_path->sgid =
+ *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg);
+ primary_path->flow_label =
+ cpu_to_be32(IBA_GET(CM_REQ_PRIMARY_FLOW_LABEL, req_msg));
+ primary_path->hop_limit = IBA_GET(CM_REQ_PRIMARY_HOP_LIMIT, req_msg);
+ primary_path->traffic_class =
+ IBA_GET(CM_REQ_PRIMARY_TRAFFIC_CLASS, req_msg);
primary_path->reversible = 1;
- primary_path->pkey = req_msg->pkey;
- primary_path->sl = cm_req_get_primary_sl(req_msg);
+ primary_path->pkey =
+ cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
+ primary_path->sl = IBA_GET(CM_REQ_PRIMARY_SL, req_msg);
primary_path->mtu_selector = IB_SA_EQ;
- primary_path->mtu = cm_req_get_path_mtu(req_msg);
+ primary_path->mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
primary_path->rate_selector = IB_SA_EQ;
- primary_path->rate = cm_req_get_primary_packet_rate(req_msg);
+ primary_path->rate = IBA_GET(CM_REQ_PRIMARY_PACKET_RATE, req_msg);
primary_path->packet_life_time_selector = IB_SA_EQ;
primary_path->packet_life_time =
- cm_req_get_primary_local_ack_timeout(req_msg);
+ IBA_GET(CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT, req_msg);
primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
- primary_path->service_id = req_msg->service_id;
+ primary_path->service_id =
+ cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
if (sa_path_is_roce(primary_path))
primary_path->roce.route_resolved = false;
if (cm_req_has_alt_path(req_msg)) {
- alt_path->dgid = req_msg->alt_local_gid;
- alt_path->sgid = req_msg->alt_remote_gid;
- alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
- alt_path->hop_limit = req_msg->alt_hop_limit;
- alt_path->traffic_class = req_msg->alt_traffic_class;
+ alt_path->dgid = *IBA_GET_MEM_PTR(
+ CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg);
+ alt_path->sgid = *IBA_GET_MEM_PTR(
+ CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg);
+ alt_path->flow_label = cpu_to_be32(
+ IBA_GET(CM_REQ_ALTERNATE_FLOW_LABEL, req_msg));
+ alt_path->hop_limit =
+ IBA_GET(CM_REQ_ALTERNATE_HOP_LIMIT, req_msg);
+ alt_path->traffic_class =
+ IBA_GET(CM_REQ_ALTERNATE_TRAFFIC_CLASS, req_msg);
alt_path->reversible = 1;
- alt_path->pkey = req_msg->pkey;
- alt_path->sl = cm_req_get_alt_sl(req_msg);
+ alt_path->pkey =
+ cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
+ alt_path->sl = IBA_GET(CM_REQ_ALTERNATE_SL, req_msg);
alt_path->mtu_selector = IB_SA_EQ;
- alt_path->mtu = cm_req_get_path_mtu(req_msg);
+ alt_path->mtu =
+ IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
alt_path->rate_selector = IB_SA_EQ;
- alt_path->rate = cm_req_get_alt_packet_rate(req_msg);
+ alt_path->rate = IBA_GET(CM_REQ_ALTERNATE_PACKET_RATE, req_msg);
alt_path->packet_life_time_selector = IB_SA_EQ;
alt_path->packet_life_time =
- cm_req_get_alt_local_ack_timeout(req_msg);
+ IBA_GET(CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT, req_msg);
alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
- alt_path->service_id = req_msg->service_id;
+ alt_path->service_id =
+ cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
if (sa_path_is_roce(alt_path))
alt_path->roce.route_resolved = false;
@@ -1698,23 +1716,25 @@ static void cm_format_req_event(struct cm_work *work,
} else {
param->alternate_path = NULL;
}
- param->remote_ca_guid = req_msg->local_ca_guid;
- param->remote_qkey = be32_to_cpu(req_msg->local_qkey);
- param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg));
+ param->remote_ca_guid =
+ cpu_to_be64(IBA_GET(CM_REQ_LOCAL_CA_GUID, req_msg));
+ param->remote_qkey = IBA_GET(CM_REQ_LOCAL_Q_KEY, req_msg);
+ param->remote_qpn = IBA_GET(CM_REQ_LOCAL_QPN, req_msg);
param->qp_type = cm_req_get_qp_type(req_msg);
- param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg));
- param->responder_resources = cm_req_get_init_depth(req_msg);
- param->initiator_depth = cm_req_get_resp_res(req_msg);
+ param->starting_psn = IBA_GET(CM_REQ_STARTING_PSN, req_msg);
+ param->responder_resources = IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg);
+ param->initiator_depth = IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg);
param->local_cm_response_timeout =
- cm_req_get_remote_resp_timeout(req_msg);
- param->flow_control = cm_req_get_flow_ctrl(req_msg);
+ IBA_GET(CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT, req_msg);
+ param->flow_control = IBA_GET(CM_REQ_END_TO_END_FLOW_CONTROL, req_msg);
param->remote_cm_response_timeout =
- cm_req_get_local_resp_timeout(req_msg);
- param->retry_count = cm_req_get_retry_count(req_msg);
- param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
- param->srq = cm_req_get_srq(req_msg);
+ IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg);
+ param->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg);
+ param->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg);
+ param->srq = IBA_GET(CM_REQ_SRQ, req_msg);
param->ppath_sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
- work->cm_event.private_data = &req_msg->private_data;
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_REQ_PRIVATE_DATA, req_msg);
}
static void cm_process_work(struct cm_id_private *cm_id_priv,
@@ -1748,13 +1768,16 @@ static void cm_format_mra(struct cm_mra_msg *mra_msg,
const void *private_data, u8 private_data_len)
{
cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
- cm_mra_set_msg_mraed(mra_msg, msg_mraed);
- mra_msg->local_comm_id = cm_id_priv->id.local_id;
- mra_msg->remote_comm_id = cm_id_priv->id.remote_id;
- cm_mra_set_service_timeout(mra_msg, service_timeout);
+ IBA_SET(CM_MRA_MESSAGE_MRAED, mra_msg, msg_mraed);
+ IBA_SET(CM_MRA_LOCAL_COMM_ID, mra_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_MRA_REMOTE_COMM_ID, mra_msg,
+ be32_to_cpu(cm_id_priv->id.remote_id));
+ IBA_SET(CM_MRA_SERVICE_TIMEOUT, mra_msg, service_timeout);
if (private_data && private_data_len)
- memcpy(mra_msg->private_data, private_data, private_data_len);
+ IBA_SET_MEM(CM_MRA_PRIVATE_DATA, mra_msg, private_data,
+ private_data_len);
}
static void cm_format_rej(struct cm_rej_msg *rej_msg,
@@ -1766,36 +1789,42 @@ static void cm_format_rej(struct cm_rej_msg *rej_msg,
u8 private_data_len)
{
cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
- rej_msg->remote_comm_id = cm_id_priv->id.remote_id;
+ IBA_SET(CM_REJ_REMOTE_COMM_ID, rej_msg,
+ be32_to_cpu(cm_id_priv->id.remote_id));
switch(cm_id_priv->id.state) {
case IB_CM_REQ_RCVD:
- rej_msg->local_comm_id = 0;
- cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
+ IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg, be32_to_cpu(0));
+ IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REQ);
break;
case IB_CM_MRA_REQ_SENT:
- rej_msg->local_comm_id = cm_id_priv->id.local_id;
- cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
+ IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REQ);
break;
case IB_CM_REP_RCVD:
case IB_CM_MRA_REP_SENT:
- rej_msg->local_comm_id = cm_id_priv->id.local_id;
- cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP);
+ IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REP);
break;
default:
- rej_msg->local_comm_id = cm_id_priv->id.local_id;
- cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER);
+ IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg,
+ CM_MSG_RESPONSE_OTHER);
break;
}
- rej_msg->reason = cpu_to_be16(reason);
+ IBA_SET(CM_REJ_REASON, rej_msg, reason);
if (ari && ari_length) {
- cm_rej_set_reject_info_len(rej_msg, ari_length);
- memcpy(rej_msg->ari, ari, ari_length);
+ IBA_SET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg, ari_length);
+ IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length);
}
if (private_data && private_data_len)
- memcpy(rej_msg->private_data, private_data, private_data_len);
+ IBA_SET_MEM(CM_REJ_PRIVATE_DATA, rej_msg, private_data,
+ private_data_len);
}
static void cm_dup_req_handler(struct cm_work *work,
@@ -1855,7 +1884,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
spin_lock_irq(&cm.lock);
timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
if (timewait_info) {
- cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
+ cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
timewait_info->work.remote_id);
spin_unlock_irq(&cm.lock);
if (cur_cm_id_priv) {
@@ -1869,7 +1898,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
if (timewait_info) {
cm_cleanup_timewait(cm_id_priv->timewait_info);
- cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
+ cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
timewait_info->work.remote_id);
spin_unlock_irq(&cm.lock);
@@ -1885,8 +1914,9 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
}
/* Find matching listen request. */
- listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
- req_msg->service_id);
+ listen_cm_id_priv = cm_find_listen(
+ cm_id_priv->id.device,
+ cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg)));
if (!listen_cm_id_priv) {
cm_cleanup_timewait(cm_id_priv->timewait_info);
spin_unlock_irq(&cm.lock);
@@ -1895,8 +1925,8 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
NULL, 0);
goto out;
}
- atomic_inc(&listen_cm_id_priv->refcount);
- atomic_inc(&cm_id_priv->refcount);
+ refcount_inc(&listen_cm_id_priv->refcount);
+ refcount_inc(&cm_id_priv->refcount);
cm_id_priv->id.state = IB_CM_REQ_RCVD;
atomic_inc(&cm_id_priv->work_count);
spin_unlock_irq(&cm.lock);
@@ -1911,24 +1941,32 @@ out:
*/
static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
{
- if (!cm_req_get_primary_subnet_local(req_msg)) {
- if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) {
- req_msg->primary_local_lid = ib_lid_be16(wc->slid);
- cm_req_set_primary_sl(req_msg, wc->sl);
+ if (!IBA_GET(CM_REQ_PRIMARY_SUBNET_LOCAL, req_msg)) {
+ if (cpu_to_be16(IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID,
+ req_msg)) == IB_LID_PERMISSIVE) {
+ IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(ib_lid_be16(wc->slid)));
+ IBA_SET(CM_REQ_PRIMARY_SL, req_msg, wc->sl);
}
- if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE)
- req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits);
+ if (cpu_to_be16(IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID,
+ req_msg)) == IB_LID_PERMISSIVE)
+ IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
+ wc->dlid_path_bits);
}
- if (!cm_req_get_alt_subnet_local(req_msg)) {
- if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) {
- req_msg->alt_local_lid = ib_lid_be16(wc->slid);
- cm_req_set_alt_sl(req_msg, wc->sl);
+ if (!IBA_GET(CM_REQ_ALTERNATE_SUBNET_LOCAL, req_msg)) {
+ if (cpu_to_be16(IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID,
+ req_msg)) == IB_LID_PERMISSIVE) {
+ IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(ib_lid_be16(wc->slid)));
+ IBA_SET(CM_REQ_ALTERNATE_SL, req_msg, wc->sl);
}
- if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE)
- req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits);
+ if (cpu_to_be16(IBA_GET(CM_REQ_ALTERNATE_REMOTE_PORT_LID,
+ req_msg)) == IB_LID_PERMISSIVE)
+ IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg,
+ wc->dlid_path_bits);
}
}
@@ -1948,7 +1986,8 @@ static int cm_req_handler(struct cm_work *work)
return PTR_ERR(cm_id);
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- cm_id_priv->id.remote_id = req_msg->local_comm_id;
+ cm_id_priv->id.remote_id =
+ cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg));
ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
work->mad_recv_wc->recv_buf.grh,
&cm_id_priv->av);
@@ -1960,9 +1999,12 @@ static int cm_req_handler(struct cm_work *work)
ret = PTR_ERR(cm_id_priv->timewait_info);
goto destroy;
}
- cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
- cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
- cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg);
+ cm_id_priv->timewait_info->work.remote_id =
+ cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg));
+ cm_id_priv->timewait_info->remote_ca_guid =
+ cpu_to_be64(IBA_GET(CM_REQ_LOCAL_CA_GUID, req_msg));
+ cm_id_priv->timewait_info->remote_qpn =
+ cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
listen_cm_id_priv = cm_match_req(work, cm_id_priv);
if (!listen_cm_id_priv) {
@@ -1974,7 +2016,8 @@ static int cm_req_handler(struct cm_work *work)
cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
cm_id_priv->id.context = listen_cm_id_priv->id.context;
- cm_id_priv->id.service_id = req_msg->service_id;
+ cm_id_priv->id.service_id =
+ cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
cm_id_priv->id.service_mask = ~cpu_to_be64(0);
cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
@@ -1991,10 +2034,11 @@ static int cm_req_handler(struct cm_work *work)
work->path[0].rec_type =
sa_conv_gid_to_pathrec_type(gid_attr->gid_type);
} else {
- cm_path_set_rec_type(work->port->cm_dev->ib_device,
- work->port->port_num,
- &work->path[0],
- &req_msg->primary_local_gid);
+ cm_path_set_rec_type(
+ work->port->cm_dev->ib_device, work->port->port_num,
+ &work->path[0],
+ IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID,
+ req_msg));
}
if (cm_req_has_alt_path(req_msg))
work->path[1].rec_type = work->path[0].rec_type;
@@ -2034,16 +2078,19 @@ static int cm_req_handler(struct cm_work *work)
}
cm_id_priv->tid = req_msg->hdr.tid;
cm_id_priv->timeout_ms = cm_convert_to_ms(
- cm_req_get_local_resp_timeout(req_msg));
- cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
- cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
- cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
- cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg);
- cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg);
- cm_id_priv->pkey = req_msg->pkey;
- cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg);
- cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
- cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
+ IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg));
+ cm_id_priv->max_cm_retries = IBA_GET(CM_REQ_MAX_CM_RETRIES, req_msg);
+ cm_id_priv->remote_qpn =
+ cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
+ cm_id_priv->initiator_depth =
+ IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg);
+ cm_id_priv->responder_resources =
+ IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg);
+ cm_id_priv->path_mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
+ cm_id_priv->pkey = cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
+ cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
+ cm_id_priv->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg);
+ cm_id_priv->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg);
cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
@@ -2052,7 +2099,7 @@ static int cm_req_handler(struct cm_work *work)
return 0;
rejected:
- atomic_dec(&cm_id_priv->refcount);
+ refcount_dec(&cm_id_priv->refcount);
cm_deref_id(listen_cm_id_priv);
free_timeinfo:
kfree(cm_id_priv->timewait_info);
@@ -2066,29 +2113,35 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg,
struct ib_cm_rep_param *param)
{
cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
- rep_msg->local_comm_id = cm_id_priv->id.local_id;
- rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
- cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
- rep_msg->resp_resources = param->responder_resources;
- cm_rep_set_target_ack_delay(rep_msg,
- cm_id_priv->av.port->cm_dev->ack_delay);
- cm_rep_set_failover(rep_msg, param->failover_accepted);
- cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
- rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
+ IBA_SET(CM_REP_LOCAL_COMM_ID, rep_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_REP_REMOTE_COMM_ID, rep_msg,
+ be32_to_cpu(cm_id_priv->id.remote_id));
+ IBA_SET(CM_REP_STARTING_PSN, rep_msg, param->starting_psn);
+ IBA_SET(CM_REP_RESPONDER_RESOURCES, rep_msg,
+ param->responder_resources);
+ IBA_SET(CM_REP_TARGET_ACK_DELAY, rep_msg,
+ cm_id_priv->av.port->cm_dev->ack_delay);
+ IBA_SET(CM_REP_FAILOVER_ACCEPTED, rep_msg, param->failover_accepted);
+ IBA_SET(CM_REP_RNR_RETRY_COUNT, rep_msg, param->rnr_retry_count);
+ IBA_SET(CM_REP_LOCAL_CA_GUID, rep_msg,
+ be64_to_cpu(cm_id_priv->id.device->node_guid));
if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) {
- rep_msg->initiator_depth = param->initiator_depth;
- cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
- cm_rep_set_srq(rep_msg, param->srq);
- cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
+ IBA_SET(CM_REP_INITIATOR_DEPTH, rep_msg,
+ param->initiator_depth);
+ IBA_SET(CM_REP_END_TO_END_FLOW_CONTROL, rep_msg,
+ param->flow_control);
+ IBA_SET(CM_REP_SRQ, rep_msg, param->srq);
+ IBA_SET(CM_REP_LOCAL_QPN, rep_msg, param->qp_num);
} else {
- cm_rep_set_srq(rep_msg, 1);
- cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num));
+ IBA_SET(CM_REP_SRQ, rep_msg, 1);
+ IBA_SET(CM_REP_LOCAL_EE_CONTEXT_NUMBER, rep_msg, param->qp_num);
}
if (param->private_data && param->private_data_len)
- memcpy(rep_msg->private_data, param->private_data,
- param->private_data_len);
+ IBA_SET_MEM(CM_REP_PRIVATE_DATA, rep_msg, param->private_data,
+ param->private_data_len);
}
int ib_send_cm_rep(struct ib_cm_id *cm_id,
@@ -2134,7 +2187,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
cm_id_priv->msg = msg;
cm_id_priv->initiator_depth = param->initiator_depth;
cm_id_priv->responder_resources = param->responder_resources;
- cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
+ cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg));
cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -2148,11 +2201,14 @@ static void cm_format_rtu(struct cm_rtu_msg *rtu_msg,
u8 private_data_len)
{
cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid);
- rtu_msg->local_comm_id = cm_id_priv->id.local_id;
- rtu_msg->remote_comm_id = cm_id_priv->id.remote_id;
+ IBA_SET(CM_RTU_LOCAL_COMM_ID, rtu_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_RTU_REMOTE_COMM_ID, rtu_msg,
+ be32_to_cpu(cm_id_priv->id.remote_id));
if (private_data && private_data_len)
- memcpy(rtu_msg->private_data, private_data, private_data_len);
+ IBA_SET_MEM(CM_RTU_PRIVATE_DATA, rtu_msg, private_data,
+ private_data_len);
}
int ib_send_cm_rtu(struct ib_cm_id *cm_id,
@@ -2215,18 +2271,20 @@ static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
param = &work->cm_event.param.rep_rcvd;
- param->remote_ca_guid = rep_msg->local_ca_guid;
- param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
+ param->remote_ca_guid =
+ cpu_to_be64(IBA_GET(CM_REP_LOCAL_CA_GUID, rep_msg));
+ param->remote_qkey = IBA_GET(CM_REP_LOCAL_Q_KEY, rep_msg);
param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type));
- param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
- param->responder_resources = rep_msg->initiator_depth;
- param->initiator_depth = rep_msg->resp_resources;
- param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
- param->failover_accepted = cm_rep_get_failover(rep_msg);
- param->flow_control = cm_rep_get_flow_ctrl(rep_msg);
- param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
- param->srq = cm_rep_get_srq(rep_msg);
- work->cm_event.private_data = &rep_msg->private_data;
+ param->starting_psn = IBA_GET(CM_REP_STARTING_PSN, rep_msg);
+ param->responder_resources = IBA_GET(CM_REP_INITIATOR_DEPTH, rep_msg);
+ param->initiator_depth = IBA_GET(CM_REP_RESPONDER_RESOURCES, rep_msg);
+ param->target_ack_delay = IBA_GET(CM_REP_TARGET_ACK_DELAY, rep_msg);
+ param->failover_accepted = IBA_GET(CM_REP_FAILOVER_ACCEPTED, rep_msg);
+ param->flow_control = IBA_GET(CM_REP_END_TO_END_FLOW_CONTROL, rep_msg);
+ param->rnr_retry_count = IBA_GET(CM_REP_RNR_RETRY_COUNT, rep_msg);
+ param->srq = IBA_GET(CM_REP_SRQ, rep_msg);
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_REP_PRIVATE_DATA, rep_msg);
}
static void cm_dup_rep_handler(struct cm_work *work)
@@ -2237,8 +2295,9 @@ static void cm_dup_rep_handler(struct cm_work *work)
int ret;
rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
- cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id,
- rep_msg->local_comm_id);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)),
+ cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg)));
if (!cm_id_priv)
return;
@@ -2282,11 +2341,12 @@ static int cm_rep_handler(struct cm_work *work)
struct cm_timewait_info *timewait_info;
rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
- cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)), 0);
if (!cm_id_priv) {
cm_dup_rep_handler(work);
pr_debug("%s: remote_comm_id %d, no cm_id_priv\n", __func__,
- be32_to_cpu(rep_msg->remote_comm_id));
+ IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
return -EINVAL;
}
@@ -2300,15 +2360,18 @@ static int cm_rep_handler(struct cm_work *work)
default:
spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
- pr_debug("%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n",
- __func__, cm_id_priv->id.state,
- be32_to_cpu(rep_msg->local_comm_id),
- be32_to_cpu(rep_msg->remote_comm_id));
+ pr_debug(
+ "%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n",
+ __func__, cm_id_priv->id.state,
+ IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
+ IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
goto error;
}
- cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
- cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
+ cm_id_priv->timewait_info->work.remote_id =
+ cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg));
+ cm_id_priv->timewait_info->remote_ca_guid =
+ cpu_to_be64(IBA_GET(CM_REP_LOCAL_CA_GUID, rep_msg));
cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
spin_lock(&cm.lock);
@@ -2318,7 +2381,7 @@ static int cm_rep_handler(struct cm_work *work)
spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
pr_debug("%s: Failed to insert remote id %d\n", __func__,
- be32_to_cpu(rep_msg->remote_comm_id));
+ IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
goto error;
}
/* Check for a stale connection. */
@@ -2327,7 +2390,7 @@ static int cm_rep_handler(struct cm_work *work)
rb_erase(&cm_id_priv->timewait_info->remote_id_node,
&cm.remote_id_table);
cm_id_priv->timewait_info->inserted_remote_id = 0;
- cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
+ cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
timewait_info->work.remote_id);
spin_unlock(&cm.lock);
@@ -2336,9 +2399,10 @@ static int cm_rep_handler(struct cm_work *work)
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
NULL, 0);
ret = -EINVAL;
- pr_debug("%s: Stale connection. local_comm_id %d, remote_comm_id %d\n",
- __func__, be32_to_cpu(rep_msg->local_comm_id),
- be32_to_cpu(rep_msg->remote_comm_id));
+ pr_debug(
+ "%s: Stale connection. local_comm_id %d, remote_comm_id %d\n",
+ __func__, IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
+ IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
if (cur_cm_id_priv) {
cm_id = &cur_cm_id_priv->id;
@@ -2351,13 +2415,17 @@ static int cm_rep_handler(struct cm_work *work)
spin_unlock(&cm.lock);
cm_id_priv->id.state = IB_CM_REP_RCVD;
- cm_id_priv->id.remote_id = rep_msg->local_comm_id;
+ cm_id_priv->id.remote_id =
+ cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg));
cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
- cm_id_priv->initiator_depth = rep_msg->resp_resources;
- cm_id_priv->responder_resources = rep_msg->initiator_depth;
- cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
- cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
- cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
+ cm_id_priv->initiator_depth =
+ IBA_GET(CM_REP_RESPONDER_RESOURCES, rep_msg);
+ cm_id_priv->responder_resources =
+ IBA_GET(CM_REP_INITIATOR_DEPTH, rep_msg);
+ cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg));
+ cm_id_priv->rnr_retry_count = IBA_GET(CM_REP_RNR_RETRY_COUNT, rep_msg);
+ cm_id_priv->target_ack_delay =
+ IBA_GET(CM_REP_TARGET_ACK_DELAY, rep_msg);
cm_id_priv->av.timeout =
cm_ack_timeout(cm_id_priv->target_ack_delay,
cm_id_priv->av.timeout - 1);
@@ -2423,12 +2491,14 @@ static int cm_rtu_handler(struct cm_work *work)
int ret;
rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
- cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id,
- rtu_msg->local_comm_id);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_RTU_REMOTE_COMM_ID, rtu_msg)),
+ cpu_to_be32(IBA_GET(CM_RTU_LOCAL_COMM_ID, rtu_msg)));
if (!cm_id_priv)
return -EINVAL;
- work->cm_event.private_data = &rtu_msg->private_data;
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_RTU_PRIVATE_DATA, rtu_msg);
spin_lock_irq(&cm_id_priv->lock);
if (cm_id_priv->id.state != IB_CM_REP_SENT &&
@@ -2463,12 +2533,16 @@ static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
{
cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
cm_form_tid(cm_id_priv));
- dreq_msg->local_comm_id = cm_id_priv->id.local_id;
- dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
- cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
+ IBA_SET(CM_DREQ_LOCAL_COMM_ID, dreq_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_DREQ_REMOTE_COMM_ID, dreq_msg,
+ be32_to_cpu(cm_id_priv->id.remote_id));
+ IBA_SET(CM_DREQ_REMOTE_QPN_EECN, dreq_msg,
+ be32_to_cpu(cm_id_priv->remote_qpn));
if (private_data && private_data_len)
- memcpy(dreq_msg->private_data, private_data, private_data_len);
+ IBA_SET_MEM(CM_DREQ_PRIVATE_DATA, dreq_msg, private_data,
+ private_data_len);
}
int ib_send_cm_dreq(struct ib_cm_id *cm_id,
@@ -2528,11 +2602,14 @@ static void cm_format_drep(struct cm_drep_msg *drep_msg,
u8 private_data_len)
{
cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid);
- drep_msg->local_comm_id = cm_id_priv->id.local_id;
- drep_msg->remote_comm_id = cm_id_priv->id.remote_id;
+ IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_DREP_REMOTE_COMM_ID, drep_msg,
+ be32_to_cpu(cm_id_priv->id.remote_id));
if (private_data && private_data_len)
- memcpy(drep_msg->private_data, private_data, private_data_len);
+ IBA_SET_MEM(CM_DREP_PRIVATE_DATA, drep_msg, private_data,
+ private_data_len);
}
int ib_send_cm_drep(struct ib_cm_id *cm_id,
@@ -2600,8 +2677,10 @@ static int cm_issue_drep(struct cm_port *port,
drep_msg = (struct cm_drep_msg *) msg->mad;
cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
- drep_msg->remote_comm_id = dreq_msg->local_comm_id;
- drep_msg->local_comm_id = dreq_msg->remote_comm_id;
+ IBA_SET(CM_DREP_REMOTE_COMM_ID, drep_msg,
+ IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg));
+ IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg,
+ IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
ret = ib_post_send_mad(msg, NULL);
if (ret)
@@ -2618,22 +2697,26 @@ static int cm_dreq_handler(struct cm_work *work)
int ret;
dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
- cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
- dreq_msg->local_comm_id);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)),
+ cpu_to_be32(IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg)));
if (!cm_id_priv) {
atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
counter[CM_DREQ_COUNTER]);
cm_issue_drep(work->port, work->mad_recv_wc);
- pr_debug("%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n",
- __func__, be32_to_cpu(dreq_msg->local_comm_id),
- be32_to_cpu(dreq_msg->remote_comm_id));
+ pr_debug(
+ "%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n",
+ __func__, IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
+ IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
return -EINVAL;
}
- work->cm_event.private_data = &dreq_msg->private_data;
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_DREQ_PRIVATE_DATA, dreq_msg);
spin_lock_irq(&cm_id_priv->lock);
- if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg))
+ if (cm_id_priv->local_qpn !=
+ cpu_to_be32(IBA_GET(CM_DREQ_REMOTE_QPN_EECN, dreq_msg)))
goto unlock;
switch (cm_id_priv->id.state) {
@@ -2699,12 +2782,14 @@ static int cm_drep_handler(struct cm_work *work)
int ret;
drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
- cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id,
- drep_msg->local_comm_id);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_DREP_REMOTE_COMM_ID, drep_msg)),
+ cpu_to_be32(IBA_GET(CM_DREP_LOCAL_COMM_ID, drep_msg)));
if (!cm_id_priv)
return -EINVAL;
- work->cm_event.private_data = &drep_msg->private_data;
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_DREP_PRIVATE_DATA, drep_msg);
spin_lock_irq(&cm_id_priv->lock);
if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
@@ -2800,10 +2885,11 @@ static void cm_format_rej_event(struct cm_work *work)
rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
param = &work->cm_event.param.rej_rcvd;
- param->ari = rej_msg->ari;
- param->ari_length = cm_rej_get_reject_info_len(rej_msg);
- param->reason = __be16_to_cpu(rej_msg->reason);
- work->cm_event.private_data = &rej_msg->private_data;
+ param->ari = IBA_GET_MEM_PTR(CM_REJ_ARI, rej_msg);
+ param->ari_length = IBA_GET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg);
+ param->reason = IBA_GET(CM_REJ_REASON, rej_msg);
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_REJ_PRIVATE_DATA, rej_msg);
}
static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
@@ -2812,29 +2898,29 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
struct cm_id_private *cm_id_priv;
__be32 remote_id;
- remote_id = rej_msg->local_comm_id;
+ remote_id = cpu_to_be32(IBA_GET(CM_REJ_LOCAL_COMM_ID, rej_msg));
- if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) {
+ if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_TIMEOUT) {
spin_lock_irq(&cm.lock);
- timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari),
- remote_id);
+ timewait_info = cm_find_remote_id(
+ *((__be64 *)IBA_GET_MEM_PTR(CM_REJ_ARI, rej_msg)),
+ remote_id);
if (!timewait_info) {
spin_unlock_irq(&cm.lock);
return NULL;
}
- cm_id_priv = xa_load(&cm.local_id_table,
- cm_local_id(timewait_info->work.local_id));
- if (cm_id_priv) {
- if (cm_id_priv->id.remote_id == remote_id)
- atomic_inc(&cm_id_priv->refcount);
- else
- cm_id_priv = NULL;
- }
+ cm_id_priv =
+ cm_acquire_id(timewait_info->work.local_id, remote_id);
spin_unlock_irq(&cm.lock);
- } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ)
- cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0);
+ } else if (IBA_GET(CM_REJ_MESSAGE_REJECTED, rej_msg) ==
+ CM_MSG_RESPONSE_REQ)
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_REJ_REMOTE_COMM_ID, rej_msg)),
+ 0);
else
- cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_REJ_REMOTE_COMM_ID, rej_msg)),
+ remote_id);
return cm_id_priv;
}
@@ -2862,7 +2948,7 @@ static int cm_rej_handler(struct cm_work *work)
/* fall through */
case IB_CM_REQ_RCVD:
case IB_CM_MRA_REQ_SENT:
- if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN)
+ if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_STALE_CONN)
cm_enter_timewait(cm_id_priv);
else
cm_reset_to_idle(cm_id_priv);
@@ -2992,13 +3078,16 @@ EXPORT_SYMBOL(ib_send_cm_mra);
static struct cm_id_private * cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
{
- switch (cm_mra_get_msg_mraed(mra_msg)) {
+ switch (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg)) {
case CM_MSG_RESPONSE_REQ:
- return cm_acquire_id(mra_msg->remote_comm_id, 0);
+ return cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_MRA_REMOTE_COMM_ID, mra_msg)),
+ 0);
case CM_MSG_RESPONSE_REP:
case CM_MSG_RESPONSE_OTHER:
- return cm_acquire_id(mra_msg->remote_comm_id,
- mra_msg->local_comm_id);
+ return cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_MRA_REMOTE_COMM_ID, mra_msg)),
+ cpu_to_be32(IBA_GET(CM_MRA_LOCAL_COMM_ID, mra_msg)));
default:
return NULL;
}
@@ -3015,30 +3104,34 @@ static int cm_mra_handler(struct cm_work *work)
if (!cm_id_priv)
return -EINVAL;
- work->cm_event.private_data = &mra_msg->private_data;
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_MRA_PRIVATE_DATA, mra_msg);
work->cm_event.param.mra_rcvd.service_timeout =
- cm_mra_get_service_timeout(mra_msg);
- timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
+ IBA_GET(CM_MRA_SERVICE_TIMEOUT, mra_msg);
+ timeout = cm_convert_to_ms(IBA_GET(CM_MRA_SERVICE_TIMEOUT, mra_msg)) +
cm_convert_to_ms(cm_id_priv->av.timeout);
spin_lock_irq(&cm_id_priv->lock);
switch (cm_id_priv->id.state) {
case IB_CM_REQ_SENT:
- if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
+ if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
+ CM_MSG_RESPONSE_REQ ||
ib_modify_mad(cm_id_priv->av.port->mad_agent,
cm_id_priv->msg, timeout))
goto out;
cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
break;
case IB_CM_REP_SENT:
- if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
+ if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
+ CM_MSG_RESPONSE_REP ||
ib_modify_mad(cm_id_priv->av.port->mad_agent,
cm_id_priv->msg, timeout))
goto out;
cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
break;
case IB_CM_ESTABLISHED:
- if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
+ if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
+ CM_MSG_RESPONSE_OTHER ||
cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
ib_modify_mad(cm_id_priv->av.port->mad_agent,
cm_id_priv->msg, timeout)) {
@@ -3080,117 +3173,23 @@ out:
return -EINVAL;
}
-static void cm_format_lap(struct cm_lap_msg *lap_msg,
- struct cm_id_private *cm_id_priv,
- struct sa_path_rec *alternate_path,
- const void *private_data,
- u8 private_data_len)
-{
- bool alt_ext = false;
-
- if (alternate_path->rec_type == SA_PATH_REC_TYPE_OPA)
- alt_ext = opa_is_extended_lid(alternate_path->opa.dlid,
- alternate_path->opa.slid);
- cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
- cm_form_tid(cm_id_priv));
- lap_msg->local_comm_id = cm_id_priv->id.local_id;
- lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
- cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
- /* todo: need remote CM response timeout */
- cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
- lap_msg->alt_local_lid =
- htons(ntohl(sa_path_get_slid(alternate_path)));
- lap_msg->alt_remote_lid =
- htons(ntohl(sa_path_get_dlid(alternate_path)));
- lap_msg->alt_local_gid = alternate_path->sgid;
- lap_msg->alt_remote_gid = alternate_path->dgid;
- if (alt_ext) {
- lap_msg->alt_local_gid.global.interface_id
- = OPA_MAKE_ID(be32_to_cpu(alternate_path->opa.slid));
- lap_msg->alt_remote_gid.global.interface_id
- = OPA_MAKE_ID(be32_to_cpu(alternate_path->opa.dlid));
- }
- cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
- cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class);
- lap_msg->alt_hop_limit = alternate_path->hop_limit;
- cm_lap_set_packet_rate(lap_msg, alternate_path->rate);
- cm_lap_set_sl(lap_msg, alternate_path->sl);
- cm_lap_set_subnet_local(lap_msg, 1); /* local only... */
- cm_lap_set_local_ack_timeout(lap_msg,
- cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
- alternate_path->packet_life_time));
-
- if (private_data && private_data_len)
- memcpy(lap_msg->private_data, private_data, private_data_len);
-}
-
-int ib_send_cm_lap(struct ib_cm_id *cm_id,
- struct sa_path_rec *alternate_path,
- const void *private_data,
- u8 private_data_len)
-{
- struct cm_id_private *cm_id_priv;
- struct ib_mad_send_buf *msg;
- unsigned long flags;
- int ret;
-
- if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE)
- return -EINVAL;
-
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state != IB_CM_ESTABLISHED ||
- (cm_id->lap_state != IB_CM_LAP_UNINIT &&
- cm_id->lap_state != IB_CM_LAP_IDLE)) {
- ret = -EINVAL;
- goto out;
- }
-
- ret = cm_init_av_by_path(alternate_path, NULL, &cm_id_priv->alt_av,
- cm_id_priv);
- if (ret)
- goto out;
- cm_id_priv->alt_av.timeout =
- cm_ack_timeout(cm_id_priv->target_ack_delay,
- cm_id_priv->alt_av.timeout - 1);
-
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
- goto out;
-
- cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
- alternate_path, private_data, private_data_len);
- msg->timeout_ms = cm_id_priv->timeout_ms;
- msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
-
- ret = ib_post_send_mad(msg, NULL);
- if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- cm_free_msg(msg);
- return ret;
- }
-
- cm_id->lap_state = IB_CM_LAP_SENT;
- cm_id_priv->msg = msg;
-
-out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- return ret;
-}
-EXPORT_SYMBOL(ib_send_cm_lap);
-
static void cm_format_path_lid_from_lap(struct cm_lap_msg *lap_msg,
struct sa_path_rec *path)
{
u32 lid;
if (path->rec_type != SA_PATH_REC_TYPE_OPA) {
- sa_path_set_dlid(path, ntohs(lap_msg->alt_local_lid));
- sa_path_set_slid(path, ntohs(lap_msg->alt_remote_lid));
+ sa_path_set_dlid(path, IBA_GET(CM_LAP_ALTERNATE_LOCAL_PORT_LID,
+ lap_msg));
+ sa_path_set_slid(path, IBA_GET(CM_LAP_ALTERNATE_REMOTE_PORT_LID,
+ lap_msg));
} else {
- lid = opa_get_lid_from_gid(&lap_msg->alt_local_gid);
+ lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
+ CM_LAP_ALTERNATE_LOCAL_PORT_GID, lap_msg));
sa_path_set_dlid(path, lid);
- lid = opa_get_lid_from_gid(&lap_msg->alt_remote_gid);
+ lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
+ CM_LAP_ALTERNATE_REMOTE_PORT_GID, lap_msg));
sa_path_set_slid(path, lid);
}
}
@@ -3199,20 +3198,23 @@ static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
struct sa_path_rec *path,
struct cm_lap_msg *lap_msg)
{
- path->dgid = lap_msg->alt_local_gid;
- path->sgid = lap_msg->alt_remote_gid;
- path->flow_label = cm_lap_get_flow_label(lap_msg);
- path->hop_limit = lap_msg->alt_hop_limit;
- path->traffic_class = cm_lap_get_traffic_class(lap_msg);
+ path->dgid = *IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_LOCAL_PORT_GID, lap_msg);
+ path->sgid =
+ *IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_REMOTE_PORT_GID, lap_msg);
+ path->flow_label =
+ cpu_to_be32(IBA_GET(CM_LAP_ALTERNATE_FLOW_LABEL, lap_msg));
+ path->hop_limit = IBA_GET(CM_LAP_ALTERNATE_HOP_LIMIT, lap_msg);
+ path->traffic_class = IBA_GET(CM_LAP_ALTERNATE_TRAFFIC_CLASS, lap_msg);
path->reversible = 1;
path->pkey = cm_id_priv->pkey;
- path->sl = cm_lap_get_sl(lap_msg);
+ path->sl = IBA_GET(CM_LAP_ALTERNATE_SL, lap_msg);
path->mtu_selector = IB_SA_EQ;
path->mtu = cm_id_priv->path_mtu;
path->rate_selector = IB_SA_EQ;
- path->rate = cm_lap_get_packet_rate(lap_msg);
+ path->rate = IBA_GET(CM_LAP_ALTERNATE_PACKET_RATE, lap_msg);
path->packet_life_time_selector = IB_SA_EQ;
- path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg);
+ path->packet_life_time =
+ IBA_GET(CM_LAP_ALTERNATE_LOCAL_ACK_TIMEOUT, lap_msg);
path->packet_life_time -= (path->packet_life_time > 0);
cm_format_path_lid_from_lap(lap_msg, path);
}
@@ -3234,20 +3236,22 @@ static int cm_lap_handler(struct cm_work *work)
/* todo: verify LAP request and send reject APR if invalid. */
lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
- cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
- lap_msg->local_comm_id);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_LAP_REMOTE_COMM_ID, lap_msg)),
+ cpu_to_be32(IBA_GET(CM_LAP_LOCAL_COMM_ID, lap_msg)));
if (!cm_id_priv)
return -EINVAL;
param = &work->cm_event.param.lap_rcvd;
memset(&work->path[0], 0, sizeof(work->path[1]));
cm_path_set_rec_type(work->port->cm_dev->ib_device,
- work->port->port_num,
- &work->path[0],
- &lap_msg->alt_local_gid);
+ work->port->port_num, &work->path[0],
+ IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_LOCAL_PORT_GID,
+ lap_msg));
param->alternate_path = &work->path[0];
cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
- work->cm_event.private_data = &lap_msg->private_data;
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_LAP_PRIVATE_DATA, lap_msg);
spin_lock_irq(&cm_id_priv->lock);
if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
@@ -3312,72 +3316,6 @@ deref: cm_deref_id(cm_id_priv);
return -EINVAL;
}
-static void cm_format_apr(struct cm_apr_msg *apr_msg,
- struct cm_id_private *cm_id_priv,
- enum ib_cm_apr_status status,
- void *info,
- u8 info_length,
- const void *private_data,
- u8 private_data_len)
-{
- cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid);
- apr_msg->local_comm_id = cm_id_priv->id.local_id;
- apr_msg->remote_comm_id = cm_id_priv->id.remote_id;
- apr_msg->ap_status = (u8) status;
-
- if (info && info_length) {
- apr_msg->info_length = info_length;
- memcpy(apr_msg->info, info, info_length);
- }
-
- if (private_data && private_data_len)
- memcpy(apr_msg->private_data, private_data, private_data_len);
-}
-
-int ib_send_cm_apr(struct ib_cm_id *cm_id,
- enum ib_cm_apr_status status,
- void *info,
- u8 info_length,
- const void *private_data,
- u8 private_data_len)
-{
- struct cm_id_private *cm_id_priv;
- struct ib_mad_send_buf *msg;
- unsigned long flags;
- int ret;
-
- if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) ||
- (info && info_length > IB_CM_APR_INFO_LENGTH))
- return -EINVAL;
-
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state != IB_CM_ESTABLISHED ||
- (cm_id->lap_state != IB_CM_LAP_RCVD &&
- cm_id->lap_state != IB_CM_MRA_LAP_SENT)) {
- ret = -EINVAL;
- goto out;
- }
-
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
- goto out;
-
- cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
- info, info_length, private_data, private_data_len);
- ret = ib_post_send_mad(msg, NULL);
- if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- cm_free_msg(msg);
- return ret;
- }
-
- cm_id->lap_state = IB_CM_LAP_IDLE;
-out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- return ret;
-}
-EXPORT_SYMBOL(ib_send_cm_apr);
-
static int cm_apr_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
@@ -3392,15 +3330,20 @@ static int cm_apr_handler(struct cm_work *work)
return -EINVAL;
apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
- cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
- apr_msg->local_comm_id);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_APR_REMOTE_COMM_ID, apr_msg)),
+ cpu_to_be32(IBA_GET(CM_APR_LOCAL_COMM_ID, apr_msg)));
if (!cm_id_priv)
return -EINVAL; /* Unmatched reply. */
- work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status;
- work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info;
- work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length;
- work->cm_event.private_data = &apr_msg->private_data;
+ work->cm_event.param.apr_rcvd.ap_status =
+ IBA_GET(CM_APR_AR_STATUS, apr_msg);
+ work->cm_event.param.apr_rcvd.apr_info =
+ IBA_GET_MEM_PTR(CM_APR_ADDITIONAL_INFORMATION, apr_msg);
+ work->cm_event.param.apr_rcvd.info_len =
+ IBA_GET(CM_APR_ADDITIONAL_INFORMATION_LENGTH, apr_msg);
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_APR_PRIVATE_DATA, apr_msg);
spin_lock_irq(&cm_id_priv->lock);
if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
@@ -3434,7 +3377,7 @@ static int cm_timewait_handler(struct cm_work *work)
struct cm_id_private *cm_id_priv;
int ret;
- timewait_info = (struct cm_timewait_info *)work;
+ timewait_info = container_of(work, struct cm_timewait_info, work);
spin_lock_irq(&cm.lock);
list_del(&timewait_info->list);
spin_unlock_irq(&cm.lock);
@@ -3472,13 +3415,16 @@ static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
{
cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
cm_form_tid(cm_id_priv));
- sidr_req_msg->request_id = cm_id_priv->id.local_id;
- sidr_req_msg->pkey = param->path->pkey;
- sidr_req_msg->service_id = param->service_id;
+ IBA_SET(CM_SIDR_REQ_REQUESTID, sidr_req_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_SIDR_REQ_PARTITION_KEY, sidr_req_msg,
+ be16_to_cpu(param->path->pkey));
+ IBA_SET(CM_SIDR_REQ_SERVICEID, sidr_req_msg,
+ be64_to_cpu(param->service_id));
if (param->private_data && param->private_data_len)
- memcpy(sidr_req_msg->private_data, param->private_data,
- param->private_data_len);
+ IBA_SET_MEM(CM_SIDR_REQ_PRIVATE_DATA, sidr_req_msg,
+ param->private_data, param->private_data_len);
}
int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
@@ -3542,13 +3488,15 @@ static void cm_format_sidr_req_event(struct cm_work *work,
sidr_req_msg = (struct cm_sidr_req_msg *)
work->mad_recv_wc->recv_buf.mad;
param = &work->cm_event.param.sidr_req_rcvd;
- param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
+ param->pkey = IBA_GET(CM_SIDR_REQ_PARTITION_KEY, sidr_req_msg);
param->listen_id = listen_id;
- param->service_id = sidr_req_msg->service_id;
+ param->service_id =
+ cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg));
param->bth_pkey = cm_get_bth_pkey(work);
param->port = work->port->port_num;
param->sgid_attr = rx_cm_id->av.ah_attr.grh.sgid_attr;
- work->cm_event.private_data = &sidr_req_msg->private_data;
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_SIDR_REQ_PRIVATE_DATA, sidr_req_msg);
}
static int cm_sidr_req_handler(struct cm_work *work)
@@ -3576,7 +3524,8 @@ static int cm_sidr_req_handler(struct cm_work *work)
if (ret)
goto out;
- cm_id_priv->id.remote_id = sidr_req_msg->request_id;
+ cm_id_priv->id.remote_id =
+ cpu_to_be32(IBA_GET(CM_SIDR_REQ_REQUESTID, sidr_req_msg));
cm_id_priv->tid = sidr_req_msg->hdr.tid;
atomic_inc(&cm_id_priv->work_count);
@@ -3589,20 +3538,22 @@ static int cm_sidr_req_handler(struct cm_work *work)
goto out; /* Duplicate message. */
}
cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
- cur_cm_id_priv = cm_find_listen(cm_id->device,
- sidr_req_msg->service_id);
+ cur_cm_id_priv = cm_find_listen(
+ cm_id->device,
+ cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg)));
if (!cur_cm_id_priv) {
spin_unlock_irq(&cm.lock);
cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
goto out; /* No match. */
}
- atomic_inc(&cur_cm_id_priv->refcount);
- atomic_inc(&cm_id_priv->refcount);
+ refcount_inc(&cur_cm_id_priv->refcount);
+ refcount_inc(&cm_id_priv->refcount);
spin_unlock_irq(&cm.lock);
cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
cm_id_priv->id.context = cur_cm_id_priv->id.context;
- cm_id_priv->id.service_id = sidr_req_msg->service_id;
+ cm_id_priv->id.service_id =
+ cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg));
cm_id_priv->id.service_mask = ~cpu_to_be64(0);
cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id);
@@ -3620,18 +3571,21 @@ static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
{
cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
cm_id_priv->tid);
- sidr_rep_msg->request_id = cm_id_priv->id.remote_id;
- sidr_rep_msg->status = param->status;
- cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num));
- sidr_rep_msg->service_id = cm_id_priv->id.service_id;
- sidr_rep_msg->qkey = cpu_to_be32(param->qkey);
+ IBA_SET(CM_SIDR_REP_REQUESTID, sidr_rep_msg,
+ be32_to_cpu(cm_id_priv->id.remote_id));
+ IBA_SET(CM_SIDR_REP_STATUS, sidr_rep_msg, param->status);
+ IBA_SET(CM_SIDR_REP_QPN, sidr_rep_msg, param->qp_num);
+ IBA_SET(CM_SIDR_REP_SERVICEID, sidr_rep_msg,
+ be64_to_cpu(cm_id_priv->id.service_id));
+ IBA_SET(CM_SIDR_REP_Q_KEY, sidr_rep_msg, param->qkey);
if (param->info && param->info_length)
- memcpy(sidr_rep_msg->info, param->info, param->info_length);
+ IBA_SET_MEM(CM_SIDR_REP_ADDITIONAL_INFORMATION, sidr_rep_msg,
+ param->info, param->info_length);
if (param->private_data && param->private_data_len)
- memcpy(sidr_rep_msg->private_data, param->private_data,
- param->private_data_len);
+ IBA_SET_MEM(CM_SIDR_REP_PRIVATE_DATA, sidr_rep_msg,
+ param->private_data, param->private_data_len);
}
int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
@@ -3691,13 +3645,16 @@ static void cm_format_sidr_rep_event(struct cm_work *work,
sidr_rep_msg = (struct cm_sidr_rep_msg *)
work->mad_recv_wc->recv_buf.mad;
param = &work->cm_event.param.sidr_rep_rcvd;
- param->status = sidr_rep_msg->status;
- param->qkey = be32_to_cpu(sidr_rep_msg->qkey);
- param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
- param->info = &sidr_rep_msg->info;
- param->info_len = sidr_rep_msg->info_length;
+ param->status = IBA_GET(CM_SIDR_REP_STATUS, sidr_rep_msg);
+ param->qkey = IBA_GET(CM_SIDR_REP_Q_KEY, sidr_rep_msg);
+ param->qpn = IBA_GET(CM_SIDR_REP_QPN, sidr_rep_msg);
+ param->info = IBA_GET_MEM_PTR(CM_SIDR_REP_ADDITIONAL_INFORMATION,
+ sidr_rep_msg);
+ param->info_len = IBA_GET(CM_SIDR_REP_ADDITIONAL_INFORMATION_LENGTH,
+ sidr_rep_msg);
param->sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
- work->cm_event.private_data = &sidr_rep_msg->private_data;
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_SIDR_REP_PRIVATE_DATA, sidr_rep_msg);
}
static int cm_sidr_rep_handler(struct cm_work *work)
@@ -3707,7 +3664,8 @@ static int cm_sidr_rep_handler(struct cm_work *work)
sidr_rep_msg = (struct cm_sidr_rep_msg *)
work->mad_recv_wc->recv_buf.mad;
- cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_SIDR_REP_REQUESTID, sidr_rep_msg)), 0);
if (!cm_id_priv)
return -EINVAL; /* Unmatched reply. */
@@ -4399,6 +4357,7 @@ error2:
error1:
port_modify.set_port_cap_mask = 0;
port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
+ kfree(port);
while (--i) {
if (!rdma_cap_ib_cm(ib_device, i))
continue;
@@ -4407,6 +4366,7 @@ error1:
ib_modify_port(ib_device, port->port_num, 0, &port_modify);
ib_unregister_mad_agent(port->mad_agent);
cm_remove_port_fs(port);
+ kfree(port);
}
free:
kfree(cm_dev);
@@ -4460,6 +4420,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
spin_unlock_irq(&cm.state_lock);
ib_unregister_mad_agent(cur_mad_agent);
cm_remove_port_fs(port);
+ kfree(port);
}
kfree(cm_dev);
diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h
index 3d16d614aff6..0cc40656b5c5 100644
--- a/drivers/infiniband/core/cm_msgs.h
+++ b/drivers/infiniband/core/cm_msgs.h
@@ -1,39 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2004, 2011 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004 Voltaire Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING the madirectory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use source and binary forms, with or
- * withmodification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retathe above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHWARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS THE
- * SOFTWARE.
+ * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved.
*/
-#if !defined(CM_MSGS_H)
+#ifndef CM_MSGS_H
#define CM_MSGS_H
+#include <rdma/ibta_vol1_c12.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_cm.h>
@@ -44,120 +19,14 @@
#define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */
-struct cm_req_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 rsvd4;
- __be64 service_id;
- __be64 local_ca_guid;
- __be32 rsvd24;
- __be32 local_qkey;
- /* local QPN:24, responder resources:8 */
- __be32 offset32;
- /* local EECN:24, initiator depth:8 */
- __be32 offset36;
- /*
- * remote EECN:24, remote CM response timeout:5,
- * transport service type:2, end-to-end flow control:1
- */
- __be32 offset40;
- /* starting PSN:24, local CM response timeout:5, retry count:3 */
- __be32 offset44;
- __be16 pkey;
- /* path MTU:4, RDC exists:1, RNR retry count:3. */
- u8 offset50;
- /* max CM Retries:4, SRQ:1, extended transport type:3 */
- u8 offset51;
-
- __be16 primary_local_lid;
- __be16 primary_remote_lid;
- union ib_gid primary_local_gid;
- union ib_gid primary_remote_gid;
- /* flow label:20, rsvd:6, packet rate:6 */
- __be32 primary_offset88;
- u8 primary_traffic_class;
- u8 primary_hop_limit;
- /* SL:4, subnet local:1, rsvd:3 */
- u8 primary_offset94;
- /* local ACK timeout:5, rsvd:3 */
- u8 primary_offset95;
-
- __be16 alt_local_lid;
- __be16 alt_remote_lid;
- union ib_gid alt_local_gid;
- union ib_gid alt_remote_gid;
- /* flow label:20, rsvd:6, packet rate:6 */
- __be32 alt_offset132;
- u8 alt_traffic_class;
- u8 alt_hop_limit;
- /* SL:4, subnet local:1, rsvd:3 */
- u8 alt_offset138;
- /* local ACK timeout:5, rsvd:3 */
- u8 alt_offset139;
-
- u32 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE / sizeof(u32)];
-
-} __packed;
-
-static inline __be32 cm_req_get_local_qpn(struct cm_req_msg *req_msg)
-{
- return cpu_to_be32(be32_to_cpu(req_msg->offset32) >> 8);
-}
-
-static inline void cm_req_set_local_qpn(struct cm_req_msg *req_msg, __be32 qpn)
-{
- req_msg->offset32 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
- (be32_to_cpu(req_msg->offset32) &
- 0x000000FF));
-}
-
-static inline u8 cm_req_get_resp_res(struct cm_req_msg *req_msg)
-{
- return (u8) be32_to_cpu(req_msg->offset32);
-}
-
-static inline void cm_req_set_resp_res(struct cm_req_msg *req_msg, u8 resp_res)
-{
- req_msg->offset32 = cpu_to_be32(resp_res |
- (be32_to_cpu(req_msg->offset32) &
- 0xFFFFFF00));
-}
-
-static inline u8 cm_req_get_init_depth(struct cm_req_msg *req_msg)
-{
- return (u8) be32_to_cpu(req_msg->offset36);
-}
-
-static inline void cm_req_set_init_depth(struct cm_req_msg *req_msg,
- u8 init_depth)
-{
- req_msg->offset36 = cpu_to_be32(init_depth |
- (be32_to_cpu(req_msg->offset36) &
- 0xFFFFFF00));
-}
-
-static inline u8 cm_req_get_remote_resp_timeout(struct cm_req_msg *req_msg)
-{
- return (u8) ((be32_to_cpu(req_msg->offset40) & 0xF8) >> 3);
-}
-
-static inline void cm_req_set_remote_resp_timeout(struct cm_req_msg *req_msg,
- u8 resp_timeout)
-{
- req_msg->offset40 = cpu_to_be32((resp_timeout << 3) |
- (be32_to_cpu(req_msg->offset40) &
- 0xFFFFFF07));
-}
-
static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg)
{
- u8 transport_type = (u8) (be32_to_cpu(req_msg->offset40) & 0x06) >> 1;
+ u8 transport_type = IBA_GET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg);
switch(transport_type) {
case 0: return IB_QPT_RC;
case 1: return IB_QPT_UC;
case 3:
- switch (req_msg->offset51 & 0x7) {
+ switch (IBA_GET(CM_REQ_EXTENDED_TRANSPORT_TYPE, req_msg)) {
case 1: return IB_QPT_XRC_TGT;
default: return 0;
}
@@ -170,240 +39,17 @@ static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg,
{
switch(qp_type) {
case IB_QPT_UC:
- req_msg->offset40 = cpu_to_be32((be32_to_cpu(
- req_msg->offset40) &
- 0xFFFFFFF9) | 0x2);
+ IBA_SET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg, 1);
break;
case IB_QPT_XRC_INI:
- req_msg->offset40 = cpu_to_be32((be32_to_cpu(
- req_msg->offset40) &
- 0xFFFFFFF9) | 0x6);
- req_msg->offset51 = (req_msg->offset51 & 0xF8) | 1;
+ IBA_SET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg, 3);
+ IBA_SET(CM_REQ_EXTENDED_TRANSPORT_TYPE, req_msg, 1);
break;
default:
- req_msg->offset40 = cpu_to_be32(be32_to_cpu(
- req_msg->offset40) &
- 0xFFFFFFF9);
+ IBA_SET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg, 0);
}
}
-static inline u8 cm_req_get_flow_ctrl(struct cm_req_msg *req_msg)
-{
- return be32_to_cpu(req_msg->offset40) & 0x1;
-}
-
-static inline void cm_req_set_flow_ctrl(struct cm_req_msg *req_msg,
- u8 flow_ctrl)
-{
- req_msg->offset40 = cpu_to_be32((flow_ctrl & 0x1) |
- (be32_to_cpu(req_msg->offset40) &
- 0xFFFFFFFE));
-}
-
-static inline __be32 cm_req_get_starting_psn(struct cm_req_msg *req_msg)
-{
- return cpu_to_be32(be32_to_cpu(req_msg->offset44) >> 8);
-}
-
-static inline void cm_req_set_starting_psn(struct cm_req_msg *req_msg,
- __be32 starting_psn)
-{
- req_msg->offset44 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) |
- (be32_to_cpu(req_msg->offset44) & 0x000000FF));
-}
-
-static inline u8 cm_req_get_local_resp_timeout(struct cm_req_msg *req_msg)
-{
- return (u8) ((be32_to_cpu(req_msg->offset44) & 0xF8) >> 3);
-}
-
-static inline void cm_req_set_local_resp_timeout(struct cm_req_msg *req_msg,
- u8 resp_timeout)
-{
- req_msg->offset44 = cpu_to_be32((resp_timeout << 3) |
- (be32_to_cpu(req_msg->offset44) & 0xFFFFFF07));
-}
-
-static inline u8 cm_req_get_retry_count(struct cm_req_msg *req_msg)
-{
- return (u8) (be32_to_cpu(req_msg->offset44) & 0x7);
-}
-
-static inline void cm_req_set_retry_count(struct cm_req_msg *req_msg,
- u8 retry_count)
-{
- req_msg->offset44 = cpu_to_be32((retry_count & 0x7) |
- (be32_to_cpu(req_msg->offset44) & 0xFFFFFFF8));
-}
-
-static inline u8 cm_req_get_path_mtu(struct cm_req_msg *req_msg)
-{
- return req_msg->offset50 >> 4;
-}
-
-static inline void cm_req_set_path_mtu(struct cm_req_msg *req_msg, u8 path_mtu)
-{
- req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF) | (path_mtu << 4));
-}
-
-static inline u8 cm_req_get_rnr_retry_count(struct cm_req_msg *req_msg)
-{
- return req_msg->offset50 & 0x7;
-}
-
-static inline void cm_req_set_rnr_retry_count(struct cm_req_msg *req_msg,
- u8 rnr_retry_count)
-{
- req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF8) |
- (rnr_retry_count & 0x7));
-}
-
-static inline u8 cm_req_get_max_cm_retries(struct cm_req_msg *req_msg)
-{
- return req_msg->offset51 >> 4;
-}
-
-static inline void cm_req_set_max_cm_retries(struct cm_req_msg *req_msg,
- u8 retries)
-{
- req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF) | (retries << 4));
-}
-
-static inline u8 cm_req_get_srq(struct cm_req_msg *req_msg)
-{
- return (req_msg->offset51 & 0x8) >> 3;
-}
-
-static inline void cm_req_set_srq(struct cm_req_msg *req_msg, u8 srq)
-{
- req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF7) |
- ((srq & 0x1) << 3));
-}
-
-static inline __be32 cm_req_get_primary_flow_label(struct cm_req_msg *req_msg)
-{
- return cpu_to_be32(be32_to_cpu(req_msg->primary_offset88) >> 12);
-}
-
-static inline void cm_req_set_primary_flow_label(struct cm_req_msg *req_msg,
- __be32 flow_label)
-{
- req_msg->primary_offset88 = cpu_to_be32(
- (be32_to_cpu(req_msg->primary_offset88) &
- 0x00000FFF) |
- (be32_to_cpu(flow_label) << 12));
-}
-
-static inline u8 cm_req_get_primary_packet_rate(struct cm_req_msg *req_msg)
-{
- return (u8) (be32_to_cpu(req_msg->primary_offset88) & 0x3F);
-}
-
-static inline void cm_req_set_primary_packet_rate(struct cm_req_msg *req_msg,
- u8 rate)
-{
- req_msg->primary_offset88 = cpu_to_be32(
- (be32_to_cpu(req_msg->primary_offset88) &
- 0xFFFFFFC0) | (rate & 0x3F));
-}
-
-static inline u8 cm_req_get_primary_sl(struct cm_req_msg *req_msg)
-{
- return (u8) (req_msg->primary_offset94 >> 4);
-}
-
-static inline void cm_req_set_primary_sl(struct cm_req_msg *req_msg, u8 sl)
-{
- req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0x0F) |
- (sl << 4));
-}
-
-static inline u8 cm_req_get_primary_subnet_local(struct cm_req_msg *req_msg)
-{
- return (u8) ((req_msg->primary_offset94 & 0x08) >> 3);
-}
-
-static inline void cm_req_set_primary_subnet_local(struct cm_req_msg *req_msg,
- u8 subnet_local)
-{
- req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0xF7) |
- ((subnet_local & 0x1) << 3));
-}
-
-static inline u8 cm_req_get_primary_local_ack_timeout(struct cm_req_msg *req_msg)
-{
- return (u8) (req_msg->primary_offset95 >> 3);
-}
-
-static inline void cm_req_set_primary_local_ack_timeout(struct cm_req_msg *req_msg,
- u8 local_ack_timeout)
-{
- req_msg->primary_offset95 = (u8) ((req_msg->primary_offset95 & 0x07) |
- (local_ack_timeout << 3));
-}
-
-static inline __be32 cm_req_get_alt_flow_label(struct cm_req_msg *req_msg)
-{
- return cpu_to_be32(be32_to_cpu(req_msg->alt_offset132) >> 12);
-}
-
-static inline void cm_req_set_alt_flow_label(struct cm_req_msg *req_msg,
- __be32 flow_label)
-{
- req_msg->alt_offset132 = cpu_to_be32(
- (be32_to_cpu(req_msg->alt_offset132) &
- 0x00000FFF) |
- (be32_to_cpu(flow_label) << 12));
-}
-
-static inline u8 cm_req_get_alt_packet_rate(struct cm_req_msg *req_msg)
-{
- return (u8) (be32_to_cpu(req_msg->alt_offset132) & 0x3F);
-}
-
-static inline void cm_req_set_alt_packet_rate(struct cm_req_msg *req_msg,
- u8 rate)
-{
- req_msg->alt_offset132 = cpu_to_be32(
- (be32_to_cpu(req_msg->alt_offset132) &
- 0xFFFFFFC0) | (rate & 0x3F));
-}
-
-static inline u8 cm_req_get_alt_sl(struct cm_req_msg *req_msg)
-{
- return (u8) (req_msg->alt_offset138 >> 4);
-}
-
-static inline void cm_req_set_alt_sl(struct cm_req_msg *req_msg, u8 sl)
-{
- req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0x0F) |
- (sl << 4));
-}
-
-static inline u8 cm_req_get_alt_subnet_local(struct cm_req_msg *req_msg)
-{
- return (u8) ((req_msg->alt_offset138 & 0x08) >> 3);
-}
-
-static inline void cm_req_set_alt_subnet_local(struct cm_req_msg *req_msg,
- u8 subnet_local)
-{
- req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0xF7) |
- ((subnet_local & 0x1) << 3));
-}
-
-static inline u8 cm_req_get_alt_local_ack_timeout(struct cm_req_msg *req_msg)
-{
- return (u8) (req_msg->alt_offset139 >> 3);
-}
-
-static inline void cm_req_set_alt_local_ack_timeout(struct cm_req_msg *req_msg,
- u8 local_ack_timeout)
-{
- req_msg->alt_offset139 = (u8) ((req_msg->alt_offset139 & 0x07) |
- (local_ack_timeout << 3));
-}
-
/* Message REJected or MRAed */
enum cm_msg_response {
CM_MSG_RESPONSE_REQ = 0x0,
@@ -411,419 +57,12 @@ enum cm_msg_response {
CM_MSG_RESPONSE_OTHER = 0x2
};
- struct cm_mra_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 remote_comm_id;
- /* message MRAed:2, rsvd:6 */
- u8 offset8;
- /* service timeout:5, rsvd:3 */
- u8 offset9;
-
- u8 private_data[IB_CM_MRA_PRIVATE_DATA_SIZE];
-
-} __packed;
-
-static inline u8 cm_mra_get_msg_mraed(struct cm_mra_msg *mra_msg)
-{
- return (u8) (mra_msg->offset8 >> 6);
-}
-
-static inline void cm_mra_set_msg_mraed(struct cm_mra_msg *mra_msg, u8 msg)
-{
- mra_msg->offset8 = (u8) ((mra_msg->offset8 & 0x3F) | (msg << 6));
-}
-
-static inline u8 cm_mra_get_service_timeout(struct cm_mra_msg *mra_msg)
-{
- return (u8) (mra_msg->offset9 >> 3);
-}
-
-static inline void cm_mra_set_service_timeout(struct cm_mra_msg *mra_msg,
- u8 service_timeout)
-{
- mra_msg->offset9 = (u8) ((mra_msg->offset9 & 0x07) |
- (service_timeout << 3));
-}
-
-struct cm_rej_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 remote_comm_id;
- /* message REJected:2, rsvd:6 */
- u8 offset8;
- /* reject info length:7, rsvd:1. */
- u8 offset9;
- __be16 reason;
- u8 ari[IB_CM_REJ_ARI_LENGTH];
-
- u8 private_data[IB_CM_REJ_PRIVATE_DATA_SIZE];
-
-} __packed;
-
-static inline u8 cm_rej_get_msg_rejected(struct cm_rej_msg *rej_msg)
-{
- return (u8) (rej_msg->offset8 >> 6);
-}
-
-static inline void cm_rej_set_msg_rejected(struct cm_rej_msg *rej_msg, u8 msg)
-{
- rej_msg->offset8 = (u8) ((rej_msg->offset8 & 0x3F) | (msg << 6));
-}
-
-static inline u8 cm_rej_get_reject_info_len(struct cm_rej_msg *rej_msg)
-{
- return (u8) (rej_msg->offset9 >> 1);
-}
-
-static inline void cm_rej_set_reject_info_len(struct cm_rej_msg *rej_msg,
- u8 len)
-{
- rej_msg->offset9 = (u8) ((rej_msg->offset9 & 0x1) | (len << 1));
-}
-
-struct cm_rep_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 remote_comm_id;
- __be32 local_qkey;
- /* local QPN:24, rsvd:8 */
- __be32 offset12;
- /* local EECN:24, rsvd:8 */
- __be32 offset16;
- /* starting PSN:24 rsvd:8 */
- __be32 offset20;
- u8 resp_resources;
- u8 initiator_depth;
- /* target ACK delay:5, failover accepted:2, end-to-end flow control:1 */
- u8 offset26;
- /* RNR retry count:3, SRQ:1, rsvd:5 */
- u8 offset27;
- __be64 local_ca_guid;
-
- u8 private_data[IB_CM_REP_PRIVATE_DATA_SIZE];
-
-} __packed;
-
-static inline __be32 cm_rep_get_local_qpn(struct cm_rep_msg *rep_msg)
-{
- return cpu_to_be32(be32_to_cpu(rep_msg->offset12) >> 8);
-}
-
-static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, __be32 qpn)
-{
- rep_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
- (be32_to_cpu(rep_msg->offset12) & 0x000000FF));
-}
-
-static inline __be32 cm_rep_get_local_eecn(struct cm_rep_msg *rep_msg)
-{
- return cpu_to_be32(be32_to_cpu(rep_msg->offset16) >> 8);
-}
-
-static inline void cm_rep_set_local_eecn(struct cm_rep_msg *rep_msg, __be32 eecn)
-{
- rep_msg->offset16 = cpu_to_be32((be32_to_cpu(eecn) << 8) |
- (be32_to_cpu(rep_msg->offset16) & 0x000000FF));
-}
-
static inline __be32 cm_rep_get_qpn(struct cm_rep_msg *rep_msg, enum ib_qp_type qp_type)
{
return (qp_type == IB_QPT_XRC_INI) ?
- cm_rep_get_local_eecn(rep_msg) : cm_rep_get_local_qpn(rep_msg);
-}
-
-static inline __be32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg)
-{
- return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8);
-}
-
-static inline void cm_rep_set_starting_psn(struct cm_rep_msg *rep_msg,
- __be32 starting_psn)
-{
- rep_msg->offset20 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) |
- (be32_to_cpu(rep_msg->offset20) & 0x000000FF));
-}
-
-static inline u8 cm_rep_get_target_ack_delay(struct cm_rep_msg *rep_msg)
-{
- return (u8) (rep_msg->offset26 >> 3);
-}
-
-static inline void cm_rep_set_target_ack_delay(struct cm_rep_msg *rep_msg,
- u8 target_ack_delay)
-{
- rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0x07) |
- (target_ack_delay << 3));
-}
-
-static inline u8 cm_rep_get_failover(struct cm_rep_msg *rep_msg)
-{
- return (u8) ((rep_msg->offset26 & 0x06) >> 1);
-}
-
-static inline void cm_rep_set_failover(struct cm_rep_msg *rep_msg, u8 failover)
-{
- rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xF9) |
- ((failover & 0x3) << 1));
-}
-
-static inline u8 cm_rep_get_flow_ctrl(struct cm_rep_msg *rep_msg)
-{
- return (u8) (rep_msg->offset26 & 0x01);
-}
-
-static inline void cm_rep_set_flow_ctrl(struct cm_rep_msg *rep_msg,
- u8 flow_ctrl)
-{
- rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xFE) |
- (flow_ctrl & 0x1));
-}
-
-static inline u8 cm_rep_get_rnr_retry_count(struct cm_rep_msg *rep_msg)
-{
- return (u8) (rep_msg->offset27 >> 5);
-}
-
-static inline void cm_rep_set_rnr_retry_count(struct cm_rep_msg *rep_msg,
- u8 rnr_retry_count)
-{
- rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0x1F) |
- (rnr_retry_count << 5));
-}
-
-static inline u8 cm_rep_get_srq(struct cm_rep_msg *rep_msg)
-{
- return (u8) ((rep_msg->offset27 >> 4) & 0x1);
-}
-
-static inline void cm_rep_set_srq(struct cm_rep_msg *rep_msg, u8 srq)
-{
- rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0xEF) |
- ((srq & 0x1) << 4));
-}
-
-struct cm_rtu_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 remote_comm_id;
-
- u8 private_data[IB_CM_RTU_PRIVATE_DATA_SIZE];
-
-} __packed;
-
-struct cm_dreq_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 remote_comm_id;
- /* remote QPN/EECN:24, rsvd:8 */
- __be32 offset8;
-
- u8 private_data[IB_CM_DREQ_PRIVATE_DATA_SIZE];
-
-} __packed;
-
-static inline __be32 cm_dreq_get_remote_qpn(struct cm_dreq_msg *dreq_msg)
-{
- return cpu_to_be32(be32_to_cpu(dreq_msg->offset8) >> 8);
-}
-
-static inline void cm_dreq_set_remote_qpn(struct cm_dreq_msg *dreq_msg, __be32 qpn)
-{
- dreq_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
- (be32_to_cpu(dreq_msg->offset8) & 0x000000FF));
-}
-
-struct cm_drep_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 remote_comm_id;
-
- u8 private_data[IB_CM_DREP_PRIVATE_DATA_SIZE];
-
-} __packed;
-
-struct cm_lap_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 remote_comm_id;
-
- __be32 rsvd8;
- /* remote QPN/EECN:24, remote CM response timeout:5, rsvd:3 */
- __be32 offset12;
- __be32 rsvd16;
-
- __be16 alt_local_lid;
- __be16 alt_remote_lid;
- union ib_gid alt_local_gid;
- union ib_gid alt_remote_gid;
- /* flow label:20, rsvd:4, traffic class:8 */
- __be32 offset56;
- u8 alt_hop_limit;
- /* rsvd:2, packet rate:6 */
- u8 offset61;
- /* SL:4, subnet local:1, rsvd:3 */
- u8 offset62;
- /* local ACK timeout:5, rsvd:3 */
- u8 offset63;
-
- u8 private_data[IB_CM_LAP_PRIVATE_DATA_SIZE];
-} __packed;
-
-static inline __be32 cm_lap_get_remote_qpn(struct cm_lap_msg *lap_msg)
-{
- return cpu_to_be32(be32_to_cpu(lap_msg->offset12) >> 8);
-}
-
-static inline void cm_lap_set_remote_qpn(struct cm_lap_msg *lap_msg, __be32 qpn)
-{
- lap_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
- (be32_to_cpu(lap_msg->offset12) &
- 0x000000FF));
-}
-
-static inline u8 cm_lap_get_remote_resp_timeout(struct cm_lap_msg *lap_msg)
-{
- return (u8) ((be32_to_cpu(lap_msg->offset12) & 0xF8) >> 3);
-}
-
-static inline void cm_lap_set_remote_resp_timeout(struct cm_lap_msg *lap_msg,
- u8 resp_timeout)
-{
- lap_msg->offset12 = cpu_to_be32((resp_timeout << 3) |
- (be32_to_cpu(lap_msg->offset12) &
- 0xFFFFFF07));
-}
-
-static inline __be32 cm_lap_get_flow_label(struct cm_lap_msg *lap_msg)
-{
- return cpu_to_be32(be32_to_cpu(lap_msg->offset56) >> 12);
-}
-
-static inline void cm_lap_set_flow_label(struct cm_lap_msg *lap_msg,
- __be32 flow_label)
-{
- lap_msg->offset56 = cpu_to_be32(
- (be32_to_cpu(lap_msg->offset56) & 0x00000FFF) |
- (be32_to_cpu(flow_label) << 12));
-}
-
-static inline u8 cm_lap_get_traffic_class(struct cm_lap_msg *lap_msg)
-{
- return (u8) be32_to_cpu(lap_msg->offset56);
-}
-
-static inline void cm_lap_set_traffic_class(struct cm_lap_msg *lap_msg,
- u8 traffic_class)
-{
- lap_msg->offset56 = cpu_to_be32(traffic_class |
- (be32_to_cpu(lap_msg->offset56) &
- 0xFFFFFF00));
-}
-
-static inline u8 cm_lap_get_packet_rate(struct cm_lap_msg *lap_msg)
-{
- return lap_msg->offset61 & 0x3F;
-}
-
-static inline void cm_lap_set_packet_rate(struct cm_lap_msg *lap_msg,
- u8 packet_rate)
-{
- lap_msg->offset61 = (packet_rate & 0x3F) | (lap_msg->offset61 & 0xC0);
-}
-
-static inline u8 cm_lap_get_sl(struct cm_lap_msg *lap_msg)
-{
- return lap_msg->offset62 >> 4;
-}
-
-static inline void cm_lap_set_sl(struct cm_lap_msg *lap_msg, u8 sl)
-{
- lap_msg->offset62 = (sl << 4) | (lap_msg->offset62 & 0x0F);
-}
-
-static inline u8 cm_lap_get_subnet_local(struct cm_lap_msg *lap_msg)
-{
- return (lap_msg->offset62 >> 3) & 0x1;
-}
-
-static inline void cm_lap_set_subnet_local(struct cm_lap_msg *lap_msg,
- u8 subnet_local)
-{
- lap_msg->offset62 = ((subnet_local & 0x1) << 3) |
- (lap_msg->offset61 & 0xF7);
-}
-static inline u8 cm_lap_get_local_ack_timeout(struct cm_lap_msg *lap_msg)
-{
- return lap_msg->offset63 >> 3;
-}
-
-static inline void cm_lap_set_local_ack_timeout(struct cm_lap_msg *lap_msg,
- u8 local_ack_timeout)
-{
- lap_msg->offset63 = (local_ack_timeout << 3) |
- (lap_msg->offset63 & 0x07);
-}
-
-struct cm_apr_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 remote_comm_id;
-
- u8 info_length;
- u8 ap_status;
- __be16 rsvd;
- u8 info[IB_CM_APR_INFO_LENGTH];
-
- u8 private_data[IB_CM_APR_PRIVATE_DATA_SIZE];
-} __packed;
-
-struct cm_sidr_req_msg {
- struct ib_mad_hdr hdr;
-
- __be32 request_id;
- __be16 pkey;
- __be16 rsvd;
- __be64 service_id;
-
- u32 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE / sizeof(u32)];
-} __packed;
-
-struct cm_sidr_rep_msg {
- struct ib_mad_hdr hdr;
-
- __be32 request_id;
- u8 status;
- u8 info_length;
- __be16 rsvd;
- /* QPN:24, rsvd:8 */
- __be32 offset8;
- __be64 service_id;
- __be32 qkey;
- u8 info[IB_CM_SIDR_REP_INFO_LENGTH];
-
- u8 private_data[IB_CM_SIDR_REP_PRIVATE_DATA_SIZE];
-} __packed;
-
-static inline __be32 cm_sidr_rep_get_qpn(struct cm_sidr_rep_msg *sidr_rep_msg)
-{
- return cpu_to_be32(be32_to_cpu(sidr_rep_msg->offset8) >> 8);
-}
-
-static inline void cm_sidr_rep_set_qpn(struct cm_sidr_rep_msg *sidr_rep_msg,
- __be32 qpn)
-{
- sidr_rep_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
- (be32_to_cpu(sidr_rep_msg->offset8) &
- 0x000000FF));
+ cpu_to_be32(IBA_GET(CM_REP_LOCAL_EE_CONTEXT_NUMBER,
+ rep_msg)) :
+ cpu_to_be32(IBA_GET(CM_REP_LOCAL_QPN, rep_msg));
}
#endif /* CM_MSGS_H */
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 19f1730a4f24..72f032160c4b 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1,36 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2005 Voltaire Inc. All rights reserved.
* Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
- * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
+ * Copyright (c) 1999-2019, Mellanox Technologies, Inc. All rights reserved.
* Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/completion.h>
@@ -63,6 +36,7 @@
#include "core_priv.h"
#include "cma_priv.h"
+#include "cma_trace.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
@@ -904,6 +878,7 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
id_priv->id.route.addr.dev_addr.net = get_net(net);
id_priv->seq_num &= 0x00ffffff;
+ trace_cm_id_create(id_priv);
return &id_priv->id;
}
EXPORT_SYMBOL(__rdma_create_id);
@@ -955,27 +930,34 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (id->device != pd->device)
- return -EINVAL;
+ if (id->device != pd->device) {
+ ret = -EINVAL;
+ goto out_err;
+ }
qp_init_attr->port_num = id->port_num;
qp = ib_create_qp(pd, qp_init_attr);
- if (IS_ERR(qp))
- return PTR_ERR(qp);
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
+ goto out_err;
+ }
if (id->qp_type == IB_QPT_UD)
ret = cma_init_ud_qp(id_priv, qp);
else
ret = cma_init_conn_qp(id_priv, qp);
if (ret)
- goto err;
+ goto out_destroy;
id->qp = qp;
id_priv->qp_num = qp->qp_num;
id_priv->srq = (qp->srq != NULL);
+ trace_cm_qp_create(id_priv, pd, qp_init_attr, 0);
return 0;
-err:
+out_destroy:
ib_destroy_qp(qp);
+out_err:
+ trace_cm_qp_create(id_priv, pd, qp_init_attr, ret);
return ret;
}
EXPORT_SYMBOL(rdma_create_qp);
@@ -985,6 +967,7 @@ void rdma_destroy_qp(struct rdma_cm_id *id)
struct rdma_id_private *id_priv;
id_priv = container_of(id, struct rdma_id_private, id);
+ trace_cm_qp_destroy(id_priv);
mutex_lock(&id_priv->qp_mutex);
ib_destroy_qp(id_priv->id.qp);
id_priv->id.qp = NULL;
@@ -1838,6 +1821,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
enum rdma_cm_state state;
id_priv = container_of(id, struct rdma_id_private, id);
+ trace_cm_id_destroy(id_priv);
state = cma_exch(id_priv, RDMA_CM_DESTROYING);
cma_cancel_operation(id_priv, state);
@@ -1890,6 +1874,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
if (ret)
goto reject;
+ trace_cm_send_rtu(id_priv);
ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
if (ret)
goto reject;
@@ -1898,6 +1883,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
reject:
pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply. status %d\n", ret);
cma_modify_qp_err(id_priv);
+ trace_cm_send_rej(id_priv);
ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
NULL, 0, NULL, 0);
return ret;
@@ -1917,6 +1903,17 @@ static void cma_set_rep_event_data(struct rdma_cm_event *event,
event->param.conn.qp_num = rep_data->remote_qpn;
}
+static int cma_cm_event_handler(struct rdma_id_private *id_priv,
+ struct rdma_cm_event *event)
+{
+ int ret;
+
+ trace_cm_event_handler(id_priv, event);
+ ret = id_priv->id.event_handler(&id_priv->id, event);
+ trace_cm_event_done(id_priv, event, ret);
+ return ret;
+}
+
static int cma_ib_handler(struct ib_cm_id *cm_id,
const struct ib_cm_event *ib_event)
{
@@ -1939,8 +1936,10 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
break;
case IB_CM_REP_RECEIVED:
if (cma_comp(id_priv, RDMA_CM_CONNECT) &&
- (id_priv->id.qp_type != IB_QPT_UD))
+ (id_priv->id.qp_type != IB_QPT_UD)) {
+ trace_cm_send_mra(id_priv);
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
+ }
if (id_priv->id.qp) {
event.status = cma_rep_recv(id_priv);
event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
@@ -1985,7 +1984,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
goto out;
}
- ret = id_priv->id.event_handler(&id_priv->id, &event);
+ ret = cma_cm_event_handler(id_priv, &event);
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.ib = NULL;
@@ -2146,6 +2145,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
if (IS_ERR(listen_id))
return PTR_ERR(listen_id);
+ trace_cm_req_handler(listen_id, ib_event->event);
if (!cma_ib_check_req_qp_type(&listen_id->id, ib_event)) {
ret = -EINVAL;
goto net_dev_put;
@@ -2188,7 +2188,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
* until we're done accessing it.
*/
atomic_inc(&conn_id->refcount);
- ret = conn_id->id.event_handler(&conn_id->id, &event);
+ ret = cma_cm_event_handler(conn_id, &event);
if (ret)
goto err3;
/*
@@ -2197,8 +2197,10 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
*/
mutex_lock(&lock);
if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
- (conn_id->id.qp_type != IB_QPT_UD))
+ (conn_id->id.qp_type != IB_QPT_UD)) {
+ trace_cm_send_mra(cm_id->context);
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
+ }
mutex_unlock(&lock);
mutex_unlock(&conn_id->handler_mutex);
mutex_unlock(&listen_id->handler_mutex);
@@ -2313,7 +2315,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
event.status = iw_event->status;
event.param.conn.private_data = iw_event->private_data;
event.param.conn.private_data_len = iw_event->private_data_len;
- ret = id_priv->id.event_handler(&id_priv->id, &event);
+ ret = cma_cm_event_handler(id_priv, &event);
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.iw = NULL;
@@ -2390,15 +2392,16 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
* until we're done accessing it.
*/
atomic_inc(&conn_id->refcount);
- ret = conn_id->id.event_handler(&conn_id->id, &event);
+ ret = cma_cm_event_handler(conn_id, &event);
if (ret) {
/* User wants to destroy the CM ID */
conn_id->cm_id.iw = NULL;
cma_exch(conn_id, RDMA_CM_DESTROYING);
mutex_unlock(&conn_id->handler_mutex);
+ mutex_unlock(&listen_id->handler_mutex);
cma_deref_id(conn_id);
rdma_destroy_id(&conn_id->id);
- goto out;
+ return ret;
}
mutex_unlock(&conn_id->handler_mutex);
@@ -2461,6 +2464,7 @@ static int cma_listen_handler(struct rdma_cm_id *id,
id->context = id_priv->id.context;
id->event_handler = id_priv->id.event_handler;
+ trace_cm_event_handler(id_priv, event);
return id_priv->id.event_handler(id, event);
}
@@ -2530,7 +2534,9 @@ EXPORT_SYMBOL(rdma_set_service_type);
* This function should be called before rdma_connect() on active side,
* and on passive side before rdma_accept(). It is applicable to primary
* path only. The timeout will affect the local side of the QP, it is not
- * negotiated with remote side and zero disables the timer.
+ * negotiated with remote side and zero disables the timer. In case it is
+ * set before rdma_resolve_route, the value will also be used to determine
+ * PacketLifeTime for RoCE.
*
* Return: 0 for success
*/
@@ -2635,7 +2641,7 @@ static void cma_work_handler(struct work_struct *_work)
if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
goto out;
- if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
+ if (cma_cm_event_handler(id_priv, &work->event)) {
cma_exch(id_priv, RDMA_CM_DESTROYING);
destroy = 1;
}
@@ -2658,7 +2664,7 @@ static void cma_ndev_work_handler(struct work_struct *_work)
id_priv->state == RDMA_CM_DEVICE_REMOVAL)
goto out;
- if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
+ if (cma_cm_event_handler(id_priv, &work->event)) {
cma_exch(id_priv, RDMA_CM_DESTROYING);
destroy = 1;
}
@@ -2827,22 +2833,65 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv)
return 0;
}
-static int iboe_tos_to_sl(struct net_device *ndev, int tos)
+static int get_vlan_ndev_tc(struct net_device *vlan_ndev, int prio)
{
- int prio;
struct net_device *dev;
- prio = rt_tos2priority(tos);
- dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev;
+ dev = vlan_dev_real_dev(vlan_ndev);
if (dev->num_tc)
return netdev_get_prio_tc_map(dev, prio);
-#if IS_ENABLED(CONFIG_VLAN_8021Q)
+ return (vlan_dev_get_egress_qos_mask(vlan_ndev, prio) &
+ VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+}
+
+struct iboe_prio_tc_map {
+ int input_prio;
+ int output_tc;
+ bool found;
+};
+
+static int get_lower_vlan_dev_tc(struct net_device *dev, void *data)
+{
+ struct iboe_prio_tc_map *map = data;
+
+ if (is_vlan_dev(dev))
+ map->output_tc = get_vlan_ndev_tc(dev, map->input_prio);
+ else if (dev->num_tc)
+ map->output_tc = netdev_get_prio_tc_map(dev, map->input_prio);
+ else
+ map->output_tc = 0;
+ /* We are interested only in first level VLAN device, so always
+ * return 1 to stop iterating over next level devices.
+ */
+ map->found = true;
+ return 1;
+}
+
+static int iboe_tos_to_sl(struct net_device *ndev, int tos)
+{
+ struct iboe_prio_tc_map prio_tc_map = {};
+ int prio = rt_tos2priority(tos);
+
+ /* If VLAN device, get it directly from the VLAN netdev */
if (is_vlan_dev(ndev))
- return (vlan_dev_get_egress_qos_mask(ndev, prio) &
- VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
-#endif
- return 0;
+ return get_vlan_ndev_tc(ndev, prio);
+
+ prio_tc_map.input_prio = prio;
+ rcu_read_lock();
+ netdev_walk_all_lower_dev_rcu(ndev,
+ get_lower_vlan_dev_tc,
+ &prio_tc_map);
+ rcu_read_unlock();
+ /* If map is found from lower device, use it; Otherwise
+ * continue with the current netdevice to get priority to tc map.
+ */
+ if (prio_tc_map.found)
+ return prio_tc_map.output_tc;
+ else if (ndev->num_tc)
+ return netdev_get_prio_tc_map(ndev, prio);
+ else
+ return 0;
}
static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
@@ -2896,7 +2945,16 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
route->path_rec->rate = iboe_get_rate(ndev);
dev_put(ndev);
route->path_rec->packet_life_time_selector = IB_SA_EQ;
- route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
+ /* In case ACK timeout is set, use this value to calculate
+ * PacketLifeTime. As per IBTA 12.7.34,
+ * local ACK timeout = (2 * PacketLifeTime + Local CA’s ACK delay).
+ * Assuming a negligible local ACK delay, we can use
+ * PacketLifeTime = local ACK timeout/2
+ * as a reasonable approximation for RoCE networks.
+ */
+ route->path_rec->packet_life_time = id_priv->timeout_set ?
+ id_priv->timeout - 1 : CMA_IBOE_PACKET_LIFETIME;
+
if (!route->path_rec->mtu) {
ret = -EINVAL;
goto err2;
@@ -3046,7 +3104,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
if (status)
pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n",
status);
- } else {
+ } else if (status) {
pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status);
}
@@ -3061,7 +3119,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
} else
event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
- if (id_priv->id.event_handler(&id_priv->id, &event)) {
+ if (cma_cm_event_handler(id_priv, &event)) {
cma_exch(id_priv, RDMA_CM_DESTROYING);
mutex_unlock(&id_priv->handler_mutex);
rdma_destroy_id(&id_priv->id);
@@ -3090,6 +3148,7 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
+ atomic_inc(&id_priv->refcount);
cma_init_resolve_addr_work(work, id_priv);
queue_work(cma_wq, &work->work);
return 0;
@@ -3116,6 +3175,7 @@ static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
&(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
+ atomic_inc(&id_priv->refcount);
cma_init_resolve_addr_work(work, id_priv);
queue_work(cma_wq, &work->work);
return 0;
@@ -3708,7 +3768,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
goto out;
}
- ret = id_priv->id.event_handler(&id_priv->id, &event);
+ ret = cma_cm_event_handler(id_priv, &event);
rdma_destroy_ah_attr(&event.param.ud.ah_attr);
if (ret) {
@@ -3772,6 +3832,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
req.max_cm_retries = CMA_MAX_CM_RETRIES;
+ trace_cm_send_sidr_req(id_priv);
ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
if (ret) {
ib_destroy_cm_id(id_priv->cm_id.ib);
@@ -3845,6 +3906,7 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
req.max_cm_retries = CMA_MAX_CM_RETRIES;
req.srq = id_priv->srq ? 1 : 0;
+ trace_cm_send_req(id_priv);
ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
out:
if (ret && !IS_ERR(id)) {
@@ -3958,6 +4020,7 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
rep.srq = id_priv->srq ? 1 : 0;
+ trace_cm_send_rep(id_priv);
ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
out:
return ret;
@@ -4007,6 +4070,7 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
rep.private_data = private_data;
rep.private_data_len = private_data_len;
+ trace_cm_send_sidr_rep(id_priv);
return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
}
@@ -4092,13 +4156,15 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
return -EINVAL;
if (rdma_cap_ib_cm(id->device, id->port_num)) {
- if (id->qp_type == IB_QPT_UD)
+ if (id->qp_type == IB_QPT_UD) {
ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0,
private_data, private_data_len);
- else
+ } else {
+ trace_cm_send_rej(id_priv);
ret = ib_send_cm_rej(id_priv->cm_id.ib,
IB_CM_REJ_CONSUMER_DEFINED, NULL,
0, private_data, private_data_len);
+ }
} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
ret = iw_cm_reject(id_priv->cm_id.iw,
private_data, private_data_len);
@@ -4123,8 +4189,13 @@ int rdma_disconnect(struct rdma_cm_id *id)
if (ret)
goto out;
/* Initiate or respond to a disconnect. */
- if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
- ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
+ trace_cm_disconnect(id_priv);
+ if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) {
+ if (!ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0))
+ trace_cm_sent_drep(id_priv);
+ } else {
+ trace_cm_sent_dreq(id_priv);
+ }
} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
} else
@@ -4190,7 +4261,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
} else
event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
- ret = id_priv->id.event_handler(&id_priv->id, &event);
+ ret = cma_cm_event_handler(id_priv, &event);
rdma_destroy_ah_attr(&event.param.ud.ah_attr);
if (ret) {
@@ -4595,6 +4666,7 @@ static void cma_add_one(struct ib_device *device)
cma_listen_on_dev(id_priv, cma_dev);
mutex_unlock(&lock);
+ trace_cm_add_one(device);
return;
free_gid_type:
@@ -4625,7 +4697,7 @@ static int cma_remove_id_dev(struct rdma_id_private *id_priv)
goto out;
event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
- ret = id_priv->id.event_handler(&id_priv->id, &event);
+ ret = cma_cm_event_handler(id_priv, &event);
out:
mutex_unlock(&id_priv->handler_mutex);
return ret;
@@ -4663,6 +4735,8 @@ static void cma_remove_one(struct ib_device *device, void *client_data)
{
struct cma_device *cma_dev = client_data;
+ trace_cm_remove_one(device);
+
if (!cma_dev)
return;
@@ -4724,13 +4798,18 @@ static int __init cma_init(void)
if (ret)
goto err;
- cma_configfs_init();
+ ret = cma_configfs_init();
+ if (ret)
+ goto err_ib;
return 0;
+err_ib:
+ ib_unregister_client(&cma_client);
err:
unregister_netdevice_notifier(&cma_nb);
ib_sa_unregister_client(&sa_client);
+ unregister_pernet_subsys(&cma_pernet_operations);
err_wq:
destroy_workqueue(cma_wq);
return ret;
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 3ec2c415bb70..8b0b5ae22e4c 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -342,12 +342,18 @@ static struct configfs_subsystem cma_subsys = {
int __init cma_configfs_init(void)
{
+ int ret;
+
config_group_init(&cma_subsys.su_group);
mutex_init(&cma_subsys.su_mutex);
- return configfs_register_subsystem(&cma_subsys);
+ ret = configfs_register_subsystem(&cma_subsys);
+ if (ret)
+ mutex_destroy(&cma_subsys.su_mutex);
+ return ret;
}
void __exit cma_configfs_exit(void)
{
configfs_unregister_subsystem(&cma_subsys);
+ mutex_destroy(&cma_subsys.su_mutex);
}
diff --git a/drivers/infiniband/core/cma_trace.c b/drivers/infiniband/core/cma_trace.c
new file mode 100644
index 000000000000..b314a281e10e
--- /dev/null
+++ b/drivers/infiniband/core/cma_trace.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Trace points for the RDMA Connection Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#define CREATE_TRACE_POINTS
+
+#include <rdma/rdma_cm.h>
+#include <rdma/ib_cm.h>
+#include "cma_priv.h"
+
+#include "cma_trace.h"
diff --git a/drivers/infiniband/core/cma_trace.h b/drivers/infiniband/core/cma_trace.h
new file mode 100644
index 000000000000..81e36bf13159
--- /dev/null
+++ b/drivers/infiniband/core/cma_trace.h
@@ -0,0 +1,391 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Trace point definitions for the RDMA Connect Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rdma_cma
+
+#if !defined(_TRACE_RDMA_CMA_H) || defined(TRACE_HEADER_MULTI_READ)
+
+#define _TRACE_RDMA_CMA_H
+
+#include <linux/tracepoint.h>
+#include <trace/events/rdma.h>
+
+/*
+ * enum ib_cm_event_type, from include/rdma/ib_cm.h
+ */
+#define IB_CM_EVENT_LIST \
+ ib_cm_event(REQ_ERROR) \
+ ib_cm_event(REQ_RECEIVED) \
+ ib_cm_event(REP_ERROR) \
+ ib_cm_event(REP_RECEIVED) \
+ ib_cm_event(RTU_RECEIVED) \
+ ib_cm_event(USER_ESTABLISHED) \
+ ib_cm_event(DREQ_ERROR) \
+ ib_cm_event(DREQ_RECEIVED) \
+ ib_cm_event(DREP_RECEIVED) \
+ ib_cm_event(TIMEWAIT_EXIT) \
+ ib_cm_event(MRA_RECEIVED) \
+ ib_cm_event(REJ_RECEIVED) \
+ ib_cm_event(LAP_ERROR) \
+ ib_cm_event(LAP_RECEIVED) \
+ ib_cm_event(APR_RECEIVED) \
+ ib_cm_event(SIDR_REQ_ERROR) \
+ ib_cm_event(SIDR_REQ_RECEIVED) \
+ ib_cm_event_end(SIDR_REP_RECEIVED)
+
+#undef ib_cm_event
+#undef ib_cm_event_end
+
+#define ib_cm_event(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_event_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_EVENT_LIST
+
+#undef ib_cm_event
+#undef ib_cm_event_end
+
+#define ib_cm_event(x) { IB_CM_##x, #x },
+#define ib_cm_event_end(x) { IB_CM_##x, #x }
+
+#define rdma_show_ib_cm_event(x) \
+ __print_symbolic(x, IB_CM_EVENT_LIST)
+
+
+DECLARE_EVENT_CLASS(cma_fsm_class,
+ TP_PROTO(
+ const struct rdma_id_private *id_priv
+ ),
+
+ TP_ARGS(id_priv),
+
+ TP_STRUCT__entry(
+ __field(u32, cm_id)
+ __field(u32, tos)
+ __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+ __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = id_priv->res.id;
+ __entry->tos = id_priv->tos;
+ memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+ sizeof(struct sockaddr_in6));
+ memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+
+ TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u",
+ __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos
+ )
+);
+
+#define DEFINE_CMA_FSM_EVENT(name) \
+ DEFINE_EVENT(cma_fsm_class, cm_##name, \
+ TP_PROTO( \
+ const struct rdma_id_private *id_priv \
+ ), \
+ TP_ARGS(id_priv))
+
+DEFINE_CMA_FSM_EVENT(send_rtu);
+DEFINE_CMA_FSM_EVENT(send_rej);
+DEFINE_CMA_FSM_EVENT(send_mra);
+DEFINE_CMA_FSM_EVENT(send_sidr_req);
+DEFINE_CMA_FSM_EVENT(send_sidr_rep);
+DEFINE_CMA_FSM_EVENT(disconnect);
+DEFINE_CMA_FSM_EVENT(sent_drep);
+DEFINE_CMA_FSM_EVENT(sent_dreq);
+DEFINE_CMA_FSM_EVENT(id_destroy);
+
+TRACE_EVENT(cm_id_create,
+ TP_PROTO(
+ const struct rdma_id_private *id_priv
+ ),
+
+ TP_ARGS(id_priv),
+
+ TP_STRUCT__entry(
+ __field(u32, cm_id)
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = id_priv->res.id;
+ ),
+
+ TP_printk("cm.id=%u",
+ __entry->cm_id
+ )
+);
+
+DECLARE_EVENT_CLASS(cma_qp_class,
+ TP_PROTO(
+ const struct rdma_id_private *id_priv
+ ),
+
+ TP_ARGS(id_priv),
+
+ TP_STRUCT__entry(
+ __field(u32, cm_id)
+ __field(u32, tos)
+ __field(u32, qp_num)
+ __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+ __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = id_priv->res.id;
+ __entry->tos = id_priv->tos;
+ __entry->qp_num = id_priv->qp_num;
+ memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+ sizeof(struct sockaddr_in6));
+ memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+
+ TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u qp_num=%u",
+ __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos,
+ __entry->qp_num
+ )
+);
+
+#define DEFINE_CMA_QP_EVENT(name) \
+ DEFINE_EVENT(cma_qp_class, cm_##name, \
+ TP_PROTO( \
+ const struct rdma_id_private *id_priv \
+ ), \
+ TP_ARGS(id_priv))
+
+DEFINE_CMA_QP_EVENT(send_req);
+DEFINE_CMA_QP_EVENT(send_rep);
+DEFINE_CMA_QP_EVENT(qp_destroy);
+
+/*
+ * enum ib_wp_type, from include/rdma/ib_verbs.h
+ */
+#define IB_QP_TYPE_LIST \
+ ib_qp_type(SMI) \
+ ib_qp_type(GSI) \
+ ib_qp_type(RC) \
+ ib_qp_type(UC) \
+ ib_qp_type(UD) \
+ ib_qp_type(RAW_IPV6) \
+ ib_qp_type(RAW_ETHERTYPE) \
+ ib_qp_type(RAW_PACKET) \
+ ib_qp_type(XRC_INI) \
+ ib_qp_type_end(XRC_TGT)
+
+#undef ib_qp_type
+#undef ib_qp_type_end
+
+#define ib_qp_type(x) TRACE_DEFINE_ENUM(IB_QPT_##x);
+#define ib_qp_type_end(x) TRACE_DEFINE_ENUM(IB_QPT_##x);
+
+IB_QP_TYPE_LIST
+
+#undef ib_qp_type
+#undef ib_qp_type_end
+
+#define ib_qp_type(x) { IB_QPT_##x, #x },
+#define ib_qp_type_end(x) { IB_QPT_##x, #x }
+
+#define rdma_show_qp_type(x) \
+ __print_symbolic(x, IB_QP_TYPE_LIST)
+
+
+TRACE_EVENT(cm_qp_create,
+ TP_PROTO(
+ const struct rdma_id_private *id_priv,
+ const struct ib_pd *pd,
+ const struct ib_qp_init_attr *qp_init_attr,
+ int rc
+ ),
+
+ TP_ARGS(id_priv, pd, qp_init_attr, rc),
+
+ TP_STRUCT__entry(
+ __field(u32, cm_id)
+ __field(u32, pd_id)
+ __field(u32, tos)
+ __field(u32, qp_num)
+ __field(u32, send_wr)
+ __field(u32, recv_wr)
+ __field(int, rc)
+ __field(unsigned long, qp_type)
+ __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+ __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = id_priv->res.id;
+ __entry->pd_id = pd->res.id;
+ __entry->tos = id_priv->tos;
+ __entry->send_wr = qp_init_attr->cap.max_send_wr;
+ __entry->recv_wr = qp_init_attr->cap.max_recv_wr;
+ __entry->rc = rc;
+ if (!rc) {
+ __entry->qp_num = id_priv->qp_num;
+ __entry->qp_type = id_priv->id.qp_type;
+ } else {
+ __entry->qp_num = 0;
+ __entry->qp_type = 0;
+ }
+ memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+ sizeof(struct sockaddr_in6));
+ memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+
+ TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u pd.id=%u qp_type=%s"
+ " send_wr=%u recv_wr=%u qp_num=%u rc=%d",
+ __entry->cm_id, __entry->srcaddr, __entry->dstaddr,
+ __entry->tos, __entry->pd_id,
+ rdma_show_qp_type(__entry->qp_type), __entry->send_wr,
+ __entry->recv_wr, __entry->qp_num, __entry->rc
+ )
+);
+
+TRACE_EVENT(cm_req_handler,
+ TP_PROTO(
+ const struct rdma_id_private *id_priv,
+ int event
+ ),
+
+ TP_ARGS(id_priv, event),
+
+ TP_STRUCT__entry(
+ __field(u32, cm_id)
+ __field(u32, tos)
+ __field(unsigned long, event)
+ __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+ __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = id_priv->res.id;
+ __entry->tos = id_priv->tos;
+ __entry->event = event;
+ memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+ sizeof(struct sockaddr_in6));
+ memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+
+ TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u %s (%lu)",
+ __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos,
+ rdma_show_ib_cm_event(__entry->event), __entry->event
+ )
+);
+
+TRACE_EVENT(cm_event_handler,
+ TP_PROTO(
+ const struct rdma_id_private *id_priv,
+ const struct rdma_cm_event *event
+ ),
+
+ TP_ARGS(id_priv, event),
+
+ TP_STRUCT__entry(
+ __field(u32, cm_id)
+ __field(u32, tos)
+ __field(unsigned long, event)
+ __field(int, status)
+ __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+ __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = id_priv->res.id;
+ __entry->tos = id_priv->tos;
+ __entry->event = event->event;
+ __entry->status = event->status;
+ memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+ sizeof(struct sockaddr_in6));
+ memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+
+ TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u %s (%lu/%d)",
+ __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos,
+ rdma_show_cm_event(__entry->event), __entry->event,
+ __entry->status
+ )
+);
+
+TRACE_EVENT(cm_event_done,
+ TP_PROTO(
+ const struct rdma_id_private *id_priv,
+ const struct rdma_cm_event *event,
+ int result
+ ),
+
+ TP_ARGS(id_priv, event, result),
+
+ TP_STRUCT__entry(
+ __field(u32, cm_id)
+ __field(u32, tos)
+ __field(unsigned long, event)
+ __field(int, result)
+ __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+ __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = id_priv->res.id;
+ __entry->tos = id_priv->tos;
+ __entry->event = event->event;
+ __entry->result = result;
+ memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+ sizeof(struct sockaddr_in6));
+ memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+
+ TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u %s consumer returns %d",
+ __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos,
+ rdma_show_cm_event(__entry->event), __entry->result
+ )
+);
+
+DECLARE_EVENT_CLASS(cma_client_class,
+ TP_PROTO(
+ const struct ib_device *device
+ ),
+
+ TP_ARGS(device),
+
+ TP_STRUCT__entry(
+ __string(name, device->name)
+ ),
+
+ TP_fast_assign(
+ __assign_str(name, device->name);
+ ),
+
+ TP_printk("device name=%s",
+ __get_str(name)
+ )
+);
+
+#define DEFINE_CMA_CLIENT_EVENT(name) \
+ DEFINE_EVENT(cma_client_class, cm_##name, \
+ TP_PROTO( \
+ const struct ib_device *device \
+ ), \
+ TP_ARGS(device))
+
+DEFINE_CMA_CLIENT_EVENT(add_one);
+DEFINE_CMA_CLIENT_EVENT(remove_one);
+
+#endif /* _TRACE_RDMA_CMA_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE cma_trace
+
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index beee7b7e0d9a..b1457b3464d3 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -36,6 +36,8 @@
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/cgroup_rdma.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <rdma/ib_verbs.h>
#include <rdma/opa_addr.h>
@@ -54,8 +56,26 @@ struct pkey_index_qp_list {
struct list_head qp_list;
};
+/**
+ * struct rdma_dev_net - rdma net namespace metadata for a net
+ * @nl_sock: Pointer to netlink socket
+ * @net: Pointer to owner net namespace
+ * @id: xarray id to identify the net namespace.
+ */
+struct rdma_dev_net {
+ struct sock *nl_sock;
+ possible_net_t net;
+ u32 id;
+};
+
extern const struct attribute_group ib_dev_attr_group;
extern bool ib_devices_shared_netns;
+extern unsigned int rdma_dev_net_id;
+
+static inline struct rdma_dev_net *rdma_net_to_dev_net(struct net *net)
+{
+ return net_generic(net, rdma_dev_net_id);
+}
int ib_device_register_sysfs(struct ib_device *device);
void ib_device_unregister_sysfs(struct ib_device *device);
@@ -129,6 +149,7 @@ unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port);
int ib_cache_setup_one(struct ib_device *device);
void ib_cache_cleanup_one(struct ib_device *device);
void ib_cache_release_one(struct ib_device *device);
+void ib_dispatch_event_clients(struct ib_event *event);
#ifdef CONFIG_CGROUP_RDMA
void ib_device_register_rdmacg(struct ib_device *device);
@@ -179,7 +200,7 @@ void ib_mad_cleanup(void);
int ib_sa_init(void);
void ib_sa_cleanup(void);
-int rdma_nl_init(void);
+void rdma_nl_init(void);
void rdma_nl_exit(void);
int ib_nl_handle_resolve_resp(struct sk_buff *skb,
@@ -300,7 +321,7 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
struct ib_pd *pd,
struct ib_qp_init_attr *attr,
struct ib_udata *udata,
- struct ib_uobject *uobj)
+ struct ib_uqp_object *uobj)
{
enum ib_qp_type qp_type = attr->qp_type;
struct ib_qp *qp;
@@ -365,4 +386,18 @@ void ib_port_unregister_module_stat(struct kobject *kobj);
int ib_device_set_netns_put(struct sk_buff *skb,
struct ib_device *dev, u32 ns_fd);
+
+int rdma_nl_net_init(struct rdma_dev_net *rnet);
+void rdma_nl_net_exit(struct rdma_dev_net *rnet);
+
+struct rdma_umap_priv {
+ struct vm_area_struct *vma;
+ struct list_head list;
+ struct rdma_user_mmap_entry *entry;
+};
+
+void rdma_umap_priv_init(struct rdma_umap_priv *priv,
+ struct vm_area_struct *vma,
+ struct rdma_user_mmap_entry *entry);
+
#endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
index b79890739a2c..2257d7f7810f 100644
--- a/drivers/infiniband/core/counters.c
+++ b/drivers/infiniband/core/counters.c
@@ -149,13 +149,18 @@ static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter,
struct auto_mode_param *param = &counter->mode.param;
bool match = true;
- if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res))
- return false;
-
- /* Ensure that counter belong to right PID */
- if (!rdma_is_kernel_res(&counter->res) &&
- !rdma_is_kernel_res(&qp->res) &&
- (task_pid_vnr(counter->res.task) != current->pid))
+ /*
+ * Ensure that counter belongs to the right PID. This operation can
+ * race with user space which kills the process and leaves QP and
+ * counters orphans.
+ *
+ * It is not a big deal because exitted task will leave both QP and
+ * counter in the same bucket of zombie process. Just ensure that
+ * process is still alive before procedding.
+ *
+ */
+ if (task_pid_nr(counter->res.task) != task_pid_nr(qp->res.task) ||
+ !task_pid_nr(qp->res.task))
return false;
if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE)
@@ -231,9 +236,6 @@ static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp,
rt = &dev->res[RDMA_RESTRACK_COUNTER];
xa_lock(&rt->xa);
xa_for_each(&rt->xa, id, res) {
- if (!rdma_is_visible_in_pid_ns(res))
- continue;
-
counter = container_of(res, struct rdma_counter, res);
if ((counter->device != qp->device) || (counter->port != port))
goto next;
@@ -284,6 +286,9 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port)
struct rdma_counter *counter;
int ret;
+ if (!qp->res.valid)
+ return 0;
+
if (!rdma_is_port_valid(dev, port))
return -EINVAL;
@@ -414,9 +419,6 @@ static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num)
if (IS_ERR(res))
return NULL;
- if (!rdma_is_visible_in_pid_ns(res))
- goto err;
-
qp = container_of(res, struct ib_qp, res);
if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
goto err;
@@ -424,7 +426,7 @@ static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num)
return qp;
err:
- rdma_restrack_put(&qp->res);
+ rdma_restrack_put(res);
return NULL;
}
@@ -447,11 +449,6 @@ static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev,
if (IS_ERR(res))
return NULL;
- if (!rdma_is_visible_in_pid_ns(res)) {
- rdma_restrack_put(res);
- return NULL;
- }
-
counter = container_of(res, struct rdma_counter, res);
kref_get(&counter->kref);
rdma_restrack_put(res);
@@ -465,10 +462,15 @@ static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev,
int rdma_counter_bind_qpn(struct ib_device *dev, u8 port,
u32 qp_num, u32 counter_id)
{
+ struct rdma_port_counter *port_counter;
struct rdma_counter *counter;
struct ib_qp *qp;
int ret;
+ port_counter = &dev->port_data[port].port_counter;
+ if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO)
+ return -EINVAL;
+
qp = rdma_counter_get_qp(dev, qp_num);
if (!qp)
return -ENOENT;
@@ -505,6 +507,7 @@ err:
int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port,
u32 qp_num, u32 *counter_id)
{
+ struct rdma_port_counter *port_counter;
struct rdma_counter *counter;
struct ib_qp *qp;
int ret;
@@ -512,9 +515,13 @@ int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port,
if (!rdma_is_port_valid(dev, port))
return -EINVAL;
- if (!dev->port_data[port].port_counter.hstats)
+ port_counter = &dev->port_data[port].port_counter;
+ if (!port_counter->hstats)
return -EOPNOTSUPP;
+ if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO)
+ return -EINVAL;
+
qp = rdma_counter_get_qp(dev, qp_num);
if (!qp)
return -ENOENT;
@@ -601,7 +608,7 @@ int rdma_counter_get_mode(struct ib_device *dev, u8 port,
void rdma_counter_init(struct ib_device *dev)
{
struct rdma_port_counter *port_counter;
- u32 port;
+ u32 port, i;
if (!dev->port_data)
return;
@@ -622,13 +629,12 @@ void rdma_counter_init(struct ib_device *dev)
return;
fail:
- rdma_for_each_port(dev, port) {
+ for (i = port; i >= rdma_start_port(dev); i--) {
port_counter = &dev->port_data[port].port_counter;
kfree(port_counter->hstats);
port_counter->hstats = NULL;
+ mutex_destroy(&port_counter->lock);
}
-
- return;
}
void rdma_counter_release(struct ib_device *dev)
@@ -639,5 +645,6 @@ void rdma_counter_release(struct ib_device *dev)
rdma_for_each_port(dev, port) {
port_counter = &dev->port_data[port].port_counter;
kfree(port_counter->hstats);
+ mutex_destroy(&port_counter->lock);
}
}
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index 7c599878ccf7..4f25b2400694 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -7,6 +7,8 @@
#include <linux/slab.h>
#include <rdma/ib_verbs.h>
+#include <trace/events/rdma_core.h>
+
/* # of WCs to poll for with a single call to ib_poll_cq */
#define IB_POLL_BATCH 16
#define IB_POLL_BATCH_DIRECT 8
@@ -41,6 +43,7 @@ static void ib_cq_rdma_dim_work(struct work_struct *w)
dim->state = DIM_START_MEASURE;
+ trace_cq_modify(cq, comps, usec);
cq->device->ops.modify_cq(cq, comps, usec);
}
@@ -65,18 +68,29 @@ static void rdma_dim_init(struct ib_cq *cq)
INIT_WORK(&dim->work, ib_cq_rdma_dim_work);
}
+static int __poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
+{
+ int rc;
+
+ rc = ib_poll_cq(cq, num_entries, wc);
+ trace_cq_poll(cq, num_entries, rc);
+ return rc;
+}
+
static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs,
int batch)
{
int i, n, completed = 0;
+ trace_cq_process(cq);
+
/*
* budget might be (-1) if the caller does not
* want to bound this call, thus we need unsigned
* minimum here.
*/
- while ((n = ib_poll_cq(cq, min_t(u32, batch,
- budget - completed), wcs)) > 0) {
+ while ((n = __poll_cq(cq, min_t(u32, batch,
+ budget - completed), wcs)) > 0) {
for (i = 0; i < n; i++) {
struct ib_wc *wc = &wcs[i];
@@ -131,8 +145,10 @@ static int ib_poll_handler(struct irq_poll *iop, int budget)
completed = __ib_process_cq(cq, budget, cq->wc, IB_POLL_BATCH);
if (completed < budget) {
irq_poll_complete(&cq->iop);
- if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
+ if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0) {
+ trace_cq_reschedule(cq);
irq_poll_sched(&cq->iop);
+ }
}
if (dim)
@@ -143,6 +159,7 @@ static int ib_poll_handler(struct irq_poll *iop, int budget)
static void ib_cq_completion_softirq(struct ib_cq *cq, void *private)
{
+ trace_cq_schedule(cq);
irq_poll_sched(&cq->iop);
}
@@ -162,6 +179,7 @@ static void ib_cq_poll_work(struct work_struct *work)
static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
{
+ trace_cq_schedule(cq);
queue_work(cq->comp_wq, &cq->work);
}
@@ -239,6 +257,7 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
goto out_destroy_cq;
}
+ trace_cq_alloc(cq, nr_cqe, comp_vector, poll_ctx);
return cq;
out_destroy_cq:
@@ -248,11 +267,40 @@ out_free_wc:
kfree(cq->wc);
out_free_cq:
kfree(cq);
+ trace_cq_alloc_error(nr_cqe, comp_vector, poll_ctx, ret);
return ERR_PTR(ret);
}
EXPORT_SYMBOL(__ib_alloc_cq_user);
/**
+ * __ib_alloc_cq_any - allocate a completion queue
+ * @dev: device to allocate the CQ for
+ * @private: driver private data, accessible from cq->cq_context
+ * @nr_cqe: number of CQEs to allocate
+ * @poll_ctx: context to poll the CQ from
+ * @caller: module owner name
+ *
+ * Attempt to spread ULP Completion Queues over each device's interrupt
+ * vectors. A simple best-effort mechanism is used.
+ */
+struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
+ int nr_cqe, enum ib_poll_context poll_ctx,
+ const char *caller)
+{
+ static atomic_t counter;
+ int comp_vector = 0;
+
+ if (dev->num_comp_vectors > 1)
+ comp_vector =
+ atomic_inc_return(&counter) %
+ min_t(int, dev->num_comp_vectors, num_online_cpus());
+
+ return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
+ caller, NULL);
+}
+EXPORT_SYMBOL(__ib_alloc_cq_any);
+
+/**
* ib_free_cq_user - free a completion queue
* @cq: completion queue to free.
* @udata: User data or NULL for kernel object
@@ -276,6 +324,7 @@ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
WARN_ON_ONCE(1);
}
+ trace_cq_free(cq);
rdma_restrack_del(&cq->res);
cq->device->ops.destroy_cq(cq, udata);
if (cq->dim)
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index ea8661a00651..f6c255202d7f 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -39,7 +39,6 @@
#include <linux/init.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
-#include <net/netns/generic.h>
#include <linux/security.h>
#include <linux/notifier.h>
#include <linux/hashtable.h>
@@ -111,17 +110,7 @@ static void ib_client_put(struct ib_client *client)
*/
#define CLIENT_DATA_REGISTERED XA_MARK_1
-/**
- * struct rdma_dev_net - rdma net namespace metadata for a net
- * @net: Pointer to owner net namespace
- * @id: xarray id to identify the net namespace.
- */
-struct rdma_dev_net {
- possible_net_t net;
- u32 id;
-};
-
-static unsigned int rdma_dev_net_id;
+unsigned int rdma_dev_net_id;
/*
* A list of net namespaces is maintained in an xarray. This is necessary
@@ -139,17 +128,14 @@ module_param_named(netns_mode, ib_devices_shared_netns, bool, 0444);
MODULE_PARM_DESC(netns_mode,
"Share device among net namespaces; default=1 (shared)");
/**
- * rdma_dev_access_netns() - Return whether a rdma device can be accessed
+ * rdma_dev_access_netns() - Return whether an rdma device can be accessed
* from a specified net namespace or not.
- * @device: Pointer to rdma device which needs to be checked
+ * @dev: Pointer to rdma device which needs to be checked
* @net: Pointer to net namesapce for which access to be checked
*
- * rdma_dev_access_netns() - Return whether a rdma device can be accessed
- * from a specified net namespace or not. When
- * rdma device is in shared mode, it ignores the
- * net namespace. When rdma device is exclusive
- * to a net namespace, rdma device net namespace is
- * checked against the specified one.
+ * When the rdma device is in shared mode, it ignores the net namespace.
+ * When the rdma device is exclusive to a net namespace, rdma device net
+ * namespace is checked against the specified one.
*/
bool rdma_dev_access_netns(const struct ib_device *dev, const struct net *net)
{
@@ -514,6 +500,9 @@ static void ib_device_release(struct device *device)
rcu_head);
}
+ mutex_destroy(&dev->unregistration_lock);
+ mutex_destroy(&dev->compat_devs_mutex);
+
xa_destroy(&dev->compat_devs);
xa_destroy(&dev->client_data);
kfree_rcu(dev, rcu_head);
@@ -598,7 +587,8 @@ struct ib_device *_ib_alloc_device(size_t size)
rdma_init_coredev(&device->coredev, device, &init_net);
INIT_LIST_HEAD(&device->event_handler_list);
- spin_lock_init(&device->event_handler_lock);
+ spin_lock_init(&device->qp_open_list_lock);
+ init_rwsem(&device->event_handler_rwsem);
mutex_init(&device->unregistration_lock);
/*
* client_data needs to be alloc because we don't want our mark to be
@@ -1060,7 +1050,7 @@ int rdma_compatdev_set(u8 enable)
static void rdma_dev_exit_net(struct net *net)
{
- struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id);
+ struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
struct ib_device *dev;
unsigned long index;
int ret;
@@ -1094,25 +1084,32 @@ static void rdma_dev_exit_net(struct net *net)
}
up_read(&devices_rwsem);
+ rdma_nl_net_exit(rnet);
xa_erase(&rdma_nets, rnet->id);
}
static __net_init int rdma_dev_init_net(struct net *net)
{
- struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id);
+ struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
unsigned long index;
struct ib_device *dev;
int ret;
+ write_pnet(&rnet->net, net);
+
+ ret = rdma_nl_net_init(rnet);
+ if (ret)
+ return ret;
+
/* No need to create any compat devices in default init_net. */
if (net_eq(net, &init_net))
return 0;
- write_pnet(&rnet->net, net);
-
ret = xa_alloc(&rdma_nets, &rnet->id, rnet, xa_limit_32b, GFP_KERNEL);
- if (ret)
+ if (ret) {
+ rdma_nl_net_exit(rnet);
return ret;
+ }
down_read(&devices_rwsem);
xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
@@ -1200,9 +1197,21 @@ static void setup_dma_device(struct ib_device *device)
WARN_ON_ONCE(!parent);
device->dma_device = parent;
}
- /* Setup default max segment size for all IB devices */
- dma_set_max_seg_size(device->dma_device, SZ_2G);
+ if (!device->dev.dma_parms) {
+ if (parent) {
+ /*
+ * The caller did not provide DMA parameters, so
+ * 'parent' probably represents a PCI device. The PCI
+ * core sets the maximum segment size to 64
+ * KB. Increase this parameter to 2 GB.
+ */
+ device->dev.dma_parms = parent->dma_parms;
+ dma_set_max_seg_size(device->dma_device, SZ_2G);
+ } else {
+ WARN_ON_ONCE(true);
+ }
+ }
}
/*
@@ -1318,7 +1327,9 @@ out:
/**
* ib_register_device - Register an IB device with IB core
- * @device:Device to register
+ * @device: Device to register
+ * @name: unique string device name. This may include a '%' which will
+ * cause a unique index to be added to the passed device name.
*
* Low-level drivers use ib_register_device() to register their
* devices with the IB core. All registered clients will receive a
@@ -1445,7 +1456,7 @@ out:
/**
* ib_unregister_device - Unregister an IB device
- * @device: The device to unregister
+ * @ib_dev: The device to unregister
*
* Unregister an IB device. All clients will receive a remove callback.
*
@@ -1467,7 +1478,7 @@ EXPORT_SYMBOL(ib_unregister_device);
/**
* ib_unregister_device_and_put - Unregister a device while holding a 'get'
- * device: The device to unregister
+ * @ib_dev: The device to unregister
*
* This is the same as ib_unregister_device(), except it includes an internal
* ib_device_put() that should match a 'get' obtained by the caller.
@@ -1537,7 +1548,7 @@ static void ib_unregister_work(struct work_struct *work)
/**
* ib_unregister_device_queued - Unregister a device using a work queue
- * device: The device to unregister
+ * @ib_dev: The device to unregister
*
* This schedules an asynchronous unregistration using a WQ for the device. A
* driver should use this to avoid holding locks while doing unregistration,
@@ -1921,17 +1932,15 @@ EXPORT_SYMBOL(ib_set_client_data);
*
* ib_register_event_handler() registers an event handler that will be
* called back when asynchronous IB events occur (as defined in
- * chapter 11 of the InfiniBand Architecture Specification). This
- * callback may occur in interrupt context.
+ * chapter 11 of the InfiniBand Architecture Specification). This
+ * callback occurs in workqueue context.
*/
void ib_register_event_handler(struct ib_event_handler *event_handler)
{
- unsigned long flags;
-
- spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
+ down_write(&event_handler->device->event_handler_rwsem);
list_add_tail(&event_handler->list,
&event_handler->device->event_handler_list);
- spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
+ up_write(&event_handler->device->event_handler_rwsem);
}
EXPORT_SYMBOL(ib_register_event_handler);
@@ -1944,61 +1953,78 @@ EXPORT_SYMBOL(ib_register_event_handler);
*/
void ib_unregister_event_handler(struct ib_event_handler *event_handler)
{
- unsigned long flags;
-
- spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
+ down_write(&event_handler->device->event_handler_rwsem);
list_del(&event_handler->list);
- spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
+ up_write(&event_handler->device->event_handler_rwsem);
}
EXPORT_SYMBOL(ib_unregister_event_handler);
-/**
- * ib_dispatch_event - Dispatch an asynchronous event
- * @event:Event to dispatch
- *
- * Low-level drivers must call ib_dispatch_event() to dispatch the
- * event to all registered event handlers when an asynchronous event
- * occurs.
- */
-void ib_dispatch_event(struct ib_event *event)
+void ib_dispatch_event_clients(struct ib_event *event)
{
- unsigned long flags;
struct ib_event_handler *handler;
- spin_lock_irqsave(&event->device->event_handler_lock, flags);
+ down_read(&event->device->event_handler_rwsem);
list_for_each_entry(handler, &event->device->event_handler_list, list)
handler->handler(handler, event);
- spin_unlock_irqrestore(&event->device->event_handler_lock, flags);
+ up_read(&event->device->event_handler_rwsem);
}
-EXPORT_SYMBOL(ib_dispatch_event);
-/**
- * ib_query_port - Query IB port attributes
- * @device:Device to query
- * @port_num:Port number to query
- * @port_attr:Port attributes
- *
- * ib_query_port() returns the attributes of a port through the
- * @port_attr pointer.
- */
-int ib_query_port(struct ib_device *device,
- u8 port_num,
- struct ib_port_attr *port_attr)
+static int iw_query_port(struct ib_device *device,
+ u8 port_num,
+ struct ib_port_attr *port_attr)
{
- union ib_gid gid;
- int err;
+ struct in_device *inetdev;
+ struct net_device *netdev;
- if (!rdma_is_port_valid(device, port_num))
- return -EINVAL;
+ memset(port_attr, 0, sizeof(*port_attr));
+
+ netdev = ib_device_get_netdev(device, port_num);
+ if (!netdev)
+ return -ENODEV;
+
+ port_attr->max_mtu = IB_MTU_4096;
+ port_attr->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
+
+ if (!netif_carrier_ok(netdev)) {
+ port_attr->state = IB_PORT_DOWN;
+ port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
+ } else {
+ rcu_read_lock();
+ inetdev = __in_dev_get_rcu(netdev);
+
+ if (inetdev && inetdev->ifa_list) {
+ port_attr->state = IB_PORT_ACTIVE;
+ port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
+ } else {
+ port_attr->state = IB_PORT_INIT;
+ port_attr->phys_state =
+ IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING;
+ }
+
+ rcu_read_unlock();
+ }
+
+ dev_put(netdev);
+ return device->ops.query_port(device, port_num, port_attr);
+}
+
+static int __ib_query_port(struct ib_device *device,
+ u8 port_num,
+ struct ib_port_attr *port_attr)
+{
+ union ib_gid gid = {};
+ int err;
memset(port_attr, 0, sizeof(*port_attr));
+
err = device->ops.query_port(device, port_num, port_attr);
if (err || port_attr->subnet_prefix)
return err;
- if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND)
+ if (rdma_port_get_link_layer(device, port_num) !=
+ IB_LINK_LAYER_INFINIBAND)
return 0;
err = device->ops.query_gid(device, port_num, 0, &gid);
@@ -2008,6 +2034,28 @@ int ib_query_port(struct ib_device *device,
port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix);
return 0;
}
+
+/**
+ * ib_query_port - Query IB port attributes
+ * @device:Device to query
+ * @port_num:Port number to query
+ * @port_attr:Port attributes
+ *
+ * ib_query_port() returns the attributes of a port through the
+ * @port_attr pointer.
+ */
+int ib_query_port(struct ib_device *device,
+ u8 port_num,
+ struct ib_port_attr *port_attr)
+{
+ if (!rdma_is_port_valid(device, port_num))
+ return -EINVAL;
+
+ if (rdma_protocol_iwarp(device, port_num))
+ return iw_query_port(device, port_num, port_attr);
+ else
+ return __ib_query_port(device, port_num, port_attr);
+}
EXPORT_SYMBOL(ib_query_port);
static void add_ndev_hash(struct ib_port_data *pdata)
@@ -2311,7 +2359,7 @@ int ib_modify_device(struct ib_device *device,
struct ib_device_modify *device_modify)
{
if (!device->ops.modify_device)
- return -ENOSYS;
+ return -EOPNOTSUPP;
return device->ops.modify_device(device, device_modify_mask,
device_modify);
@@ -2342,8 +2390,12 @@ int ib_modify_port(struct ib_device *device,
rc = device->ops.modify_port(device, port_num,
port_modify_mask,
port_modify);
+ else if (rdma_protocol_roce(device, port_num) &&
+ ((port_modify->set_port_cap_mask & ~IB_PORT_CM_SUP) == 0 ||
+ (port_modify->clr_port_cap_mask & ~IB_PORT_CM_SUP) == 0))
+ rc = 0;
else
- rc = rdma_protocol_roce(device, port_num) ? 0 : -ENOSYS;
+ rc = -EOPNOTSUPP;
return rc;
}
EXPORT_SYMBOL(ib_modify_port);
@@ -2552,6 +2604,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, drain_sq);
SET_DEVICE_OP(dev_ops, enable_driver);
SET_DEVICE_OP(dev_ops, fill_res_entry);
+ SET_DEVICE_OP(dev_ops, fill_stat_entry);
SET_DEVICE_OP(dev_ops, get_dev_fw_str);
SET_DEVICE_OP(dev_ops, get_dma_mr);
SET_DEVICE_OP(dev_ops, get_hw_stats);
@@ -2560,6 +2613,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, get_port_immutable);
SET_DEVICE_OP(dev_ops, get_vector_affinity);
SET_DEVICE_OP(dev_ops, get_vf_config);
+ SET_DEVICE_OP(dev_ops, get_vf_guid);
SET_DEVICE_OP(dev_ops, get_vf_stats);
SET_DEVICE_OP(dev_ops, init_port);
SET_DEVICE_OP(dev_ops, iw_accept);
@@ -2574,6 +2628,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
SET_DEVICE_OP(dev_ops, map_phys_fmr);
SET_DEVICE_OP(dev_ops, mmap);
+ SET_DEVICE_OP(dev_ops, mmap_free);
SET_DEVICE_OP(dev_ops, modify_ah);
SET_DEVICE_OP(dev_ops, modify_cq);
SET_DEVICE_OP(dev_ops, modify_device);
@@ -2660,11 +2715,7 @@ static int __init ib_core_init(void)
goto err_comp_unbound;
}
- ret = rdma_nl_init();
- if (ret) {
- pr_warn("Couldn't init IB netlink interface: err %d\n", ret);
- goto err_sysfs;
- }
+ rdma_nl_init();
ret = addr_init();
if (ret) {
@@ -2711,8 +2762,6 @@ err_mad:
err_addr:
addr_cleanup();
err_ibnl:
- rdma_nl_exit();
-err_sysfs:
class_unregister(&ib_class);
err_comp_unbound:
destroy_workqueue(ib_comp_unbound_wq);
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 7d841b689a1e..e08aec427027 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -148,13 +148,6 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
hlist_del_init(&fmr->cache_node);
fmr->remap_count = 0;
list_add_tail(&fmr->fmr->list, &fmr_list);
-
-#ifdef DEBUG
- if (fmr->ref_count !=0) {
- pr_warn(PFX "Unmapping FMR 0x%08x with ref count %d\n",
- fmr, fmr->ref_count);
- }
-#endif
}
list_splice_init(&pool->dirty_list, &unmap_list);
@@ -496,12 +489,6 @@ void ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
}
}
-#ifdef DEBUG
- if (fmr->ref_count < 0)
- pr_warn(PFX "FMR %p has ref count %d < 0\n",
- fmr, fmr->ref_count);
-#endif
-
spin_unlock_irqrestore(&pool->pool_lock, flags);
}
EXPORT_SYMBOL(ib_fmr_pool_unmap);
diff --git a/drivers/infiniband/core/ib_core_uverbs.c b/drivers/infiniband/core/ib_core_uverbs.c
new file mode 100644
index 000000000000..b51bd7087a88
--- /dev/null
+++ b/drivers/infiniband/core/ib_core_uverbs.c
@@ -0,0 +1,367 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2019 Marvell. All rights reserved.
+ */
+#include <linux/xarray.h>
+#include "uverbs.h"
+#include "core_priv.h"
+
+/**
+ * rdma_umap_priv_init() - Initialize the private data of a vma
+ *
+ * @priv: The already allocated private data
+ * @vma: The vm area struct that needs private data
+ * @entry: entry into the mmap_xa that needs to be linked with
+ * this vma
+ *
+ * Each time we map IO memory into user space this keeps track of the
+ * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space
+ * to point to the zero page and allow the hot unplug to proceed.
+ *
+ * This is necessary for cases like PCI physical hot unplug as the actual BAR
+ * memory may vanish after this and access to it from userspace could MCE.
+ *
+ * RDMA drivers supporting disassociation must have their user space designed
+ * to cope in some way with their IO pages going to the zero page.
+ *
+ */
+void rdma_umap_priv_init(struct rdma_umap_priv *priv,
+ struct vm_area_struct *vma,
+ struct rdma_user_mmap_entry *entry)
+{
+ struct ib_uverbs_file *ufile = vma->vm_file->private_data;
+
+ priv->vma = vma;
+ if (entry) {
+ kref_get(&entry->ref);
+ priv->entry = entry;
+ }
+ vma->vm_private_data = priv;
+ /* vm_ops is setup in ib_uverbs_mmap() to avoid module dependencies */
+
+ mutex_lock(&ufile->umap_lock);
+ list_add(&priv->list, &ufile->umaps);
+ mutex_unlock(&ufile->umap_lock);
+}
+EXPORT_SYMBOL(rdma_umap_priv_init);
+
+/**
+ * rdma_user_mmap_io() - Map IO memory into a process
+ *
+ * @ucontext: associated user context
+ * @vma: the vma related to the current mmap call
+ * @pfn: pfn to map
+ * @size: size to map
+ * @prot: pgprot to use in remap call
+ * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL
+ * if mmap_entry is not used by the driver
+ *
+ * This is to be called by drivers as part of their mmap() functions if they
+ * wish to send something like PCI-E BAR memory to userspace.
+ *
+ * Return -EINVAL on wrong flags or size, -EAGAIN on failure to map. 0 on
+ * success.
+ */
+int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
+ unsigned long pfn, unsigned long size, pgprot_t prot,
+ struct rdma_user_mmap_entry *entry)
+{
+ struct ib_uverbs_file *ufile = ucontext->ufile;
+ struct rdma_umap_priv *priv;
+
+ if (!(vma->vm_flags & VM_SHARED))
+ return -EINVAL;
+
+ if (vma->vm_end - vma->vm_start != size)
+ return -EINVAL;
+
+ /* Driver is using this wrong, must be called by ib_uverbs_mmap */
+ if (WARN_ON(!vma->vm_file ||
+ vma->vm_file->private_data != ufile))
+ return -EINVAL;
+ lockdep_assert_held(&ufile->device->disassociate_srcu);
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ vma->vm_page_prot = prot;
+ if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
+ kfree(priv);
+ return -EAGAIN;
+ }
+
+ rdma_umap_priv_init(priv, vma, entry);
+ return 0;
+}
+EXPORT_SYMBOL(rdma_user_mmap_io);
+
+/**
+ * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa
+ *
+ * @ucontext: associated user context
+ * @pgoff: The mmap offset >> PAGE_SHIFT
+ *
+ * This function is called when a user tries to mmap with an offset (returned
+ * by rdma_user_mmap_get_offset()) it initially received from the driver. The
+ * rdma_user_mmap_entry was created by the function
+ * rdma_user_mmap_entry_insert(). This function increases the refcnt of the
+ * entry so that it won't be deleted from the xarray in the meantime.
+ *
+ * Return an reference to an entry if exists or NULL if there is no
+ * match. rdma_user_mmap_entry_put() must be called to put the reference.
+ */
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
+ unsigned long pgoff)
+{
+ struct rdma_user_mmap_entry *entry;
+
+ if (pgoff > U32_MAX)
+ return NULL;
+
+ xa_lock(&ucontext->mmap_xa);
+
+ entry = xa_load(&ucontext->mmap_xa, pgoff);
+
+ /*
+ * If refcount is zero, entry is already being deleted, driver_removed
+ * indicates that the no further mmaps are possible and we waiting for
+ * the active VMAs to be closed.
+ */
+ if (!entry || entry->start_pgoff != pgoff || entry->driver_removed ||
+ !kref_get_unless_zero(&entry->ref))
+ goto err;
+
+ xa_unlock(&ucontext->mmap_xa);
+
+ ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] returned\n",
+ pgoff, entry->npages);
+
+ return entry;
+
+err:
+ xa_unlock(&ucontext->mmap_xa);
+ return NULL;
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff);
+
+/**
+ * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa
+ *
+ * @ucontext: associated user context
+ * @vma: the vma being mmap'd into
+ *
+ * This function is like rdma_user_mmap_entry_get_pgoff() except that it also
+ * checks that the VMA is correct.
+ */
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
+ struct vm_area_struct *vma)
+{
+ struct rdma_user_mmap_entry *entry;
+
+ if (!(vma->vm_flags & VM_SHARED))
+ return NULL;
+ entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff);
+ if (!entry)
+ return NULL;
+ if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) {
+ rdma_user_mmap_entry_put(entry);
+ return NULL;
+ }
+ return entry;
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_get);
+
+static void rdma_user_mmap_entry_free(struct kref *kref)
+{
+ struct rdma_user_mmap_entry *entry =
+ container_of(kref, struct rdma_user_mmap_entry, ref);
+ struct ib_ucontext *ucontext = entry->ucontext;
+ unsigned long i;
+
+ /*
+ * Erase all entries occupied by this single entry, this is deferred
+ * until all VMA are closed so that the mmap offsets remain unique.
+ */
+ xa_lock(&ucontext->mmap_xa);
+ for (i = 0; i < entry->npages; i++)
+ __xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i);
+ xa_unlock(&ucontext->mmap_xa);
+
+ ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] removed\n",
+ entry->start_pgoff, entry->npages);
+
+ if (ucontext->device->ops.mmap_free)
+ ucontext->device->ops.mmap_free(entry);
+}
+
+/**
+ * rdma_user_mmap_entry_put() - Drop reference to the mmap entry
+ *
+ * @entry: an entry in the mmap_xa
+ *
+ * This function is called when the mapping is closed if it was
+ * an io mapping or when the driver is done with the entry for
+ * some other reason.
+ * Should be called after rdma_user_mmap_entry_get was called
+ * and entry is no longer needed. This function will erase the
+ * entry and free it if its refcnt reaches zero.
+ */
+void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry)
+{
+ kref_put(&entry->ref, rdma_user_mmap_entry_free);
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_put);
+
+/**
+ * rdma_user_mmap_entry_remove() - Drop reference to entry and
+ * mark it as unmmapable
+ *
+ * @entry: the entry to insert into the mmap_xa
+ *
+ * Drivers can call this to prevent userspace from creating more mappings for
+ * entry, however existing mmaps continue to exist and ops->mmap_free() will
+ * not be called until all user mmaps are destroyed.
+ */
+void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
+{
+ if (!entry)
+ return;
+
+ xa_lock(&entry->ucontext->mmap_xa);
+ entry->driver_removed = true;
+ xa_unlock(&entry->ucontext->mmap_xa);
+ kref_put(&entry->ref, rdma_user_mmap_entry_free);
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
+
+/**
+ * rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa
+ * in a given range.
+ *
+ * @ucontext: associated user context.
+ * @entry: the entry to insert into the mmap_xa
+ * @length: length of the address that will be mmapped
+ * @min_pgoff: minimum pgoff to be returned
+ * @max_pgoff: maximum pgoff to be returned
+ *
+ * This function should be called by drivers that use the rdma_user_mmap
+ * interface for implementing their mmap syscall A database of mmap offsets is
+ * handled in the core and helper functions are provided to insert entries
+ * into the database and extract entries when the user calls mmap with the
+ * given offset. The function allocates a unique page offset in a given range
+ * that should be provided to user, the user will use the offset to retrieve
+ * information such as address to be mapped and how.
+ *
+ * Return: 0 on success and -ENOMEM on failure
+ */
+int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
+ struct rdma_user_mmap_entry *entry,
+ size_t length, u32 min_pgoff,
+ u32 max_pgoff)
+{
+ struct ib_uverbs_file *ufile = ucontext->ufile;
+ XA_STATE(xas, &ucontext->mmap_xa, min_pgoff);
+ u32 xa_first, xa_last, npages;
+ int err;
+ u32 i;
+
+ if (!entry)
+ return -EINVAL;
+
+ kref_init(&entry->ref);
+ entry->ucontext = ucontext;
+
+ /*
+ * We want the whole allocation to be done without interruption from a
+ * different thread. The allocation requires finding a free range and
+ * storing. During the xa_insert the lock could be released, possibly
+ * allowing another thread to choose the same range.
+ */
+ mutex_lock(&ufile->umap_lock);
+
+ xa_lock(&ucontext->mmap_xa);
+
+ /* We want to find an empty range */
+ npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE);
+ entry->npages = npages;
+ while (true) {
+ /* First find an empty index */
+ xas_find_marked(&xas, max_pgoff, XA_FREE_MARK);
+ if (xas.xa_node == XAS_RESTART)
+ goto err_unlock;
+
+ xa_first = xas.xa_index;
+
+ /* Is there enough room to have the range? */
+ if (check_add_overflow(xa_first, npages, &xa_last))
+ goto err_unlock;
+
+ /*
+ * Now look for the next present entry. If an entry doesn't
+ * exist, we found an empty range and can proceed.
+ */
+ xas_next_entry(&xas, xa_last - 1);
+ if (xas.xa_node == XAS_BOUNDS || xas.xa_index >= xa_last)
+ break;
+ }
+
+ for (i = xa_first; i < xa_last; i++) {
+ err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL);
+ if (err)
+ goto err_undo;
+ }
+
+ /*
+ * Internally the kernel uses a page offset, in libc this is a byte
+ * offset. Drivers should not return pgoff to userspace.
+ */
+ entry->start_pgoff = xa_first;
+ xa_unlock(&ucontext->mmap_xa);
+ mutex_unlock(&ufile->umap_lock);
+
+ ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#x] inserted\n",
+ entry->start_pgoff, npages);
+
+ return 0;
+
+err_undo:
+ for (; i > xa_first; i--)
+ __xa_erase(&ucontext->mmap_xa, i - 1);
+
+err_unlock:
+ xa_unlock(&ucontext->mmap_xa);
+ mutex_unlock(&ufile->umap_lock);
+ return -ENOMEM;
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range);
+
+/**
+ * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa.
+ *
+ * @ucontext: associated user context.
+ * @entry: the entry to insert into the mmap_xa
+ * @length: length of the address that will be mmapped
+ *
+ * This function should be called by drivers that use the rdma_user_mmap
+ * interface for handling user mmapped addresses. The database is handled in
+ * the core and helper functions are provided to insert entries into the
+ * database and extract entries when the user calls mmap with the given offset.
+ * The function allocates a unique page offset that should be provided to user,
+ * the user will use the offset to retrieve information such as address to
+ * be mapped and how.
+ *
+ * Return: 0 on success and -ENOMEM on failure
+ */
+int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
+ struct rdma_user_mmap_entry *entry,
+ size_t length)
+{
+ return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0,
+ U32_MAX);
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_insert);
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 72141c5b7c95..ade71823370f 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -372,6 +372,7 @@ EXPORT_SYMBOL(iw_cm_disconnect);
static void destroy_cm_id(struct iw_cm_id *cm_id)
{
struct iwcm_id_private *cm_id_priv;
+ struct ib_qp *qp;
unsigned long flags;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
@@ -389,6 +390,9 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags);
spin_lock_irqsave(&cm_id_priv->lock, flags);
+ qp = cm_id_priv->qp;
+ cm_id_priv->qp = NULL;
+
switch (cm_id_priv->state) {
case IW_CM_STATE_LISTEN:
cm_id_priv->state = IW_CM_STATE_DESTROYING;
@@ -401,7 +405,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
cm_id_priv->state = IW_CM_STATE_DESTROYING;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
/* Abrupt close of the connection */
- (void)iwcm_modify_qp_err(cm_id_priv->qp);
+ (void)iwcm_modify_qp_err(qp);
spin_lock_irqsave(&cm_id_priv->lock, flags);
break;
case IW_CM_STATE_IDLE:
@@ -426,11 +430,9 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
BUG();
break;
}
- if (cm_id_priv->qp) {
- cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp);
- cm_id_priv->qp = NULL;
- }
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ if (qp)
+ cm_id_priv->id.device->ops.iw_rem_ref(qp);
if (cm_id->mapped) {
iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr);
@@ -671,11 +673,11 @@ int iw_cm_accept(struct iw_cm_id *cm_id,
BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
cm_id_priv->state = IW_CM_STATE_IDLE;
spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id_priv->qp) {
- cm_id->device->ops.iw_rem_ref(qp);
- cm_id_priv->qp = NULL;
- }
+ qp = cm_id_priv->qp;
+ cm_id_priv->qp = NULL;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ if (qp)
+ cm_id->device->ops.iw_rem_ref(qp);
clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
wake_up_all(&cm_id_priv->connect_wait);
}
@@ -696,7 +698,7 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
struct iwcm_id_private *cm_id_priv;
int ret;
unsigned long flags;
- struct ib_qp *qp;
+ struct ib_qp *qp = NULL;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
@@ -730,13 +732,13 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
return 0; /* success */
spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id_priv->qp) {
- cm_id->device->ops.iw_rem_ref(qp);
- cm_id_priv->qp = NULL;
- }
+ qp = cm_id_priv->qp;
+ cm_id_priv->qp = NULL;
cm_id_priv->state = IW_CM_STATE_IDLE;
err:
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ if (qp)
+ cm_id->device->ops.iw_rem_ref(qp);
clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
wake_up_all(&cm_id_priv->connect_wait);
return ret;
@@ -878,6 +880,7 @@ static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
struct iw_cm_event *iw_event)
{
+ struct ib_qp *qp = NULL;
unsigned long flags;
int ret;
@@ -896,11 +899,13 @@ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
} else {
/* REJECTED or RESET */
- cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp);
+ qp = cm_id_priv->qp;
cm_id_priv->qp = NULL;
cm_id_priv->state = IW_CM_STATE_IDLE;
}
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ if (qp)
+ cm_id_priv->id.device->ops.iw_rem_ref(qp);
ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
if (iw_event->private_data_len)
@@ -942,21 +947,18 @@ static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv,
static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
struct iw_cm_event *iw_event)
{
+ struct ib_qp *qp;
unsigned long flags;
- int ret = 0;
+ int ret = 0, notify_event = 0;
spin_lock_irqsave(&cm_id_priv->lock, flags);
+ qp = cm_id_priv->qp;
+ cm_id_priv->qp = NULL;
- if (cm_id_priv->qp) {
- cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp);
- cm_id_priv->qp = NULL;
- }
switch (cm_id_priv->state) {
case IW_CM_STATE_ESTABLISHED:
case IW_CM_STATE_CLOSING:
cm_id_priv->state = IW_CM_STATE_IDLE;
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
- spin_lock_irqsave(&cm_id_priv->lock, flags);
+ notify_event = 1;
break;
case IW_CM_STATE_DESTROYING:
break;
@@ -965,6 +967,10 @@ static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
}
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ if (qp)
+ cm_id_priv->id.device->ops.iw_rem_ref(qp);
+ if (notify_event)
+ ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
return ret;
}
diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c
index 2452b0ddcf0d..46686990a827 100644
--- a/drivers/infiniband/core/iwpm_msg.c
+++ b/drivers/infiniband/core/iwpm_msg.c
@@ -112,7 +112,7 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
pr_debug("%s: Multicasting a nlmsg (dev = %s ifname = %s iwpm = %s)\n",
__func__, pm_msg->dev_name, pm_msg->if_name, iwpm_ulib_name);
- ret = rdma_nl_multicast(skb, RDMA_NL_GROUP_IWPM, GFP_KERNEL);
+ ret = rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_IWPM, GFP_KERNEL);
if (ret) {
skb = NULL; /* skb is freed in the netlink send-op handling */
iwpm_user_pid = IWPM_PID_UNAVAILABLE;
@@ -124,8 +124,7 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
return ret;
pid_query_error:
pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
- if (skb)
- dev_kfree_skb(skb);
+ dev_kfree_skb(skb);
if (nlmsg_request)
iwpm_free_nlmsg_request(&nlmsg_request->kref);
return ret;
@@ -202,7 +201,7 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
nlmsg_end(skb, nlh);
nlmsg_request->req_buffer = pm_msg;
- ret = rdma_nl_unicast_wait(skb, iwpm_user_pid);
+ ret = rdma_nl_unicast_wait(&init_net, skb, iwpm_user_pid);
if (ret) {
skb = NULL; /* skb is freed in the netlink send-op handling */
iwpm_user_pid = IWPM_PID_UNDEFINED;
@@ -214,8 +213,7 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
add_mapping_error:
pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
add_mapping_error_nowarn:
- if (skb)
- dev_kfree_skb(skb);
+ dev_kfree_skb(skb);
if (nlmsg_request)
iwpm_free_nlmsg_request(&nlmsg_request->kref);
return ret;
@@ -297,7 +295,7 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
nlmsg_end(skb, nlh);
nlmsg_request->req_buffer = pm_msg;
- ret = rdma_nl_unicast_wait(skb, iwpm_user_pid);
+ ret = rdma_nl_unicast_wait(&init_net, skb, iwpm_user_pid);
if (ret) {
skb = NULL; /* skb is freed in the netlink send-op handling */
err_str = "Unable to send a nlmsg";
@@ -308,8 +306,7 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
query_mapping_error:
pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
query_mapping_error_nowarn:
- if (skb)
- dev_kfree_skb(skb);
+ dev_kfree_skb(skb);
if (nlmsg_request)
iwpm_free_nlmsg_request(&nlmsg_request->kref);
return ret;
@@ -364,7 +361,7 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client)
nlmsg_end(skb, nlh);
- ret = rdma_nl_unicast_wait(skb, iwpm_user_pid);
+ ret = rdma_nl_unicast_wait(&init_net, skb, iwpm_user_pid);
if (ret) {
skb = NULL; /* skb is freed in the netlink send-op handling */
iwpm_user_pid = IWPM_PID_UNDEFINED;
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index 41929bb83739..13495b43dbc1 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -645,7 +645,7 @@ static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid)
nlmsg_end(skb, nlh);
- ret = rdma_nl_unicast(skb, iwpm_pid);
+ ret = rdma_nl_unicast(&init_net, skb, iwpm_pid);
if (ret) {
skb = NULL;
err_str = "Unable to send a nlmsg";
@@ -655,8 +655,7 @@ static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid)
return 0;
mapinfo_num_error:
pr_info("%s: %s\n", __func__, err_str);
- if (skb)
- dev_kfree_skb(skb);
+ dev_kfree_skb(skb);
return ret;
}
@@ -674,7 +673,7 @@ static int send_nlmsg_done(struct sk_buff *skb, u8 nl_client, int iwpm_pid)
return -ENOMEM;
}
nlh->nlmsg_type = NLMSG_DONE;
- ret = rdma_nl_unicast(skb, iwpm_pid);
+ ret = rdma_nl_unicast(&init_net, skb, iwpm_pid);
if (ret)
pr_warn("%s Unable to send a nlmsg\n", __func__);
return ret;
@@ -778,8 +777,7 @@ send_mapping_info_unlock:
send_mapping_info_exit:
if (ret) {
pr_warn("%s: %s (ret = %d)\n", __func__, err_str, ret);
- if (skb)
- dev_kfree_skb(skb);
+ dev_kfree_skb(skb);
return ret;
}
send_nlmsg_done(skb, nl_client, iwpm_pid);
@@ -824,7 +822,7 @@ int iwpm_send_hello(u8 nl_client, int iwpm_pid, u16 abi_version)
goto hello_num_error;
nlmsg_end(skb, nlh);
- ret = rdma_nl_unicast(skb, iwpm_pid);
+ ret = rdma_nl_unicast(&init_net, skb, iwpm_pid);
if (ret) {
skb = NULL;
err_str = "Unable to send a nlmsg";
@@ -834,7 +832,6 @@ int iwpm_send_hello(u8 nl_client, int iwpm_pid, u16 abi_version)
return 0;
hello_num_error:
pr_info("%s: %s\n", __func__, err_str);
- if (skb)
- dev_kfree_skb(skb);
+ dev_kfree_skb(skb);
return ret;
}
diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h
index 7e2bcc72f66c..1bf87d9fd0bd 100644
--- a/drivers/infiniband/core/iwpm_util.h
+++ b/drivers/infiniband/core/iwpm_util.h
@@ -210,8 +210,10 @@ int iwpm_mapinfo_available(void);
/**
* iwpm_compare_sockaddr - Compare two sockaddr storage structs
+ * @a_sockaddr: first sockaddr to compare
+ * @b_sockaddr: second sockaddr to compare
*
- * Returns 0 if they are holding the same ip/tcp address info,
+ * Return: 0 if they are holding the same ip/tcp address info,
* otherwise returns 1
*/
int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr,
@@ -272,6 +274,7 @@ void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg);
* iwpm_send_hello - Send hello response to iwpmd
*
* @nl_client: The index of the netlink client
+ * @iwpm_pid: The pid of the user space port mapper
* @abi_version: The kernel's abi_version
*
* Returns 0 on success or a negative error code
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 9947d16edef2..c54db13fa9b0 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -913,9 +913,9 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
/* No GRH for DR SMP */
ret = device->ops.process_mad(device, 0, port_num, &mad_wc, NULL,
- (const struct ib_mad_hdr *)smp, mad_size,
- (struct ib_mad_hdr *)mad_priv->mad,
- &mad_size, &out_mad_pkey_index);
+ (const struct ib_mad *)smp,
+ (struct ib_mad *)mad_priv->mad, &mad_size,
+ &out_mad_pkey_index);
switch (ret)
{
case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY:
@@ -1397,25 +1397,6 @@ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc)
}
EXPORT_SYMBOL(ib_free_recv_mad);
-struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp,
- u8 rmpp_version,
- ib_mad_send_handler send_handler,
- ib_mad_recv_handler recv_handler,
- void *context)
-{
- return ERR_PTR(-EINVAL); /* XXX: for now */
-}
-EXPORT_SYMBOL(ib_redirect_mad_qp);
-
-int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
- struct ib_wc *wc)
-{
- dev_err(&mad_agent->device->dev,
- "ib_process_mad_wc() not implemented yet\n");
- return 0;
-}
-EXPORT_SYMBOL(ib_process_mad_wc);
-
static int method_in_use(struct ib_mad_mgmt_method_table **method,
struct ib_mad_reg_req *mad_reg_req)
{
@@ -2340,9 +2321,9 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
if (port_priv->device->ops.process_mad) {
ret = port_priv->device->ops.process_mad(
port_priv->device, 0, port_priv->port_num, wc,
- &recv->grh, (const struct ib_mad_hdr *)recv->mad,
- recv->mad_size, (struct ib_mad_hdr *)response->mad,
- &mad_size, &resp_mad_pkey_index);
+ &recv->grh, (const struct ib_mad *)recv->mad,
+ (struct ib_mad *)response->mad, &mad_size,
+ &resp_mad_pkey_index);
if (opa)
wc->pkey_index = resp_mad_pkey_index;
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index eecfc0b377c9..8cd31ef25eff 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -36,20 +36,25 @@
#include <linux/export.h>
#include <net/netlink.h>
#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <net/sock.h>
#include <rdma/rdma_netlink.h>
#include <linux/module.h>
#include "core_priv.h"
-static DEFINE_MUTEX(rdma_nl_mutex);
-static struct sock *nls;
static struct {
- const struct rdma_nl_cbs *cb_table;
+ const struct rdma_nl_cbs *cb_table;
+ /* Synchronizes between ongoing netlink commands and netlink client
+ * unregistration.
+ */
+ struct rw_semaphore sem;
} rdma_nl_types[RDMA_NL_NUM_CLIENTS];
bool rdma_nl_chk_listeners(unsigned int group)
{
- return netlink_has_listeners(nls, group);
+ struct rdma_dev_net *rnet = rdma_net_to_dev_net(&init_net);
+
+ return netlink_has_listeners(rnet->nl_sock, group);
}
EXPORT_SYMBOL(rdma_nl_chk_listeners);
@@ -73,62 +78,53 @@ static bool is_nl_msg_valid(unsigned int type, unsigned int op)
return (op < max_num_ops[type]) ? true : false;
}
-static bool is_nl_valid(unsigned int type, unsigned int op)
+static const struct rdma_nl_cbs *
+get_cb_table(const struct sk_buff *skb, unsigned int type, unsigned int op)
{
const struct rdma_nl_cbs *cb_table;
- if (!is_nl_msg_valid(type, op))
- return false;
+ /*
+ * Currently only NLDEV client is supporting netlink commands in
+ * non init_net net namespace.
+ */
+ if (sock_net(skb->sk) != &init_net && type != RDMA_NL_NLDEV)
+ return NULL;
- if (!rdma_nl_types[type].cb_table) {
- mutex_unlock(&rdma_nl_mutex);
- request_module("rdma-netlink-subsys-%d", type);
- mutex_lock(&rdma_nl_mutex);
- }
+ cb_table = READ_ONCE(rdma_nl_types[type].cb_table);
+ if (!cb_table) {
+ /*
+ * Didn't get valid reference of the table, attempt module
+ * load once.
+ */
+ up_read(&rdma_nl_types[type].sem);
- cb_table = rdma_nl_types[type].cb_table;
+ request_module("rdma-netlink-subsys-%d", type);
+ down_read(&rdma_nl_types[type].sem);
+ cb_table = READ_ONCE(rdma_nl_types[type].cb_table);
+ }
if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit))
- return false;
- return true;
+ return NULL;
+ return cb_table;
}
void rdma_nl_register(unsigned int index,
const struct rdma_nl_cbs cb_table[])
{
- mutex_lock(&rdma_nl_mutex);
- if (!is_nl_msg_valid(index, 0)) {
- /*
- * All clients are not interesting in success/failure of
- * this call. They want to see the print to error log and
- * continue their initialization. Print warning for them,
- * because it is programmer's error to be here.
- */
- mutex_unlock(&rdma_nl_mutex);
- WARN(true,
- "The not-valid %u index was supplied to RDMA netlink\n",
- index);
+ if (WARN_ON(!is_nl_msg_valid(index, 0)) ||
+ WARN_ON(READ_ONCE(rdma_nl_types[index].cb_table)))
return;
- }
- if (rdma_nl_types[index].cb_table) {
- mutex_unlock(&rdma_nl_mutex);
- WARN(true,
- "The %u index is already registered in RDMA netlink\n",
- index);
- return;
- }
-
- rdma_nl_types[index].cb_table = cb_table;
- mutex_unlock(&rdma_nl_mutex);
+ /* Pairs with the READ_ONCE in is_nl_valid() */
+ smp_store_release(&rdma_nl_types[index].cb_table, cb_table);
}
EXPORT_SYMBOL(rdma_nl_register);
void rdma_nl_unregister(unsigned int index)
{
- mutex_lock(&rdma_nl_mutex);
+ down_write(&rdma_nl_types[index].sem);
rdma_nl_types[index].cb_table = NULL;
- mutex_unlock(&rdma_nl_mutex);
+ up_write(&rdma_nl_types[index].sem);
}
EXPORT_SYMBOL(rdma_nl_unregister);
@@ -160,15 +156,21 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
unsigned int index = RDMA_NL_GET_CLIENT(type);
unsigned int op = RDMA_NL_GET_OP(type);
const struct rdma_nl_cbs *cb_table;
+ int err = -EINVAL;
- if (!is_nl_valid(index, op))
+ if (!is_nl_msg_valid(index, op))
return -EINVAL;
- cb_table = rdma_nl_types[index].cb_table;
+ down_read(&rdma_nl_types[index].sem);
+ cb_table = get_cb_table(skb, index, op);
+ if (!cb_table)
+ goto done;
if ((cb_table[op].flags & RDMA_NL_ADMIN_PERM) &&
- !netlink_capable(skb, CAP_NET_ADMIN))
- return -EPERM;
+ !netlink_capable(skb, CAP_NET_ADMIN)) {
+ err = -EPERM;
+ goto done;
+ }
/*
* LS responses overload the 0x100 (NLM_F_ROOT) flag. Don't
@@ -176,8 +178,8 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
*/
if (index == RDMA_NL_LS) {
if (cb_table[op].doit)
- return cb_table[op].doit(skb, nlh, extack);
- return -EINVAL;
+ err = cb_table[op].doit(skb, nlh, extack);
+ goto done;
}
/* FIXME: Convert IWCM to properly handle doit callbacks */
if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_IWCM) {
@@ -185,14 +187,15 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
.dump = cb_table[op].dump,
};
if (c.dump)
- return netlink_dump_start(nls, skb, nlh, &c);
- return -EINVAL;
+ err = netlink_dump_start(skb->sk, skb, nlh, &c);
+ goto done;
}
if (cb_table[op].doit)
- return cb_table[op].doit(skb, nlh, extack);
-
- return 0;
+ err = cb_table[op].doit(skb, nlh, extack);
+done:
+ up_read(&rdma_nl_types[index].sem);
+ return err;
}
/*
@@ -253,57 +256,76 @@ skip:
static void rdma_nl_rcv(struct sk_buff *skb)
{
- mutex_lock(&rdma_nl_mutex);
rdma_nl_rcv_skb(skb, &rdma_nl_rcv_msg);
- mutex_unlock(&rdma_nl_mutex);
}
-int rdma_nl_unicast(struct sk_buff *skb, u32 pid)
+int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid)
{
+ struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
int err;
- err = netlink_unicast(nls, skb, pid, MSG_DONTWAIT);
+ err = netlink_unicast(rnet->nl_sock, skb, pid, MSG_DONTWAIT);
return (err < 0) ? err : 0;
}
EXPORT_SYMBOL(rdma_nl_unicast);
-int rdma_nl_unicast_wait(struct sk_buff *skb, __u32 pid)
+int rdma_nl_unicast_wait(struct net *net, struct sk_buff *skb, __u32 pid)
{
+ struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
int err;
- err = netlink_unicast(nls, skb, pid, 0);
+ err = netlink_unicast(rnet->nl_sock, skb, pid, 0);
return (err < 0) ? err : 0;
}
EXPORT_SYMBOL(rdma_nl_unicast_wait);
-int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags)
+int rdma_nl_multicast(struct net *net, struct sk_buff *skb,
+ unsigned int group, gfp_t flags)
{
- return nlmsg_multicast(nls, skb, 0, group, flags);
+ struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
+
+ return nlmsg_multicast(rnet->nl_sock, skb, 0, group, flags);
}
EXPORT_SYMBOL(rdma_nl_multicast);
-int __init rdma_nl_init(void)
+void rdma_nl_init(void)
+{
+ int idx;
+
+ for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++)
+ init_rwsem(&rdma_nl_types[idx].sem);
+}
+
+void rdma_nl_exit(void)
+{
+ int idx;
+
+ for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++)
+ WARN(rdma_nl_types[idx].cb_table,
+ "Netlink client %d wasn't released prior to unloading %s\n",
+ idx, KBUILD_MODNAME);
+}
+
+int rdma_nl_net_init(struct rdma_dev_net *rnet)
{
+ struct net *net = read_pnet(&rnet->net);
struct netlink_kernel_cfg cfg = {
.input = rdma_nl_rcv,
};
+ struct sock *nls;
- nls = netlink_kernel_create(&init_net, NETLINK_RDMA, &cfg);
+ nls = netlink_kernel_create(net, NETLINK_RDMA, &cfg);
if (!nls)
return -ENOMEM;
nls->sk_sndtimeo = 10 * HZ;
+ rnet->nl_sock = nls;
return 0;
}
-void rdma_nl_exit(void)
+void rdma_nl_net_exit(struct rdma_dev_net *rnet)
{
- int idx;
-
- for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++)
- rdma_nl_unregister(idx);
-
- netlink_kernel_release(nls);
+ netlink_kernel_release(rnet->nl_sock);
}
MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_RDMA);
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 87d40d1ecdde..37b433aa7306 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -41,6 +41,10 @@
#include "core_priv.h"
#include "cma_priv.h"
#include "restrack.h"
+#include "uverbs.h"
+
+typedef int (*res_fill_func_t)(struct sk_buff*, bool,
+ struct rdma_restrack_entry*, uint32_t);
/*
* Sort array elements by the netlink attribute name
@@ -180,6 +184,19 @@ static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
return 0;
}
+int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name,
+ const char *str)
+{
+ if (put_driver_name_print_type(msg, name,
+ RDMA_NLDEV_PRINT_TYPE_UNSPEC))
+ return -EMSGSIZE;
+ if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str))
+ return -EMSGSIZE;
+
+ return 0;
+}
+EXPORT_SYMBOL(rdma_nl_put_driver_string);
+
int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
{
return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
@@ -382,8 +399,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
if (!names[i])
continue;
- curr = rdma_restrack_count(device, i,
- task_active_pid_ns(current));
+ curr = rdma_restrack_count(device, i);
ret = fill_res_info_entry(msg, names[i], curr);
if (ret)
goto err;
@@ -400,20 +416,34 @@ err:
static int fill_res_name_pid(struct sk_buff *msg,
struct rdma_restrack_entry *res)
{
+ int err = 0;
+
/*
* For user resources, user is should read /proc/PID/comm to get the
* name of the task file.
*/
if (rdma_is_kernel_res(res)) {
- if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
- res->kern_name))
- return -EMSGSIZE;
+ err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
+ res->kern_name);
} else {
- if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
- task_pid_vnr(res->task)))
- return -EMSGSIZE;
+ pid_t pid;
+
+ pid = task_pid_vnr(res->task);
+ /*
+ * Task is dead and in zombie state.
+ * There is no need to print PID anymore.
+ */
+ if (pid)
+ /*
+ * This part is racy, task can be killed and PID will
+ * be zero right here but it is ok, next query won't
+ * return PID. We don't promise real-time reflection
+ * of SW objects.
+ */
+ err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid);
}
- return 0;
+
+ return err ? -EMSGSIZE : 0;
}
static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg,
@@ -424,6 +454,14 @@ static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg,
return dev->ops.fill_res_entry(msg, res);
}
+static bool fill_stat_entry(struct ib_device *dev, struct sk_buff *msg,
+ struct rdma_restrack_entry *res)
+{
+ if (!dev->ops.fill_stat_entry)
+ return false;
+ return dev->ops.fill_stat_entry(msg, res);
+}
+
static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
struct rdma_restrack_entry *res, uint32_t port)
{
@@ -562,7 +600,7 @@ static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
goto err;
if (!rdma_is_kernel_res(res) &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
- cq->uobject->context->res.id))
+ cq->uobject->uevent.uobject.context->res.id))
goto err;
if (fill_res_name_pid(msg, res))
@@ -699,9 +737,6 @@ static int fill_stat_counter_qps(struct sk_buff *msg,
rt = &counter->device->res[RDMA_RESTRACK_QP];
xa_lock(&rt->xa);
xa_for_each(&rt->xa, id, res) {
- if (!rdma_is_visible_in_pid_ns(res))
- continue;
-
qp = container_of(res, struct ib_qp, res);
if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
continue;
@@ -724,8 +759,8 @@ err:
return ret;
}
-static int fill_stat_hwcounter_entry(struct sk_buff *msg,
- const char *name, u64 value)
+int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name,
+ u64 value)
{
struct nlattr *entry_attr;
@@ -747,6 +782,25 @@ err:
nla_nest_cancel(msg, entry_attr);
return -EMSGSIZE;
}
+EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry);
+
+static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_mr *mr = container_of(res, struct ib_mr, res);
+ struct ib_device *dev = mr->pd->device;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
+ goto err;
+
+ if (fill_stat_entry(dev, msg, res))
+ goto err;
+
+ return 0;
+
+err:
+ return -EMSGSIZE;
+}
static int fill_stat_counter_hwcounters(struct sk_buff *msg,
struct rdma_counter *counter)
@@ -760,7 +814,7 @@ static int fill_stat_counter_hwcounters(struct sk_buff *msg,
return -EMSGSIZE;
for (i = 0; i < st->num_counters; i++)
- if (fill_stat_hwcounter_entry(msg, st->names[i], st->value[i]))
+ if (rdma_nl_stat_hwcounter_entry(msg, st->names[i], st->value[i]))
goto err;
nla_nest_end(msg, table_attr);
@@ -779,7 +833,7 @@ static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
container_of(res, struct rdma_counter, res);
if (port && port != counter->port)
- return 0;
+ return -EAGAIN;
/* Dump it even query failed */
rdma_counter_query_stats(counter);
@@ -832,7 +886,7 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nlmsg_end(msg, nlh);
ib_device_put(device);
- return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
err_free:
nlmsg_free(msg);
@@ -972,7 +1026,7 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nlmsg_end(msg, nlh);
ib_device_put(device);
- return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
err_free:
nlmsg_free(msg);
@@ -1074,7 +1128,7 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nlmsg_end(msg, nlh);
ib_device_put(device);
- return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
err_free:
nlmsg_free(msg);
@@ -1118,8 +1172,6 @@ static int nldev_res_get_dumpit(struct sk_buff *skb,
}
struct nldev_fill_res_entry {
- int (*fill_res_func)(struct sk_buff *msg, bool has_cap_net_admin,
- struct rdma_restrack_entry *res, u32 port);
enum rdma_nldev_attr nldev_attr;
enum rdma_nldev_command nldev_cmd;
u8 flags;
@@ -1133,21 +1185,18 @@ enum nldev_res_flags {
static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
[RDMA_RESTRACK_QP] = {
- .fill_res_func = fill_res_qp_entry,
.nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
.entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_LQPN,
},
[RDMA_RESTRACK_CM_ID] = {
- .fill_res_func = fill_res_cm_id_entry,
.nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
},
[RDMA_RESTRACK_CQ] = {
- .fill_res_func = fill_res_cq_entry,
.nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
.flags = NLDEV_PER_DEV,
@@ -1155,7 +1204,6 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
.id = RDMA_NLDEV_ATTR_RES_CQN,
},
[RDMA_RESTRACK_MR] = {
- .fill_res_func = fill_res_mr_entry,
.nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
.flags = NLDEV_PER_DEV,
@@ -1163,7 +1211,6 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
.id = RDMA_NLDEV_ATTR_RES_MRN,
},
[RDMA_RESTRACK_PD] = {
- .fill_res_func = fill_res_pd_entry,
.nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
.flags = NLDEV_PER_DEV,
@@ -1171,7 +1218,6 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
.id = RDMA_NLDEV_ATTR_RES_PDN,
},
[RDMA_RESTRACK_COUNTER] = {
- .fill_res_func = fill_res_counter_entry,
.nldev_cmd = RDMA_NLDEV_CMD_STAT_GET,
.nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
.entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
@@ -1181,7 +1227,8 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack,
- enum rdma_restrack_type res_type)
+ enum rdma_restrack_type res_type,
+ res_fill_func_t fill_func)
{
const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
@@ -1223,15 +1270,10 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
goto err;
}
- if (!rdma_is_visible_in_pid_ns(res)) {
- ret = -ENOENT;
- goto err_get;
- }
-
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg) {
ret = -ENOMEM;
- goto err;
+ goto err_get;
}
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
@@ -1244,14 +1286,16 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
}
has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);
- ret = fe->fill_res_func(msg, has_cap_net_admin, res, port);
+
+ ret = fill_func(msg, has_cap_net_admin, res, port);
+
rdma_restrack_put(res);
if (ret)
goto err_free;
nlmsg_end(msg, nlh);
ib_device_put(device);
- return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
err_free:
nlmsg_free(msg);
@@ -1264,7 +1308,8 @@ err:
static int res_get_common_dumpit(struct sk_buff *skb,
struct netlink_callback *cb,
- enum rdma_restrack_type res_type)
+ enum rdma_restrack_type res_type,
+ res_fill_func_t fill_func)
{
const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
@@ -1335,9 +1380,6 @@ static int res_get_common_dumpit(struct sk_buff *skb,
* objects.
*/
xa_for_each(&rt->xa, id, res) {
- if (!rdma_is_visible_in_pid_ns(res))
- continue;
-
if (idx < start || !rdma_restrack_get(res))
goto next;
@@ -1352,7 +1394,8 @@ static int res_get_common_dumpit(struct sk_buff *skb,
goto msg_full;
}
- ret = fe->fill_res_func(skb, has_cap_net_admin, res, port);
+ ret = fill_func(skb, has_cap_net_admin, res, port);
+
rdma_restrack_put(res);
if (ret) {
@@ -1395,17 +1438,19 @@ err_index:
return ret;
}
-#define RES_GET_FUNCS(name, type) \
- static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \
+#define RES_GET_FUNCS(name, type) \
+ static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \
struct netlink_callback *cb) \
- { \
- return res_get_common_dumpit(skb, cb, type); \
- } \
- static int nldev_res_get_##name##_doit(struct sk_buff *skb, \
- struct nlmsghdr *nlh, \
+ { \
+ return res_get_common_dumpit(skb, cb, type, \
+ fill_res_##name##_entry); \
+ } \
+ static int nldev_res_get_##name##_doit(struct sk_buff *skb, \
+ struct nlmsghdr *nlh, \
struct netlink_ext_ack *extack) \
- { \
- return res_get_common_doit(skb, nlh, extack, type); \
+ { \
+ return res_get_common_doit(skb, nlh, extack, type, \
+ fill_res_##name##_entry); \
}
RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
@@ -1596,7 +1641,7 @@ static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
put_device(data.cdev);
if (ibdev)
ib_device_put(ibdev);
- return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
out_data:
put_device(data.cdev);
@@ -1636,7 +1681,7 @@ static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
return err;
}
nlmsg_end(msg, nlh);
- return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
}
static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -1734,7 +1779,7 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nlmsg_end(msg, nlh);
ib_device_put(device);
- return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
err_fill:
rdma_counter_unbind_qpn(device, port, qpn, cntn);
@@ -1788,10 +1833,6 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
- ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
- if (ret)
- goto err_unbind;
-
if (fill_nldev_handle(msg, device) ||
nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
@@ -1800,13 +1841,15 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
goto err_fill;
}
+ ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
+ if (ret)
+ goto err_fill;
+
nlmsg_end(msg, nlh);
ib_device_put(device);
- return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
err_fill:
- rdma_counter_bind_qpn(device, port, qpn, cntn);
-err_unbind:
nlmsg_free(msg);
err:
ib_device_put(device);
@@ -1883,7 +1926,7 @@ static int stat_get_doit_default_counter(struct sk_buff *skb,
for (i = 0; i < num_cnts; i++) {
v = stats->value[i] +
rdma_counter_get_hwstat_value(device, port, i);
- if (fill_stat_hwcounter_entry(msg, stats->names[i], v)) {
+ if (rdma_nl_stat_hwcounter_entry(msg, stats->names[i], v)) {
ret = -EMSGSIZE;
goto err_table;
}
@@ -1893,7 +1936,7 @@ static int stat_get_doit_default_counter(struct sk_buff *skb,
mutex_unlock(&stats->lock);
nlmsg_end(msg, nlh);
ib_device_put(device);
- return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
err_table:
nla_nest_cancel(msg, table_attr);
@@ -1965,7 +2008,7 @@ static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
nlmsg_end(msg, nlh);
ib_device_put(device);
- return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
err_msg:
nlmsg_free(msg);
@@ -1992,7 +2035,10 @@ static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
case RDMA_NLDEV_ATTR_RES_QP:
ret = stat_get_doit_qp(skb, nlh, extack, tb);
break;
-
+ case RDMA_NLDEV_ATTR_RES_MR:
+ ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR,
+ fill_stat_mr_entry);
+ break;
default:
ret = -EINVAL;
break;
@@ -2016,7 +2062,10 @@ static int nldev_stat_get_dumpit(struct sk_buff *skb,
case RDMA_NLDEV_ATTR_RES_QP:
ret = nldev_res_get_counter_dumpit(skb, cb);
break;
-
+ case RDMA_NLDEV_ATTR_RES_MR:
+ ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR,
+ fill_stat_mr_entry);
+ break;
default:
ret = -EINVAL;
break;
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index ccf4d069c25c..5128cb16bb48 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -42,26 +42,21 @@
#include "core_priv.h"
#include "rdma_core.h"
-void uverbs_uobject_get(struct ib_uobject *uobject)
-{
- kref_get(&uobject->ref);
-}
-
static void uverbs_uobject_free(struct kref *ref)
{
- struct ib_uobject *uobj =
- container_of(ref, struct ib_uobject, ref);
-
- if (uobj->uapi_object->type_class->needs_kfree_rcu)
- kfree_rcu(uobj, rcu);
- else
- kfree(uobj);
+ kfree_rcu(container_of(ref, struct ib_uobject, ref), rcu);
}
+/*
+ * In order to indicate we no longer needs this uobject, uverbs_uobject_put
+ * is called. When the reference count is decreased, the uobject is freed.
+ * For example, this is used when attaching a completion channel to a CQ.
+ */
void uverbs_uobject_put(struct ib_uobject *uobject)
{
kref_put(&uobject->ref, uverbs_uobject_free);
}
+EXPORT_SYMBOL(uverbs_uobject_put);
static int uverbs_try_lock_object(struct ib_uobject *uobj,
enum rdma_lookup_mode mode)
@@ -135,7 +130,11 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
lockdep_assert_held(&ufile->hw_destroy_rwsem);
assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE);
- if (uobj->object) {
+ if (reason == RDMA_REMOVE_ABORT) {
+ WARN_ON(!list_empty(&uobj->list));
+ WARN_ON(!uobj->context);
+ uobj->uapi_object->type_class->alloc_abort(uobj);
+ } else if (uobj->object) {
ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason,
attrs);
if (ret) {
@@ -151,12 +150,6 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
uobj->object = NULL;
}
- if (reason == RDMA_REMOVE_ABORT) {
- WARN_ON(!list_empty(&uobj->list));
- WARN_ON(!uobj->context);
- uobj->uapi_object->type_class->alloc_abort(uobj);
- }
-
uobj->context = NULL;
/*
@@ -263,15 +256,20 @@ int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
}
/* alloc_uobj must be undone by uverbs_destroy_uobject() */
-static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
+static struct ib_uobject *alloc_uobj(struct uverbs_attr_bundle *attrs,
const struct uverbs_api_object *obj)
{
+ struct ib_uverbs_file *ufile = attrs->ufile;
struct ib_uobject *uobj;
- struct ib_ucontext *ucontext;
- ucontext = ib_uverbs_get_ucontext_file(ufile);
- if (IS_ERR(ucontext))
- return ERR_CAST(ucontext);
+ if (!attrs->context) {
+ struct ib_ucontext *ucontext =
+ ib_uverbs_get_ucontext_file(ufile);
+
+ if (IS_ERR(ucontext))
+ return ERR_CAST(ucontext);
+ attrs->context = ucontext;
+ }
uobj = kzalloc(obj->type_attrs->obj_size, GFP_KERNEL);
if (!uobj)
@@ -281,7 +279,7 @@ static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
* The object is added to the list in the commit stage.
*/
uobj->ufile = ufile;
- uobj->context = ucontext;
+ uobj->context = attrs->context;
INIT_LIST_HEAD(&uobj->list);
uobj->uapi_object = obj;
/*
@@ -358,9 +356,9 @@ lookup_get_fd_uobject(const struct uverbs_api_object *obj,
uobject = f->private_data;
/*
- * fget(id) ensures we are not currently running uverbs_close_fd,
- * and the caller is expected to ensure that uverbs_close_fd is never
- * done while a call top lookup is possible.
+ * fget(id) ensures we are not currently running
+ * uverbs_uobject_fd_release(), and the caller is expected to ensure
+ * that release is never done while a call to lookup is possible.
*/
if (f->f_op != fd_type->fops) {
fput(f);
@@ -424,12 +422,12 @@ free:
static struct ib_uobject *
alloc_begin_idr_uobject(const struct uverbs_api_object *obj,
- struct ib_uverbs_file *ufile)
+ struct uverbs_attr_bundle *attrs)
{
int ret;
struct ib_uobject *uobj;
- uobj = alloc_uobj(ufile, obj);
+ uobj = alloc_uobj(attrs, obj);
if (IS_ERR(uobj))
return uobj;
@@ -445,7 +443,7 @@ alloc_begin_idr_uobject(const struct uverbs_api_object *obj,
return uobj;
remove:
- xa_erase(&ufile->idr, uobj->id);
+ xa_erase(&attrs->ufile->idr, uobj->id);
uobj_put:
uverbs_uobject_put(uobj);
return ERR_PTR(ret);
@@ -453,31 +451,48 @@ uobj_put:
static struct ib_uobject *
alloc_begin_fd_uobject(const struct uverbs_api_object *obj,
- struct ib_uverbs_file *ufile)
+ struct uverbs_attr_bundle *attrs)
{
+ const struct uverbs_obj_fd_type *fd_type =
+ container_of(obj->type_attrs, struct uverbs_obj_fd_type, type);
int new_fd;
struct ib_uobject *uobj;
+ struct file *filp;
+
+ if (WARN_ON(fd_type->fops->release != &uverbs_uobject_fd_release))
+ return ERR_PTR(-EINVAL);
new_fd = get_unused_fd_flags(O_CLOEXEC);
if (new_fd < 0)
return ERR_PTR(new_fd);
- uobj = alloc_uobj(ufile, obj);
- if (IS_ERR(uobj)) {
- put_unused_fd(new_fd);
- return uobj;
+ uobj = alloc_uobj(attrs, obj);
+ if (IS_ERR(uobj))
+ goto err_fd;
+
+ /* Note that uverbs_uobject_fd_release() is called during abort */
+ filp = anon_inode_getfile(fd_type->name, fd_type->fops, NULL,
+ fd_type->flags);
+ if (IS_ERR(filp)) {
+ uobj = ERR_CAST(filp);
+ goto err_uobj;
}
+ uobj->object = filp;
uobj->id = new_fd;
- uobj->ufile = ufile;
+ return uobj;
+err_uobj:
+ uverbs_uobject_put(uobj);
+err_fd:
+ put_unused_fd(new_fd);
return uobj;
}
struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
- struct ib_uverbs_file *ufile,
struct uverbs_attr_bundle *attrs)
{
+ struct ib_uverbs_file *ufile = attrs->ufile;
struct ib_uobject *ret;
if (IS_ERR(obj))
@@ -491,13 +506,11 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
if (!down_read_trylock(&ufile->hw_destroy_rwsem))
return ERR_PTR(-EIO);
- ret = obj->type_class->alloc_begin(obj, ufile);
+ ret = obj->type_class->alloc_begin(obj, attrs);
if (IS_ERR(ret)) {
up_read(&ufile->hw_destroy_rwsem);
return ret;
}
- if (attrs)
- attrs->context = ret->context;
return ret;
}
@@ -544,6 +557,9 @@ static void remove_handle_idr_uobject(struct ib_uobject *uobj)
static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
{
+ struct file *filp = uobj->object;
+
+ fput(filp);
put_unused_fd(uobj->id);
}
@@ -553,7 +569,7 @@ static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj,
{
const struct uverbs_obj_fd_type *fd_type = container_of(
uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
- int ret = fd_type->context_closed(uobj, why);
+ int ret = fd_type->destroy_object(uobj, why);
if (ib_is_destroy_retryable(ret, why, uobj))
return ret;
@@ -565,7 +581,7 @@ static void remove_handle_fd_uobject(struct ib_uobject *uobj)
{
}
-static int alloc_commit_idr_uobject(struct ib_uobject *uobj)
+static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
{
struct ib_uverbs_file *ufile = uobj->ufile;
void *old;
@@ -579,33 +595,14 @@ static int alloc_commit_idr_uobject(struct ib_uobject *uobj)
*/
old = xa_store(&ufile->idr, uobj->id, uobj, GFP_KERNEL);
WARN_ON(old != NULL);
-
- return 0;
}
-static int alloc_commit_fd_uobject(struct ib_uobject *uobj)
+static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
{
- const struct uverbs_obj_fd_type *fd_type = container_of(
- uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
int fd = uobj->id;
- struct file *filp;
-
- /*
- * The kref for uobj is moved into filp->private data and put in
- * uverbs_close_fd(). Once alloc_commit() succeeds uverbs_close_fd()
- * must be guaranteed to be called from the provided fops release
- * callback.
- */
- filp = anon_inode_getfile(fd_type->name,
- fd_type->fops,
- uobj,
- fd_type->flags);
- if (IS_ERR(filp))
- return PTR_ERR(filp);
-
- uobj->object = filp;
+ struct file *filp = uobj->object;
- /* Matching put will be done in uverbs_close_fd() */
+ /* Matching put will be done in uverbs_uobject_fd_release() */
kref_get(&uobj->ufile->ref);
/* This shouldn't be used anymore. Use the file object instead */
@@ -613,11 +610,10 @@ static int alloc_commit_fd_uobject(struct ib_uobject *uobj)
/*
* NOTE: Once we install the file we loose ownership of our kref on
- * uobj. It will be put by uverbs_close_fd()
+ * uobj. It will be put by uverbs_uobject_fd_release()
*/
+ filp->private_data = uobj;
fd_install(fd, filp);
-
- return 0;
}
/*
@@ -625,19 +621,13 @@ static int alloc_commit_fd_uobject(struct ib_uobject *uobj)
* caller can no longer assume uobj is valid. If this function fails it
* destroys the uboject, including the attached HW object.
*/
-int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj,
- struct uverbs_attr_bundle *attrs)
+void rdma_alloc_commit_uobject(struct ib_uobject *uobj,
+ struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_file *ufile = attrs->ufile;
- int ret;
/* alloc_commit consumes the uobj kref */
- ret = uobj->uapi_object->type_class->alloc_commit(uobj);
- if (ret) {
- uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs);
- up_read(&ufile->hw_destroy_rwsem);
- return ret;
- }
+ uobj->uapi_object->type_class->alloc_commit(uobj);
/* kref is held so long as the uobj is on the uobj list. */
uverbs_uobject_get(uobj);
@@ -650,8 +640,6 @@ int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj,
/* Matches the down_read in rdma_alloc_begin_uobject */
up_read(&ufile->hw_destroy_rwsem);
-
- return 0;
}
/*
@@ -663,7 +651,6 @@ void rdma_alloc_abort_uobject(struct ib_uobject *uobj,
{
struct ib_uverbs_file *ufile = uobj->ufile;
- uobj->object = NULL;
uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs);
/* Matches the down_read in rdma_alloc_begin_uobject */
@@ -681,7 +668,10 @@ static void lookup_put_fd_uobject(struct ib_uobject *uobj,
struct file *filp = uobj->object;
WARN_ON(mode != UVERBS_LOOKUP_READ);
- /* This indirectly calls uverbs_close_fd and free the object */
+ /*
+ * This indirectly calls uverbs_uobject_fd_release() and free the
+ * object
+ */
fput(filp);
}
@@ -744,33 +734,32 @@ const struct uverbs_obj_type_class uverbs_idr_class = {
.lookup_put = lookup_put_idr_uobject,
.destroy_hw = destroy_hw_idr_uobject,
.remove_handle = remove_handle_idr_uobject,
- /*
- * When we destroy an object, we first just lock it for WRITE and
- * actually DESTROY it in the finalize stage. So, the problematic
- * scenario is when we just started the finalize stage of the
- * destruction (nothing was executed yet). Now, the other thread
- * fetched the object for READ access, but it didn't lock it yet.
- * The DESTROY thread continues and starts destroying the object.
- * When the other thread continue - without the RCU, it would
- * access freed memory. However, the rcu_read_lock delays the free
- * until the rcu_read_lock of the READ operation quits. Since the
- * exclusive lock of the object is still taken by the DESTROY flow, the
- * READ operation will get -EBUSY and it'll just bail out.
- */
- .needs_kfree_rcu = true,
};
EXPORT_SYMBOL(uverbs_idr_class);
-void uverbs_close_fd(struct file *f)
+/*
+ * Users of UVERBS_TYPE_ALLOC_FD should set this function as the struct
+ * file_operations release method.
+ */
+int uverbs_uobject_fd_release(struct inode *inode, struct file *filp)
{
- struct ib_uobject *uobj = f->private_data;
- struct ib_uverbs_file *ufile = uobj->ufile;
- struct uverbs_attr_bundle attrs = {
- .context = uobj->context,
- .ufile = ufile,
- };
+ struct ib_uverbs_file *ufile;
+ struct ib_uobject *uobj;
+
+ /*
+ * This can only happen if the fput came from alloc_abort_fd_uobject()
+ */
+ if (!filp->private_data)
+ return 0;
+ uobj = filp->private_data;
+ ufile = uobj->ufile;
if (down_read_trylock(&ufile->hw_destroy_rwsem)) {
+ struct uverbs_attr_bundle attrs = {
+ .context = uobj->context,
+ .ufile = ufile,
+ };
+
/*
* lookup_get_fd_uobject holds the kref on the struct file any
* time a FD uobj is locked, which prevents this release
@@ -782,13 +771,14 @@ void uverbs_close_fd(struct file *f)
up_read(&ufile->hw_destroy_rwsem);
}
- /* Matches the get in alloc_begin_fd_uobject */
+ /* Matches the get in alloc_commit_fd_uobject() */
kref_put(&ufile->ref, ib_uverbs_release_file);
/* Pairs with filp->private_data in alloc_begin_fd_uobject */
uverbs_uobject_put(uobj);
+ return 0;
}
-EXPORT_SYMBOL(uverbs_close_fd);
+EXPORT_SYMBOL(uverbs_uobject_fd_release);
/*
* Drop the ucontext off the ufile and completely disconnect it from the
@@ -817,6 +807,7 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
rdma_restrack_del(&ucontext->res);
ib_dev->ops.dealloc_ucontext(ucontext);
+ WARN_ON(!xa_empty(&ucontext->mmap_xa));
kfree(ucontext);
ufile->ucontext = NULL;
@@ -854,9 +845,7 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
}
/*
- * Destroy the uncontext and every uobject associated with it. If called with
- * reason != RDMA_REMOVE_CLOSE this will not return until the destruction has
- * been completed and ufile->ucontext is NULL.
+ * Destroy the uncontext and every uobject associated with it.
*
* This is internally locked and can be called in parallel from multiple
* contexts.
@@ -864,22 +853,6 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
enum rdma_remove_reason reason)
{
- if (reason == RDMA_REMOVE_CLOSE) {
- /*
- * During destruction we might trigger something that
- * synchronously calls release on any file descriptor. For
- * this reason all paths that come from file_operations
- * release must use try_lock. They can progress knowing that
- * there is an ongoing uverbs_destroy_ufile_hw that will clean
- * up the driver resources.
- */
- if (!mutex_trylock(&ufile->ucontext_lock))
- return;
-
- } else {
- mutex_lock(&ufile->ucontext_lock);
- }
-
down_write(&ufile->hw_destroy_rwsem);
/*
@@ -908,7 +881,6 @@ void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
done:
up_write(&ufile->hw_destroy_rwsem);
- mutex_unlock(&ufile->ucontext_lock);
}
const struct uverbs_obj_type_class uverbs_fd_class = {
@@ -919,7 +891,6 @@ const struct uverbs_obj_type_class uverbs_fd_class = {
.lookup_put = lookup_put_fd_uobject,
.destroy_hw = destroy_hw_fd_uobject,
.remove_handle = remove_handle_fd_uobject,
- .needs_kfree_rcu = false,
};
EXPORT_SYMBOL(uverbs_fd_class);
@@ -942,19 +913,17 @@ uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access,
return rdma_lookup_get_uobject(obj, attrs->ufile, id,
UVERBS_LOOKUP_WRITE, attrs);
case UVERBS_ACCESS_NEW:
- return rdma_alloc_begin_uobject(obj, attrs->ufile, attrs);
+ return rdma_alloc_begin_uobject(obj, attrs);
default:
WARN_ON(true);
return ERR_PTR(-EOPNOTSUPP);
}
}
-int uverbs_finalize_object(struct ib_uobject *uobj,
- enum uverbs_obj_access access, bool commit,
- struct uverbs_attr_bundle *attrs)
+void uverbs_finalize_object(struct ib_uobject *uobj,
+ enum uverbs_obj_access access, bool commit,
+ struct uverbs_attr_bundle *attrs)
{
- int ret = 0;
-
/*
* refcounts should be handled at the object level and not at the
* uobject level. Refcounts of the objects themselves are done in
@@ -974,14 +943,11 @@ int uverbs_finalize_object(struct ib_uobject *uobj,
break;
case UVERBS_ACCESS_NEW:
if (commit)
- ret = rdma_alloc_commit_uobject(uobj, attrs);
+ rdma_alloc_commit_uobject(uobj, attrs);
else
rdma_alloc_abort_uobject(uobj, attrs);
break;
default:
WARN_ON(true);
- ret = -EOPNOTSUPP;
}
-
- return ret;
}
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index e63fbda25e1d..33978e0f1262 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -51,29 +51,6 @@ void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs);
/*
- * uverbs_uobject_get is called in order to increase the reference count on
- * an uobject. This is useful when a handler wants to keep the uobject's memory
- * alive, regardless if this uobject is still alive in the context's objects
- * repository. Objects are put via uverbs_uobject_put.
- */
-void uverbs_uobject_get(struct ib_uobject *uobject);
-
-/*
- * In order to indicate we no longer needs this uobject, uverbs_uobject_put
- * is called. When the reference count is decreased, the uobject is freed.
- * For example, this is used when attaching a completion channel to a CQ.
- */
-void uverbs_uobject_put(struct ib_uobject *uobject);
-
-/* Indicate this fd is no longer used by this consumer, but its memory isn't
- * necessarily released yet. When the last reference is put, we release the
- * memory. After this call is executed, calling uverbs_uobject_get isn't
- * allowed.
- * This must be called from the release file_operations of the file!
- */
-void uverbs_close_fd(struct file *f);
-
-/*
* Get an ib_uobject that corresponds to the given id from ufile, assuming
* the object is from the given type. Lock it to the required access when
* applicable.
@@ -86,24 +63,9 @@ struct ib_uobject *
uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access,
s64 id, struct uverbs_attr_bundle *attrs);
-/*
- * Note that certain finalize stages could return a status:
- * (a) alloc_commit could return a failure if the object is committed at the
- * same time when the context is destroyed.
- * (b) remove_commit could fail if the object wasn't destroyed successfully.
- * Since multiple objects could be finalized in one transaction, it is very NOT
- * recommended to have several finalize actions which have side effects.
- * For example, it's NOT recommended to have a certain action which has both
- * a commit action and a destroy action or two destroy objects in the same
- * action. The rule of thumb is to have one destroy or commit action with
- * multiple lookups.
- * The first non zero return value of finalize_object is returned from this
- * function. For example, this could happen when we couldn't destroy an
- * object.
- */
-int uverbs_finalize_object(struct ib_uobject *uobj,
- enum uverbs_obj_access access, bool commit,
- struct uverbs_attr_bundle *attrs);
+void uverbs_finalize_object(struct ib_uobject *uobj,
+ enum uverbs_obj_access access, bool commit,
+ struct uverbs_attr_bundle *attrs);
int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx);
@@ -189,6 +151,7 @@ void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
unsigned int num_attrs);
void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile);
+extern const struct uapi_definition uverbs_def_obj_async_fd[];
extern const struct uapi_definition uverbs_def_obj_counters[];
extern const struct uapi_definition uverbs_def_obj_cq[];
extern const struct uapi_definition uverbs_def_obj_device[];
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index bddff426ee0f..62fbb0ae9cb4 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -107,10 +107,8 @@ void rdma_restrack_clean(struct ib_device *dev)
* rdma_restrack_count() - the current usage of specific object
* @dev: IB device
* @type: actual type of object to operate
- * @ns: PID namespace
*/
-int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type,
- struct pid_namespace *ns)
+int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type)
{
struct rdma_restrack_root *rt = &dev->res[type];
struct rdma_restrack_entry *e;
@@ -118,12 +116,8 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type,
u32 cnt = 0;
xa_lock(&rt->xa);
- xas_for_each(&xas, e, U32_MAX) {
- if (ns == &init_pid_ns ||
- (!rdma_is_kernel_res(e) &&
- ns == task_active_pid_ns(e->task)))
- cnt++;
- }
+ xas_for_each(&xas, e, U32_MAX)
+ cnt++;
xa_unlock(&rt->xa);
return cnt;
}
@@ -349,16 +343,3 @@ out:
}
}
EXPORT_SYMBOL(rdma_restrack_del);
-
-bool rdma_is_visible_in_pid_ns(struct rdma_restrack_entry *res)
-{
- /*
- * 1. Kern resources should be visible in init
- * namespace only
- * 2. Present only resources visible in the current
- * namespace
- */
- if (rdma_is_kernel_res(res))
- return task_active_pid_ns(current) == &init_pid_ns;
- return task_active_pid_ns(current) == task_active_pid_ns(res->task);
-}
diff --git a/drivers/infiniband/core/restrack.h b/drivers/infiniband/core/restrack.h
index 7bd177cc0a61..d084e5f89849 100644
--- a/drivers/infiniband/core/restrack.h
+++ b/drivers/infiniband/core/restrack.h
@@ -27,5 +27,4 @@ int rdma_restrack_init(struct ib_device *dev);
void rdma_restrack_clean(struct ib_device *dev);
void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
struct task_struct *task);
-bool rdma_is_visible_in_pid_ns(struct rdma_restrack_entry *res);
#endif /* _RDMA_CORE_RESTRACK_H_ */
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index dce06108c8c3..4fad732f9b3c 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -20,14 +20,17 @@ module_param_named(force_mr, rdma_rw_force_mr, bool, 0);
MODULE_PARM_DESC(force_mr, "Force usage of MRs for RDMA READ/WRITE operations");
/*
- * Check if the device might use memory registration. This is currently only
- * true for iWarp devices. In the future we can hopefully fine tune this based
- * on HCA driver input.
+ * Report whether memory registration should be used. Memory registration must
+ * be used for iWarp devices because of iWARP-specific limitations. Memory
+ * registration is also enabled if registering memory might yield better
+ * performance than using multiple SGE entries, see rdma_rw_io_needs_mr()
*/
static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num)
{
if (rdma_protocol_iwarp(dev, port_num))
return true;
+ if (dev->attrs.max_sgl_rd)
+ return true;
if (unlikely(rdma_rw_force_mr))
return true;
return false;
@@ -35,17 +38,19 @@ static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num)
/*
* Check if the device will use memory registration for this RW operation.
- * We currently always use memory registrations for iWarp RDMA READs, and
- * have a debug option to force usage of MRs.
- *
- * XXX: In the future we can hopefully fine tune this based on HCA driver
- * input.
+ * For RDMA READs we must use MRs on iWarp and can optionally use them as an
+ * optimization otherwise. Additionally we have a debug option to force usage
+ * of MRs to help testing this code path.
*/
static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num,
enum dma_data_direction dir, int dma_nents)
{
- if (rdma_protocol_iwarp(dev, port_num) && dir == DMA_FROM_DEVICE)
- return true;
+ if (dir == DMA_FROM_DEVICE) {
+ if (rdma_protocol_iwarp(dev, port_num))
+ return true;
+ if (dev->attrs.max_sgl_rd && dma_nents > dev->attrs.max_sgl_rd)
+ return true;
+ }
if (unlikely(rdma_rw_force_mr))
return true;
return false;
@@ -583,8 +588,10 @@ void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
break;
}
- /* P2PDMA contexts do not need to be unmapped */
- if (!is_pci_p2pdma_page(sg_page(sg)))
+ if (is_pci_p2pdma_page(sg_page(sg)))
+ pci_p2pdma_unmap_sg(qp->pd->device->dma_device, sg,
+ sg_cnt, dir);
+ else
ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 7d8071c7e564..30d4c126a2db 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -860,7 +860,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
/* Repair the nlmsg header length */
nlmsg_end(skb, nlh);
- return rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_mask);
+ return rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, gfp_mask);
}
static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
@@ -1068,7 +1068,7 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb,
}
settimeout_out:
- return skb->len;
+ return 0;
}
static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
@@ -1139,7 +1139,7 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb,
}
resp_out:
- return skb->len;
+ return 0;
}
static void free_sm_ah(struct kref *kref)
@@ -1246,7 +1246,7 @@ static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
* @port_num: Port on the specified device.
* @rec: path record entry to use for ah attributes initialization.
* @ah_attr: address handle attributes to initialization from path record.
- * @sgid_attr: SGID attribute to consider during initialization.
+ * @gid_attr: SGID attribute to consider during initialization.
*
* When ib_init_ah_attr_from_path() returns success,
* (a) for IB link layer it optionally contains a reference to SGID attribute
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index 1ab423b19f77..2b4d80393bd0 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -339,22 +339,16 @@ static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp,
if (!new_pps)
return NULL;
- if (qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) {
- if (!qp_pps) {
- new_pps->main.port_num = qp_attr->port_num;
- new_pps->main.pkey_index = qp_attr->pkey_index;
- } else {
- new_pps->main.port_num = (qp_attr_mask & IB_QP_PORT) ?
- qp_attr->port_num :
- qp_pps->main.port_num;
-
- new_pps->main.pkey_index =
- (qp_attr_mask & IB_QP_PKEY_INDEX) ?
- qp_attr->pkey_index :
- qp_pps->main.pkey_index;
- }
+ if (qp_attr_mask & IB_QP_PORT)
+ new_pps->main.port_num =
+ (qp_pps) ? qp_pps->main.port_num : qp_attr->port_num;
+ if (qp_attr_mask & IB_QP_PKEY_INDEX)
+ new_pps->main.pkey_index = (qp_pps) ? qp_pps->main.pkey_index :
+ qp_attr->pkey_index;
+ if ((qp_attr_mask & IB_QP_PKEY_INDEX) && (qp_attr_mask & IB_QP_PORT))
new_pps->main.state = IB_PORT_PKEY_VALID;
- } else if (qp_pps) {
+
+ if (!(qp_attr_mask & (IB_QP_PKEY_INDEX || IB_QP_PORT)) && qp_pps) {
new_pps->main.port_num = qp_pps->main.port_num;
new_pps->main.pkey_index = qp_pps->main.pkey_index;
if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID)
@@ -426,7 +420,7 @@ int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev)
int ret;
rdma_for_each_port (dev, i) {
- is_ib = rdma_protocol_ib(dev, i++);
+ is_ib = rdma_protocol_ib(dev, i);
if (is_ib)
break;
}
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index b477295a96c2..087682e6969e 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -289,6 +289,24 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
ib_width_enum_to_int(attr.active_width), speed);
}
+static const char *phys_state_to_str(enum ib_port_phys_state phys_state)
+{
+ static const char * phys_state_str[] = {
+ "<unknown>",
+ "Sleep",
+ "Polling",
+ "Disabled",
+ "PortConfigurationTraining",
+ "LinkUp",
+ "LinkErrorRecovery",
+ "Phy Test",
+ };
+
+ if (phys_state < ARRAY_SIZE(phys_state_str))
+ return phys_state_str[phys_state];
+ return "<unknown>";
+}
+
static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused,
char *buf)
{
@@ -300,16 +318,8 @@ static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused,
if (ret)
return ret;
- switch (attr.phys_state) {
- case 1: return sprintf(buf, "1: Sleep\n");
- case 2: return sprintf(buf, "2: Polling\n");
- case 3: return sprintf(buf, "3: Disabled\n");
- case 4: return sprintf(buf, "4: PortConfigurationTraining\n");
- case 5: return sprintf(buf, "5: LinkUp\n");
- case 6: return sprintf(buf, "6: LinkErrorRecovery\n");
- case 7: return sprintf(buf, "7: Phy Test\n");
- default: return sprintf(buf, "%d: <unknown>\n", attr.phys_state);
- }
+ return sprintf(buf, "%d: %s\n", attr.phys_state,
+ phys_state_to_str(attr.phys_state));
}
static ssize_t link_layer_show(struct ib_port *p, struct port_attribute *unused,
@@ -471,8 +481,8 @@ static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr,
if (!dev->ops.process_mad)
return -ENOSYS;
- in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
- out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
+ out_mad = kzalloc(sizeof(*out_mad), GFP_KERNEL);
if (!in_mad || !out_mad) {
ret = -ENOMEM;
goto out;
@@ -487,10 +497,8 @@ static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr,
if (attr != IB_PMA_CLASS_PORT_INFO)
in_mad->data[41] = port_num; /* PortSelect field */
- if ((dev->ops.process_mad(dev, IB_MAD_IGNORE_MKEY,
- port_num, NULL, NULL,
- (const struct ib_mad_hdr *)in_mad, mad_size,
- (struct ib_mad_hdr *)out_mad, &mad_size,
+ if ((dev->ops.process_mad(dev, IB_MAD_IGNORE_MKEY, port_num, NULL, NULL,
+ in_mad, out_mad, &mad_size,
&out_mad_pkey_index) &
(IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) !=
(IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) {
@@ -1258,7 +1266,7 @@ static ssize_t node_desc_store(struct device *device,
int ret;
if (!dev->ops.modify_device)
- return -EIO;
+ return -EOPNOTSUPP;
memcpy(desc.node_desc, buf, min_t(int, count, IB_DEVICE_NODE_DESC_MAX));
ret = ib_modify_device(dev, IB_DEVICE_MODIFY_NODE_DESC, &desc);
diff --git a/drivers/infiniband/core/trace.c b/drivers/infiniband/core/trace.c
new file mode 100644
index 000000000000..6c3514beac4d
--- /dev/null
+++ b/drivers/infiniband/core/trace.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Trace points for core RDMA functions.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#define CREATE_TRACE_POINTS
+
+#include <rdma/ib_verbs.h>
+
+#include <trace/events/rdma_core.h>
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 08da840ed7ee..06b6125b5ae1 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -54,10 +54,7 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
page = sg_page_iter_page(&sg_iter);
- if (umem->writable && dirty)
- put_user_pages_dirty_lock(&page, 1);
- else
- put_user_page(page);
+ unpin_user_pages_dirty_lock(&page, 1, umem->writable && dirty);
}
sg_free_table(&umem->sg_head);
@@ -169,10 +166,13 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
* for any address.
*/
mask |= (sg_dma_address(sg) + pgoff) ^ va;
- if (i && i != (umem->nmap - 1))
- /* restrict by length as well for interior SGEs */
- mask |= sg_dma_len(sg);
va += sg_dma_len(sg) - pgoff;
+ /* Except for the last entry, the ending iova alignment sets
+ * the maximum possible page size as the low bits of the iova
+ * must be zero when starting the next chunk.
+ */
+ if (i != (umem->nmap - 1))
+ mask |= va;
pgoff = 0;
}
best_pg_bit = rdma_find_pg_bit(mask, pgsz_bitmap);
@@ -184,19 +184,14 @@ EXPORT_SYMBOL(ib_umem_find_best_pgsz);
/**
* ib_umem_get - Pin and DMA map userspace memory.
*
- * If access flags indicate ODP memory, avoid pinning. Instead, stores
- * the mm for future page fault handling in conjunction with MMU notifiers.
- *
- * @udata: userspace context to pin memory for
+ * @device: IB device to connect UMEM
* @addr: userspace virtual address to start at
* @size: length of region to pin
* @access: IB_ACCESS_xxx flags for memory being pinned
- * @dmasync: flush in-flight DMA when the memory region is written
*/
-struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
- size_t size, int access, int dmasync)
+struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
+ size_t size, int access)
{
- struct ib_ucontext *context;
struct ib_umem *umem;
struct page **page_list;
unsigned long lock_limit;
@@ -205,21 +200,9 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
struct mm_struct *mm;
unsigned long npages;
int ret;
- unsigned long dma_attrs = 0;
struct scatterlist *sg;
unsigned int gup_flags = FOLL_WRITE;
- if (!udata)
- return ERR_PTR(-EIO);
-
- context = container_of(udata, struct uverbs_attr_bundle, driver_udata)
- ->context;
- if (!context)
- return ERR_PTR(-EIO);
-
- if (dmasync)
- dma_attrs |= DMA_ATTR_WRITE_BARRIER;
-
/*
* If the combination of the addr and size requested for this memory
* region causes an integer overflow, return error.
@@ -231,36 +214,19 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
if (!can_do_mlock())
return ERR_PTR(-EPERM);
- if (access & IB_ACCESS_ON_DEMAND) {
- umem = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL);
- if (!umem)
- return ERR_PTR(-ENOMEM);
- umem->is_odp = 1;
- } else {
- umem = kzalloc(sizeof(*umem), GFP_KERNEL);
- if (!umem)
- return ERR_PTR(-ENOMEM);
- }
+ if (access & IB_ACCESS_ON_DEMAND)
+ return ERR_PTR(-EOPNOTSUPP);
- umem->context = context;
+ umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+ if (!umem)
+ return ERR_PTR(-ENOMEM);
+ umem->ibdev = device;
umem->length = size;
umem->address = addr;
umem->writable = ib_access_writable(access);
umem->owning_mm = mm = current->mm;
mmgrab(mm);
- if (access & IB_ACCESS_ON_DEMAND) {
- if (WARN_ON_ONCE(!context->invalidate_range)) {
- ret = -EINVAL;
- goto umem_kfree;
- }
-
- ret = ib_umem_odp_get(to_ib_umem_odp(umem), access);
- if (ret)
- goto umem_kfree;
- return umem;
- }
-
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list) {
ret = -ENOMEM;
@@ -294,34 +260,28 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
sg = umem->sg_head.sgl;
while (npages) {
- down_read(&mm->mmap_sem);
- ret = get_user_pages(cur_base,
- min_t(unsigned long, npages,
- PAGE_SIZE / sizeof (struct page *)),
- gup_flags | FOLL_LONGTERM,
- page_list, NULL);
- if (ret < 0) {
- up_read(&mm->mmap_sem);
+ ret = pin_user_pages_fast(cur_base,
+ min_t(unsigned long, npages,
+ PAGE_SIZE /
+ sizeof(struct page *)),
+ gup_flags | FOLL_LONGTERM, page_list);
+ if (ret < 0)
goto umem_release;
- }
cur_base += ret * PAGE_SIZE;
npages -= ret;
sg = ib_umem_add_sg_table(sg, page_list, ret,
- dma_get_max_seg_size(context->device->dma_device),
+ dma_get_max_seg_size(device->dma_device),
&umem->sg_nents);
-
- up_read(&mm->mmap_sem);
}
sg_mark_end(sg);
- umem->nmap = ib_dma_map_sg_attrs(context->device,
- umem->sg_head.sgl,
- umem->sg_nents,
- DMA_BIDIRECTIONAL,
- dma_attrs);
+ umem->nmap = ib_dma_map_sg(device,
+ umem->sg_head.sgl,
+ umem->sg_nents,
+ DMA_BIDIRECTIONAL);
if (!umem->nmap) {
ret = -ENOMEM;
@@ -332,7 +292,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
goto out;
umem_release:
- __ib_umem_release(context->device, umem, 0);
+ __ib_umem_release(device, umem, 0);
vma:
atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
out:
@@ -346,15 +306,6 @@ umem_kfree:
}
EXPORT_SYMBOL(ib_umem_get);
-static void __ib_umem_release_tail(struct ib_umem *umem)
-{
- mmdrop(umem->owning_mm);
- if (umem->is_odp)
- kfree(to_ib_umem_odp(umem));
- else
- kfree(umem);
-}
-
/**
* ib_umem_release - release memory pinned with ib_umem_get
* @umem: umem struct to release
@@ -363,30 +314,22 @@ void ib_umem_release(struct ib_umem *umem)
{
if (!umem)
return;
+ if (umem->is_odp)
+ return ib_umem_odp_release(to_ib_umem_odp(umem));
- if (umem->is_odp) {
- ib_umem_odp_release(to_ib_umem_odp(umem));
- __ib_umem_release_tail(umem);
- return;
- }
-
- __ib_umem_release(umem->context->device, umem, 1);
+ __ib_umem_release(umem->ibdev, umem, 1);
atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
- __ib_umem_release_tail(umem);
+ mmdrop(umem->owning_mm);
+ kfree(umem);
}
EXPORT_SYMBOL(ib_umem_release);
int ib_umem_page_count(struct ib_umem *umem)
{
- int i;
- int n;
+ int i, n = 0;
struct scatterlist *sg;
- if (umem->is_odp)
- return ib_umem_num_pages(umem);
-
- n = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
n += sg_dma_len(sg) >> PAGE_SHIFT;
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index c0e15db34680..b8c657b28380 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -39,417 +39,211 @@
#include <linux/export.h>
#include <linux/vmalloc.h>
#include <linux/hugetlb.h>
-#include <linux/interval_tree_generic.h>
+#include <linux/interval_tree.h>
#include <linux/pagemap.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
-/*
- * The ib_umem list keeps track of memory regions for which the HW
- * device request to receive notification when the related memory
- * mapping is changed.
- *
- * ib_umem_lock protects the list.
- */
+#include "uverbs.h"
-static u64 node_start(struct umem_odp_node *n)
+static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
+ const struct mmu_interval_notifier_ops *ops)
{
- struct ib_umem_odp *umem_odp =
- container_of(n, struct ib_umem_odp, interval_tree);
-
- return ib_umem_start(umem_odp);
-}
-
-/* Note that the representation of the intervals in the interval tree
- * considers the ending point as contained in the interval, while the
- * function ib_umem_end returns the first address which is not contained
- * in the umem.
- */
-static u64 node_last(struct umem_odp_node *n)
-{
- struct ib_umem_odp *umem_odp =
- container_of(n, struct ib_umem_odp, interval_tree);
-
- return ib_umem_end(umem_odp) - 1;
-}
-
-INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last,
- node_start, node_last, static, rbt_ib_umem)
-
-static void ib_umem_notifier_start_account(struct ib_umem_odp *umem_odp)
-{
- mutex_lock(&umem_odp->umem_mutex);
- if (umem_odp->notifiers_count++ == 0)
- /*
- * Initialize the completion object for waiting on
- * notifiers. Since notifier_count is zero, no one should be
- * waiting right now.
- */
- reinit_completion(&umem_odp->notifier_completion);
- mutex_unlock(&umem_odp->umem_mutex);
-}
-
-static void ib_umem_notifier_end_account(struct ib_umem_odp *umem_odp)
-{
- mutex_lock(&umem_odp->umem_mutex);
- /*
- * This sequence increase will notify the QP page fault that the page
- * that is going to be mapped in the spte could have been freed.
- */
- ++umem_odp->notifiers_seq;
- if (--umem_odp->notifiers_count == 0)
- complete_all(&umem_odp->notifier_completion);
- mutex_unlock(&umem_odp->umem_mutex);
-}
-
-static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp,
- u64 start, u64 end, void *cookie)
-{
- /*
- * Increase the number of notifiers running, to
- * prevent any further fault handling on this MR.
- */
- ib_umem_notifier_start_account(umem_odp);
- complete_all(&umem_odp->notifier_completion);
- umem_odp->umem.context->invalidate_range(
- umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp));
- return 0;
-}
+ int ret;
-static void ib_umem_notifier_release(struct mmu_notifier *mn,
- struct mm_struct *mm)
-{
- struct ib_ucontext_per_mm *per_mm =
- container_of(mn, struct ib_ucontext_per_mm, mn);
-
- down_read(&per_mm->umem_rwsem);
- if (per_mm->active)
- rbt_ib_umem_for_each_in_range(
- &per_mm->umem_tree, 0, ULLONG_MAX,
- ib_umem_notifier_release_trampoline, true, NULL);
- up_read(&per_mm->umem_rwsem);
-}
+ umem_odp->umem.is_odp = 1;
+ mutex_init(&umem_odp->umem_mutex);
-static int invalidate_range_start_trampoline(struct ib_umem_odp *item,
- u64 start, u64 end, void *cookie)
-{
- ib_umem_notifier_start_account(item);
- item->umem.context->invalidate_range(item, start, end);
- return 0;
-}
+ if (!umem_odp->is_implicit_odp) {
+ size_t page_size = 1UL << umem_odp->page_shift;
+ unsigned long start;
+ unsigned long end;
+ size_t pages;
+
+ start = ALIGN_DOWN(umem_odp->umem.address, page_size);
+ if (check_add_overflow(umem_odp->umem.address,
+ (unsigned long)umem_odp->umem.length,
+ &end))
+ return -EOVERFLOW;
+ end = ALIGN(end, page_size);
+ if (unlikely(end < page_size))
+ return -EOVERFLOW;
+
+ pages = (end - start) >> umem_odp->page_shift;
+ if (!pages)
+ return -EINVAL;
-static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
- const struct mmu_notifier_range *range)
-{
- struct ib_ucontext_per_mm *per_mm =
- container_of(mn, struct ib_ucontext_per_mm, mn);
- int rc;
+ umem_odp->page_list = kvcalloc(
+ pages, sizeof(*umem_odp->page_list), GFP_KERNEL);
+ if (!umem_odp->page_list)
+ return -ENOMEM;
- if (mmu_notifier_range_blockable(range))
- down_read(&per_mm->umem_rwsem);
- else if (!down_read_trylock(&per_mm->umem_rwsem))
- return -EAGAIN;
+ umem_odp->dma_list = kvcalloc(
+ pages, sizeof(*umem_odp->dma_list), GFP_KERNEL);
+ if (!umem_odp->dma_list) {
+ ret = -ENOMEM;
+ goto out_page_list;
+ }
- if (!per_mm->active) {
- up_read(&per_mm->umem_rwsem);
- /*
- * At this point active is permanently set and visible to this
- * CPU without a lock, that fact is relied on to skip the unlock
- * in range_end.
- */
- return 0;
+ ret = mmu_interval_notifier_insert(&umem_odp->notifier,
+ umem_odp->umem.owning_mm,
+ start, end - start, ops);
+ if (ret)
+ goto out_dma_list;
}
- rc = rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, range->start,
- range->end,
- invalidate_range_start_trampoline,
- mmu_notifier_range_blockable(range),
- NULL);
- if (rc)
- up_read(&per_mm->umem_rwsem);
- return rc;
-}
-
-static int invalidate_range_end_trampoline(struct ib_umem_odp *item, u64 start,
- u64 end, void *cookie)
-{
- ib_umem_notifier_end_account(item);
return 0;
-}
-
-static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
- const struct mmu_notifier_range *range)
-{
- struct ib_ucontext_per_mm *per_mm =
- container_of(mn, struct ib_ucontext_per_mm, mn);
-
- if (unlikely(!per_mm->active))
- return;
-
- rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, range->start,
- range->end,
- invalidate_range_end_trampoline, true, NULL);
- up_read(&per_mm->umem_rwsem);
-}
-static const struct mmu_notifier_ops ib_umem_notifiers = {
- .release = ib_umem_notifier_release,
- .invalidate_range_start = ib_umem_notifier_invalidate_range_start,
- .invalidate_range_end = ib_umem_notifier_invalidate_range_end,
-};
-
-static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp)
-{
- struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
-
- down_write(&per_mm->umem_rwsem);
- if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp)))
- rbt_ib_umem_insert(&umem_odp->interval_tree,
- &per_mm->umem_tree);
- up_write(&per_mm->umem_rwsem);
-}
-
-static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp)
-{
- struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
-
- down_write(&per_mm->umem_rwsem);
- if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp)))
- rbt_ib_umem_remove(&umem_odp->interval_tree,
- &per_mm->umem_tree);
- complete_all(&umem_odp->notifier_completion);
-
- up_write(&per_mm->umem_rwsem);
+out_dma_list:
+ kvfree(umem_odp->dma_list);
+out_page_list:
+ kvfree(umem_odp->page_list);
+ return ret;
}
-static struct ib_ucontext_per_mm *alloc_per_mm(struct ib_ucontext *ctx,
- struct mm_struct *mm)
+/**
+ * ib_umem_odp_alloc_implicit - Allocate a parent implicit ODP umem
+ *
+ * Implicit ODP umems do not have a VA range and do not have any page lists.
+ * They exist only to hold the per_mm reference to help the driver create
+ * children umems.
+ *
+ * @device: IB device to create UMEM
+ * @access: ib_reg_mr access flags
+ */
+struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_device *device,
+ int access)
{
- struct ib_ucontext_per_mm *per_mm;
+ struct ib_umem *umem;
+ struct ib_umem_odp *umem_odp;
int ret;
- per_mm = kzalloc(sizeof(*per_mm), GFP_KERNEL);
- if (!per_mm)
- return ERR_PTR(-ENOMEM);
-
- per_mm->context = ctx;
- per_mm->mm = mm;
- per_mm->umem_tree = RB_ROOT_CACHED;
- init_rwsem(&per_mm->umem_rwsem);
- per_mm->active = true;
+ if (access & IB_ACCESS_HUGETLB)
+ return ERR_PTR(-EINVAL);
- rcu_read_lock();
- per_mm->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
- rcu_read_unlock();
-
- WARN_ON(mm != current->mm);
+ umem_odp = kzalloc(sizeof(*umem_odp), GFP_KERNEL);
+ if (!umem_odp)
+ return ERR_PTR(-ENOMEM);
+ umem = &umem_odp->umem;
+ umem->ibdev = device;
+ umem->writable = ib_access_writable(access);
+ umem->owning_mm = current->mm;
+ umem_odp->is_implicit_odp = 1;
+ umem_odp->page_shift = PAGE_SHIFT;
- per_mm->mn.ops = &ib_umem_notifiers;
- ret = mmu_notifier_register(&per_mm->mn, per_mm->mm);
+ umem_odp->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
+ ret = ib_init_umem_odp(umem_odp, NULL);
if (ret) {
- dev_err(&ctx->device->dev,
- "Failed to register mmu_notifier %d\n", ret);
- goto out_pid;
- }
-
- list_add(&per_mm->ucontext_list, &ctx->per_mm_list);
- return per_mm;
-
-out_pid:
- put_pid(per_mm->tgid);
- kfree(per_mm);
- return ERR_PTR(ret);
-}
-
-static int get_per_mm(struct ib_umem_odp *umem_odp)
-{
- struct ib_ucontext *ctx = umem_odp->umem.context;
- struct ib_ucontext_per_mm *per_mm;
-
- /*
- * Generally speaking we expect only one or two per_mm in this list,
- * so no reason to optimize this search today.
- */
- mutex_lock(&ctx->per_mm_list_lock);
- list_for_each_entry(per_mm, &ctx->per_mm_list, ucontext_list) {
- if (per_mm->mm == umem_odp->umem.owning_mm)
- goto found;
- }
-
- per_mm = alloc_per_mm(ctx, umem_odp->umem.owning_mm);
- if (IS_ERR(per_mm)) {
- mutex_unlock(&ctx->per_mm_list_lock);
- return PTR_ERR(per_mm);
+ put_pid(umem_odp->tgid);
+ kfree(umem_odp);
+ return ERR_PTR(ret);
}
-
-found:
- umem_odp->per_mm = per_mm;
- per_mm->odp_mrs_count++;
- mutex_unlock(&ctx->per_mm_list_lock);
-
- return 0;
+ return umem_odp;
}
+EXPORT_SYMBOL(ib_umem_odp_alloc_implicit);
-static void free_per_mm(struct rcu_head *rcu)
-{
- kfree(container_of(rcu, struct ib_ucontext_per_mm, rcu));
-}
-
-static void put_per_mm(struct ib_umem_odp *umem_odp)
+/**
+ * ib_umem_odp_alloc_child - Allocate a child ODP umem under an implicit
+ * parent ODP umem
+ *
+ * @root: The parent umem enclosing the child. This must be allocated using
+ * ib_alloc_implicit_odp_umem()
+ * @addr: The starting userspace VA
+ * @size: The length of the userspace VA
+ */
+struct ib_umem_odp *
+ib_umem_odp_alloc_child(struct ib_umem_odp *root, unsigned long addr,
+ size_t size,
+ const struct mmu_interval_notifier_ops *ops)
{
- struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
- struct ib_ucontext *ctx = umem_odp->umem.context;
- bool need_free;
-
- mutex_lock(&ctx->per_mm_list_lock);
- umem_odp->per_mm = NULL;
- per_mm->odp_mrs_count--;
- need_free = per_mm->odp_mrs_count == 0;
- if (need_free)
- list_del(&per_mm->ucontext_list);
- mutex_unlock(&ctx->per_mm_list_lock);
-
- if (!need_free)
- return;
-
/*
- * NOTE! mmu_notifier_unregister() can happen between a start/end
- * callback, resulting in an start/end, and thus an unbalanced
- * lock. This doesn't really matter to us since we are about to kfree
- * the memory that holds the lock, however LOCKDEP doesn't like this.
+ * Caller must ensure that root cannot be freed during the call to
+ * ib_alloc_odp_umem.
*/
- down_write(&per_mm->umem_rwsem);
- per_mm->active = false;
- up_write(&per_mm->umem_rwsem);
-
- WARN_ON(!RB_EMPTY_ROOT(&per_mm->umem_tree.rb_root));
- mmu_notifier_unregister_no_release(&per_mm->mn, per_mm->mm);
- put_pid(per_mm->tgid);
- mmu_notifier_call_srcu(&per_mm->rcu, free_per_mm);
-}
-
-struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root,
- unsigned long addr, size_t size)
-{
- struct ib_ucontext_per_mm *per_mm = root->per_mm;
- struct ib_ucontext *ctx = per_mm->context;
struct ib_umem_odp *odp_data;
struct ib_umem *umem;
- int pages = size >> PAGE_SHIFT;
int ret;
+ if (WARN_ON(!root->is_implicit_odp))
+ return ERR_PTR(-EINVAL);
+
odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
if (!odp_data)
return ERR_PTR(-ENOMEM);
umem = &odp_data->umem;
- umem->context = ctx;
+ umem->ibdev = root->umem.ibdev;
umem->length = size;
umem->address = addr;
- odp_data->page_shift = PAGE_SHIFT;
umem->writable = root->umem.writable;
- umem->is_odp = 1;
- odp_data->per_mm = per_mm;
- umem->owning_mm = per_mm->mm;
- mmgrab(umem->owning_mm);
-
- mutex_init(&odp_data->umem_mutex);
- init_completion(&odp_data->notifier_completion);
-
- odp_data->page_list =
- vzalloc(array_size(pages, sizeof(*odp_data->page_list)));
- if (!odp_data->page_list) {
- ret = -ENOMEM;
- goto out_odp_data;
- }
+ umem->owning_mm = root->umem.owning_mm;
+ odp_data->page_shift = PAGE_SHIFT;
+ odp_data->notifier.ops = ops;
- odp_data->dma_list =
- vzalloc(array_size(pages, sizeof(*odp_data->dma_list)));
- if (!odp_data->dma_list) {
- ret = -ENOMEM;
- goto out_page_list;
+ odp_data->tgid = get_pid(root->tgid);
+ ret = ib_init_umem_odp(odp_data, ops);
+ if (ret) {
+ put_pid(odp_data->tgid);
+ kfree(odp_data);
+ return ERR_PTR(ret);
}
-
- /*
- * Caller must ensure that the umem_odp that the per_mm came from
- * cannot be freed during the call to ib_alloc_odp_umem.
- */
- mutex_lock(&ctx->per_mm_list_lock);
- per_mm->odp_mrs_count++;
- mutex_unlock(&ctx->per_mm_list_lock);
- add_umem_to_per_mm(odp_data);
-
return odp_data;
-
-out_page_list:
- vfree(odp_data->page_list);
-out_odp_data:
- mmdrop(umem->owning_mm);
- kfree(odp_data);
- return ERR_PTR(ret);
}
-EXPORT_SYMBOL(ib_alloc_odp_umem);
+EXPORT_SYMBOL(ib_umem_odp_alloc_child);
-int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
+/**
+ * ib_umem_odp_get - Create a umem_odp for a userspace va
+ *
+ * @device: IB device struct to get UMEM
+ * @addr: userspace virtual address to start at
+ * @size: length of region to pin
+ * @access: IB_ACCESS_xxx flags for memory being pinned
+ *
+ * The driver should use when the access flags indicate ODP memory. It avoids
+ * pinning, instead, stores the mm for future page fault handling in
+ * conjunction with MMU notifiers.
+ */
+struct ib_umem_odp *ib_umem_odp_get(struct ib_device *device,
+ unsigned long addr, size_t size, int access,
+ const struct mmu_interval_notifier_ops *ops)
{
- struct ib_umem *umem = &umem_odp->umem;
- /*
- * NOTE: This must called in a process context where umem->owning_mm
- * == current->mm
- */
- struct mm_struct *mm = umem->owning_mm;
- int ret_val;
-
- umem_odp->page_shift = PAGE_SHIFT;
- if (access & IB_ACCESS_HUGETLB) {
- struct vm_area_struct *vma;
- struct hstate *h;
-
- down_read(&mm->mmap_sem);
- vma = find_vma(mm, ib_umem_start(umem_odp));
- if (!vma || !is_vm_hugetlb_page(vma)) {
- up_read(&mm->mmap_sem);
- return -EINVAL;
- }
- h = hstate_vma(vma);
- umem_odp->page_shift = huge_page_shift(h);
- up_read(&mm->mmap_sem);
- }
-
- mutex_init(&umem_odp->umem_mutex);
-
- init_completion(&umem_odp->notifier_completion);
-
- if (ib_umem_odp_num_pages(umem_odp)) {
- umem_odp->page_list =
- vzalloc(array_size(sizeof(*umem_odp->page_list),
- ib_umem_odp_num_pages(umem_odp)));
- if (!umem_odp->page_list)
- return -ENOMEM;
+ struct ib_umem_odp *umem_odp;
+ struct mm_struct *mm;
+ int ret;
- umem_odp->dma_list =
- vzalloc(array_size(sizeof(*umem_odp->dma_list),
- ib_umem_odp_num_pages(umem_odp)));
- if (!umem_odp->dma_list) {
- ret_val = -ENOMEM;
- goto out_page_list;
- }
- }
+ if (WARN_ON_ONCE(!(access & IB_ACCESS_ON_DEMAND)))
+ return ERR_PTR(-EINVAL);
- ret_val = get_per_mm(umem_odp);
- if (ret_val)
- goto out_dma_list;
- add_umem_to_per_mm(umem_odp);
+ umem_odp = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL);
+ if (!umem_odp)
+ return ERR_PTR(-ENOMEM);
- return 0;
+ umem_odp->umem.ibdev = device;
+ umem_odp->umem.length = size;
+ umem_odp->umem.address = addr;
+ umem_odp->umem.writable = ib_access_writable(access);
+ umem_odp->umem.owning_mm = mm = current->mm;
+ umem_odp->notifier.ops = ops;
-out_dma_list:
- vfree(umem_odp->dma_list);
-out_page_list:
- vfree(umem_odp->page_list);
- return ret_val;
+ umem_odp->page_shift = PAGE_SHIFT;
+#ifdef CONFIG_HUGETLB_PAGE
+ if (access & IB_ACCESS_HUGETLB)
+ umem_odp->page_shift = HPAGE_SHIFT;
+#endif
+
+ umem_odp->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
+ ret = ib_init_umem_odp(umem_odp, ops);
+ if (ret)
+ goto err_put_pid;
+ return umem_odp;
+
+err_put_pid:
+ put_pid(umem_odp->tgid);
+ kfree(umem_odp);
+ return ERR_PTR(ret);
}
+EXPORT_SYMBOL(ib_umem_odp_get);
void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
{
@@ -459,14 +253,19 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
* It is the driver's responsibility to ensure, before calling us,
* that the hardware will not attempt to access the MR any more.
*/
- ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
- ib_umem_end(umem_odp));
-
- remove_umem_from_per_mm(umem_odp);
- put_per_mm(umem_odp);
- vfree(umem_odp->dma_list);
- vfree(umem_odp->page_list);
+ if (!umem_odp->is_implicit_odp) {
+ mutex_lock(&umem_odp->umem_mutex);
+ ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
+ ib_umem_end(umem_odp));
+ mutex_unlock(&umem_odp->umem_mutex);
+ mmu_interval_notifier_remove(&umem_odp->notifier);
+ kvfree(umem_odp->dma_list);
+ kvfree(umem_odp->page_list);
+ put_pid(umem_odp->tgid);
+ }
+ kfree(umem_odp);
}
+EXPORT_SYMBOL(ib_umem_odp_release);
/*
* Map for DMA and insert a single page into the on-demand paging page tables.
@@ -482,29 +281,21 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
* The function returns -EFAULT if the DMA mapping operation fails. It returns
* -EAGAIN if a concurrent invalidation prevents us from updating the page.
*
- * The page is released via put_user_page even if the operation failed. For
- * on-demand pinning, the page is released whenever it isn't stored in the
- * umem.
+ * The page is released via put_page even if the operation failed. For on-demand
+ * pinning, the page is released whenever it isn't stored in the umem.
*/
static int ib_umem_odp_map_dma_single_page(
struct ib_umem_odp *umem_odp,
- int page_index,
+ unsigned int page_index,
struct page *page,
u64 access_mask,
unsigned long current_seq)
{
- struct ib_ucontext *context = umem_odp->umem.context;
- struct ib_device *dev = context->device;
+ struct ib_device *dev = umem_odp->umem.ibdev;
dma_addr_t dma_addr;
- int remove_existing_mapping = 0;
int ret = 0;
- /*
- * Note: we avoid writing if seq is different from the initial seq, to
- * handle case of a racing notifier. This check also allows us to bail
- * early if we have a notifier running in parallel with us.
- */
- if (ib_umem_mmu_notifier_retry(umem_odp, current_seq)) {
+ if (mmu_interval_check_retry(&umem_odp->notifier, current_seq)) {
ret = -EAGAIN;
goto out;
}
@@ -522,28 +313,29 @@ static int ib_umem_odp_map_dma_single_page(
} else if (umem_odp->page_list[page_index] == page) {
umem_odp->dma_list[page_index] |= access_mask;
} else {
- pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n",
- umem_odp->page_list[page_index], page);
- /* Better remove the mapping now, to prevent any further
- * damage. */
- remove_existing_mapping = 1;
- }
-
-out:
- put_user_page(page);
-
- if (remove_existing_mapping) {
- ib_umem_notifier_start_account(umem_odp);
- context->invalidate_range(
- umem_odp,
- ib_umem_start(umem_odp) +
- (page_index << umem_odp->page_shift),
- ib_umem_start(umem_odp) +
- ((page_index + 1) << umem_odp->page_shift));
- ib_umem_notifier_end_account(umem_odp);
+ /*
+ * This is a race here where we could have done:
+ *
+ * CPU0 CPU1
+ * get_user_pages()
+ * invalidate()
+ * page_fault()
+ * mutex_lock(umem_mutex)
+ * page from GUP != page in ODP
+ *
+ * It should be prevented by the retry test above as reading
+ * the seq number should be reliable under the
+ * umem_mutex. Thus something is really not working right if
+ * things get here.
+ */
+ WARN(true,
+ "Got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n",
+ umem_odp->page_list[page_index], page);
ret = -EAGAIN;
}
+out:
+ put_page(page);
return ret;
}
@@ -606,7 +398,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
* existing beyond the lifetime of the originating process.. Presumably
* mmget_not_zero will fail in this case.
*/
- owning_process = get_pid_task(umem_odp->per_mm->tgid, PIDTYPE_PID);
+ owning_process = get_pid_task(umem_odp->tgid, PIDTYPE_PID);
if (!owning_process || !mmget_not_zero(owning_mm)) {
ret = -EINVAL;
goto out_put_task;
@@ -620,7 +412,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
while (bcnt > 0) {
const size_t gup_num_pages = min_t(size_t,
- (bcnt + BIT(page_shift) - 1) >> page_shift,
+ ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE,
PAGE_SIZE / sizeof(struct page *));
down_read(&owning_mm->mmap_sem);
@@ -653,7 +445,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
ret = -EFAULT;
break;
}
- put_user_page(local_page_list[j]);
+ put_page(local_page_list[j]);
continue;
}
@@ -680,8 +472,8 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
* ib_umem_odp_map_dma_single_page().
*/
if (npages - (j + 1) > 0)
- put_user_pages(&local_page_list[j+1],
- npages - (j + 1));
+ release_pages(&local_page_list[j+1],
+ npages - (j + 1));
break;
}
}
@@ -707,7 +499,9 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
{
int idx;
u64 addr;
- struct ib_device *dev = umem_odp->umem.context->device;
+ struct ib_device *dev = umem_odp->umem.ibdev;
+
+ lockdep_assert_held(&umem_odp->umem_mutex);
virt = max_t(u64, virt, ib_umem_start(umem_odp));
bound = min_t(u64, bound, ib_umem_end(umem_odp));
@@ -716,7 +510,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
* faults from completion. We might be racing with other
* invalidations, so we must make sure we free each page only
* once. */
- mutex_lock(&umem_odp->umem_mutex);
for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) {
idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
if (umem_odp->page_list[idx]) {
@@ -747,49 +540,5 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
umem_odp->npages--;
}
}
- mutex_unlock(&umem_odp->umem_mutex);
}
EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
-
-/* @last is not a part of the interval. See comment for function
- * node_last.
- */
-int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
- u64 start, u64 last,
- umem_call_back cb,
- bool blockable,
- void *cookie)
-{
- int ret_val = 0;
- struct umem_odp_node *node, *next;
- struct ib_umem_odp *umem;
-
- if (unlikely(start == last))
- return ret_val;
-
- for (node = rbt_ib_umem_iter_first(root, start, last - 1);
- node; node = next) {
- /* TODO move the blockable decision up to the callback */
- if (!blockable)
- return -EAGAIN;
- next = rbt_ib_umem_iter_next(node, start, last - 1);
- umem = container_of(node, struct ib_umem_odp, interval_tree);
- ret_val = cb(umem, start, last, cookie) || ret_val;
- }
-
- return ret_val;
-}
-EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range);
-
-struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root,
- u64 addr, u64 length)
-{
- struct umem_odp_node *node;
-
- node = rbt_ib_umem_iter_first(root, addr, addr + length - 1);
- if (node)
- return container_of(node, struct ib_umem_odp, interval_tree);
- return NULL;
-
-}
-EXPORT_SYMBOL(rbt_ib_umem_lookup);
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index ffdeaf6e0b68..1235ffb2389b 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -1042,7 +1042,7 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
ib_unregister_mad_agent(file->agent[i]);
mutex_unlock(&file->port->file_mutex);
-
+ mutex_destroy(&file->mutex);
kfree(file);
return 0;
}
@@ -1312,6 +1312,9 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
struct ib_umad_file *file;
int id;
+ cdev_device_del(&port->sm_cdev, &port->sm_dev);
+ cdev_device_del(&port->cdev, &port->dev);
+
mutex_lock(&port->file_mutex);
/* Mark ib_dev NULL and block ioctl or other file ops to progress
@@ -1331,8 +1334,6 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
mutex_unlock(&port->file_mutex);
- cdev_device_del(&port->sm_cdev, &port->sm_dev);
- cdev_device_del(&port->cdev, &port->dev);
ida_free(&umad_ida, port->dev_num);
/* balances device_initialize() */
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 1e5aeb39f774..7df71983212d 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -98,7 +98,7 @@ ib_uverbs_init_udata_buf_or_null(struct ib_udata *udata,
struct ib_uverbs_device {
atomic_t refcount;
- int num_comp_vectors;
+ u32 num_comp_vectors;
struct completion comp;
struct device dev;
/* First group for device attributes, NULL terminated array */
@@ -111,7 +111,6 @@ struct ib_uverbs_device {
struct srcu_struct disassociate_srcu;
struct mutex lists_mutex; /* protect lists */
struct list_head uverbs_file_list;
- struct list_head uverbs_events_file_list;
struct uverbs_api *uapi;
};
@@ -124,10 +123,9 @@ struct ib_uverbs_event_queue {
};
struct ib_uverbs_async_event_file {
+ struct ib_uobject uobj;
struct ib_uverbs_event_queue ev_queue;
- struct ib_uverbs_file *uverbs_file;
- struct kref ref;
- struct list_head list;
+ struct ib_event_handler event_handler;
};
struct ib_uverbs_completion_event_file {
@@ -144,8 +142,7 @@ struct ib_uverbs_file {
* ucontext_lock held
*/
struct ib_ucontext *ucontext;
- struct ib_event_handler event_handler;
- struct ib_uverbs_async_event_file *async_file;
+ struct ib_uverbs_async_event_file *async_file;
struct list_head list;
/*
@@ -183,6 +180,7 @@ struct ib_uverbs_mcast_entry {
struct ib_uevent_object {
struct ib_uobject uobject;
+ /* List member for ib_uverbs_async_event_file list */
struct list_head event_list;
u32 events_reported;
};
@@ -210,25 +208,24 @@ struct ib_uwq_object {
};
struct ib_ucq_object {
- struct ib_uobject uobject;
+ struct ib_uevent_object uevent;
struct list_head comp_list;
- struct list_head async_list;
u32 comp_events_reported;
- u32 async_events_reported;
};
extern const struct file_operations uverbs_event_fops;
+extern const struct file_operations uverbs_async_event_fops;
void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue);
-struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
- struct ib_device *ib_dev);
-void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file);
+void ib_uverbs_init_async_event_file(struct ib_uverbs_async_event_file *ev_file);
+void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue);
void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res);
-void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
- struct ib_uverbs_completion_event_file *ev_file,
+int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs);
+int ib_init_ucontext(struct uverbs_attr_bundle *attrs);
+
+void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file,
struct ib_ucq_object *uobj);
-void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
- struct ib_uevent_object *uobj);
+void ib_uverbs_release_uevent(struct ib_uevent_object *uobj);
void ib_uverbs_release_file(struct kref *ref);
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
@@ -236,8 +233,6 @@ void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
-void ib_uverbs_event_handler(struct ib_event_handler *handler,
- struct ib_event *event);
int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd,
enum rdma_remove_reason why,
struct uverbs_attr_bundle *attrs);
@@ -276,23 +271,6 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
size_t kern_filter_sz,
union ib_flow_spec *ib_spec);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DEVICE);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_PD);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_MR);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_CQ);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_QP);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_AH);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_MW);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_SRQ);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_WQ);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_XRCD);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DM);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS);
-
/*
* ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the
* PortInfo CapabilityMask, but was extended with unique bits.
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 7ddd0e5bc6b3..025933752e1d 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -203,85 +203,55 @@ _ib_uverbs_lookup_comp_file(s32 fd, struct uverbs_attr_bundle *attrs)
#define ib_uverbs_lookup_comp_file(_fd, _ufile) \
_ib_uverbs_lookup_comp_file((_fd)*typecheck(s32, _fd), _ufile)
-static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
+int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_file *file = attrs->ufile;
- struct ib_uverbs_get_context cmd;
- struct ib_uverbs_get_context_resp resp;
- struct ib_ucontext *ucontext;
- struct file *filp;
- struct ib_rdmacg_object cg_obj;
+ struct ib_uverbs_file *ufile = attrs->ufile;
+ struct ib_ucontext *ucontext;
struct ib_device *ib_dev;
- int ret;
-
- ret = uverbs_request(attrs, &cmd, sizeof(cmd));
- if (ret)
- return ret;
- mutex_lock(&file->ucontext_lock);
- ib_dev = srcu_dereference(file->device->ib_dev,
- &file->device->disassociate_srcu);
- if (!ib_dev) {
- ret = -EIO;
- goto err;
- }
-
- if (file->ucontext) {
- ret = -EINVAL;
- goto err;
- }
-
- ret = ib_rdmacg_try_charge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
- if (ret)
- goto err;
+ ib_dev = srcu_dereference(ufile->device->ib_dev,
+ &ufile->device->disassociate_srcu);
+ if (!ib_dev)
+ return -EIO;
ucontext = rdma_zalloc_drv_obj(ib_dev, ib_ucontext);
- if (!ucontext) {
- ret = -ENOMEM;
- goto err_alloc;
- }
-
- attrs->context = ucontext;
+ if (!ucontext)
+ return -ENOMEM;
ucontext->res.type = RDMA_RESTRACK_CTX;
ucontext->device = ib_dev;
- ucontext->cg_obj = cg_obj;
- /* ufile is required when some objects are released */
- ucontext->ufile = file;
-
- ucontext->closing = false;
- ucontext->cleanup_retryable = false;
-
- mutex_init(&ucontext->per_mm_list_lock);
- INIT_LIST_HEAD(&ucontext->per_mm_list);
+ ucontext->ufile = ufile;
+ xa_init_flags(&ucontext->mmap_xa, XA_FLAGS_ALLOC);
+ attrs->context = ucontext;
+ return 0;
+}
- ret = get_unused_fd_flags(O_CLOEXEC);
- if (ret < 0)
- goto err_free;
- resp.async_fd = ret;
+int ib_init_ucontext(struct uverbs_attr_bundle *attrs)
+{
+ struct ib_ucontext *ucontext = attrs->context;
+ struct ib_uverbs_file *file = attrs->ufile;
+ int ret;
- filp = ib_uverbs_alloc_async_event_file(file, ib_dev);
- if (IS_ERR(filp)) {
- ret = PTR_ERR(filp);
- goto err_fd;
+ if (!down_read_trylock(&file->hw_destroy_rwsem))
+ return -EIO;
+ mutex_lock(&file->ucontext_lock);
+ if (file->ucontext) {
+ ret = -EINVAL;
+ goto err;
}
- resp.num_comp_vectors = file->device->num_comp_vectors;
-
- ret = uverbs_response(attrs, &resp, sizeof(resp));
+ ret = ib_rdmacg_try_charge(&ucontext->cg_obj, ucontext->device,
+ RDMACG_RESOURCE_HCA_HANDLE);
if (ret)
- goto err_file;
+ goto err;
- ret = ib_dev->ops.alloc_ucontext(ucontext, &attrs->driver_udata);
+ ret = ucontext->device->ops.alloc_ucontext(ucontext,
+ &attrs->driver_udata);
if (ret)
- goto err_file;
- if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
- ucontext->invalidate_range = NULL;
+ goto err_uncharge;
rdma_restrack_uadd(&ucontext->res);
- fd_install(resp.async_fd, filp);
-
/*
* Make sure that ib_uverbs_get_ucontext() sees the pointer update
* only after all writes to setup the ucontext have completed
@@ -289,24 +259,62 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
smp_store_release(&file->ucontext, ucontext);
mutex_unlock(&file->ucontext_lock);
-
+ up_read(&file->hw_destroy_rwsem);
return 0;
-err_file:
- ib_uverbs_free_async_event_file(file);
- fput(filp);
+err_uncharge:
+ ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device,
+ RDMACG_RESOURCE_HCA_HANDLE);
+err:
+ mutex_unlock(&file->ucontext_lock);
+ up_read(&file->hw_destroy_rwsem);
+ return ret;
+}
-err_fd:
- put_unused_fd(resp.async_fd);
+static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_get_context_resp resp;
+ struct ib_uverbs_get_context cmd;
+ struct ib_device *ib_dev;
+ struct ib_uobject *uobj;
+ int ret;
-err_free:
- kfree(ucontext);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
-err_alloc:
- ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
+ ret = ib_alloc_ucontext(attrs);
+ if (ret)
+ return ret;
-err:
- mutex_unlock(&file->ucontext_lock);
+ uobj = uobj_alloc(UVERBS_OBJECT_ASYNC_EVENT, attrs, &ib_dev);
+ if (IS_ERR(uobj)) {
+ ret = PTR_ERR(uobj);
+ goto err_ucontext;
+ }
+
+ resp = (struct ib_uverbs_get_context_resp){
+ .num_comp_vectors = attrs->ufile->device->num_comp_vectors,
+ .async_fd = uobj->id,
+ };
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
+ goto err_uobj;
+
+ ret = ib_init_ucontext(attrs);
+ if (ret)
+ goto err_uobj;
+
+ ib_uverbs_init_async_event_file(
+ container_of(uobj, struct ib_uverbs_async_event_file, uobj));
+ rdma_alloc_commit_uobject(uobj, attrs);
+ return 0;
+
+err_uobj:
+ rdma_alloc_abort_uobject(uobj, attrs);
+err_ucontext:
+ kfree(attrs->context);
+ attrs->context = NULL;
return ret;
}
@@ -449,7 +457,8 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
if (ret)
goto err_copy;
- return uobj_alloc_commit(uobj, attrs);
+ rdma_alloc_commit_uobject(uobj, attrs);
+ return 0;
err_copy:
ib_dealloc_pd_user(pd, uverbs_get_cleared_udata(attrs));
@@ -645,7 +654,8 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs)
mutex_unlock(&ibudev->xrcd_tree_mutex);
- return uobj_alloc_commit(&obj->uobject, attrs);
+ rdma_alloc_commit_uobject(&obj->uobject, attrs);
+ return 0;
err_copy:
if (inode) {
@@ -777,7 +787,8 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
uobj_put_obj_read(pd);
- return uobj_alloc_commit(uobj, attrs);
+ rdma_alloc_commit_uobject(uobj, attrs);
+ return 0;
err_copy:
ib_dereg_mr_user(mr, uverbs_get_cleared_udata(attrs));
@@ -931,7 +942,8 @@ static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
goto err_copy;
uobj_put_obj_read(pd);
- return uobj_alloc_commit(uobj, attrs);
+ rdma_alloc_commit_uobject(uobj, attrs);
+ return 0;
err_copy:
uverbs_dealloc_mw(mw);
@@ -983,7 +995,8 @@ static int ib_uverbs_create_comp_channel(struct uverbs_attr_bundle *attrs)
return ret;
}
- return uobj_alloc_commit(uobj, attrs);
+ rdma_alloc_commit_uobject(uobj, attrs);
+ return 0;
}
static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
@@ -1013,11 +1026,9 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
}
}
- obj->uobject.user_handle = cmd->user_handle;
- obj->comp_events_reported = 0;
- obj->async_events_reported = 0;
+ obj->uevent.uobject.user_handle = cmd->user_handle;
INIT_LIST_HEAD(&obj->comp_list);
- INIT_LIST_HEAD(&obj->async_list);
+ INIT_LIST_HEAD(&obj->uevent.event_list);
attr.cqe = cmd->cqe;
attr.comp_vector = cmd->comp_vector;
@@ -1029,7 +1040,7 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
goto err_file;
}
cq->device = ib_dev;
- cq->uobject = &obj->uobject;
+ cq->uobject = obj;
cq->comp_handler = ib_uverbs_comp_handler;
cq->event_handler = ib_uverbs_cq_event_handler;
cq->cq_context = ev_file ? &ev_file->ev_queue : NULL;
@@ -1039,9 +1050,9 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
if (ret)
goto err_free;
- obj->uobject.object = cq;
+ obj->uevent.uobject.object = cq;
memset(&resp, 0, sizeof resp);
- resp.base.cq_handle = obj->uobject.id;
+ resp.base.cq_handle = obj->uevent.uobject.id;
resp.base.cqe = cq->cqe;
resp.response_length = uverbs_response_length(attrs, sizeof(resp));
@@ -1052,9 +1063,7 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
if (ret)
goto err_cb;
- ret = uobj_alloc_commit(&obj->uobject, attrs);
- if (ret)
- return ERR_PTR(ret);
+ rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
return obj;
err_cb:
@@ -1064,10 +1073,10 @@ err_free:
kfree(cq);
err_file:
if (ev_file)
- ib_uverbs_release_ucq(attrs->ufile, ev_file, obj);
+ ib_uverbs_release_ucq(ev_file, obj);
err:
- uobj_alloc_abort(&obj->uobject, attrs);
+ uobj_alloc_abort(&obj->uevent.uobject, attrs);
return ERR_PTR(ret);
}
@@ -1136,7 +1145,8 @@ static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs)
ret = uverbs_response(attrs, &resp, sizeof(resp));
out:
- uobj_put_obj_read(cq);
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
return ret;
}
@@ -1219,7 +1229,8 @@ static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs)
ret = uverbs_output_written(attrs, UVERBS_ATTR_CORE_OUT);
out_put:
- uobj_put_obj_read(cq);
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
return ret;
}
@@ -1240,8 +1251,8 @@ static int ib_uverbs_req_notify_cq(struct uverbs_attr_bundle *attrs)
ib_req_notify_cq(cq, cmd.solicited_only ?
IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
- uobj_put_obj_read(cq);
-
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
return 0;
}
@@ -1261,10 +1272,10 @@ static int ib_uverbs_destroy_cq(struct uverbs_attr_bundle *attrs)
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- obj = container_of(uobj, struct ib_ucq_object, uobject);
+ obj = container_of(uobj, struct ib_ucq_object, uevent.uobject);
memset(&resp, 0, sizeof(resp));
resp.comp_events_reported = obj->comp_events_reported;
- resp.async_events_reported = obj->async_events_reported;
+ resp.async_events_reported = obj->uevent.events_reported;
uobj_put_destroy(uobj);
@@ -1378,7 +1389,6 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
}
attr.event_handler = ib_uverbs_qp_event_handler;
- attr.qp_context = attrs->ufile;
attr.send_cq = scq;
attr.recv_cq = rcq;
attr.srq = srq;
@@ -1394,7 +1404,6 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
attr.cap.max_recv_sge = cmd->max_recv_sge;
attr.cap.max_inline_data = cmd->max_inline_data;
- obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
INIT_LIST_HEAD(&obj->mcast_list);
@@ -1424,7 +1433,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
qp = ib_create_qp(pd, &attr);
else
qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata,
- &obj->uevent.uobject);
+ obj);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
@@ -1442,7 +1451,6 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
qp->srq = attr.srq;
qp->rwq_ind_tbl = ind_tbl;
qp->event_handler = attr.event_handler;
- qp->qp_context = attr.qp_context;
qp->qp_type = attr.qp_type;
atomic_set(&qp->usecnt, 0);
atomic_inc(&pd->usecnt);
@@ -1457,7 +1465,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
atomic_inc(&ind_tbl->usecnt);
} else {
/* It is done in _ib_create_qp for other QP types */
- qp->uobject = &obj->uevent.uobject;
+ qp->uobject = obj;
}
obj->uevent.uobject.object = qp;
@@ -1486,15 +1494,19 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
if (pd)
uobj_put_obj_read(pd);
if (scq)
- uobj_put_obj_read(scq);
+ rdma_lookup_put_uobject(&scq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (rcq && rcq != scq)
- uobj_put_obj_read(rcq);
+ rdma_lookup_put_uobject(&rcq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (srq)
- uobj_put_obj_read(srq);
+ rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (ind_tbl)
uobj_put_obj_read(ind_tbl);
- return uobj_alloc_commit(&obj->uevent.uobject, attrs);
+ rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
+ return 0;
err_cb:
ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs));
@@ -1504,11 +1516,14 @@ err_put:
if (pd)
uobj_put_obj_read(pd);
if (scq)
- uobj_put_obj_read(scq);
+ rdma_lookup_put_uobject(&scq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (rcq && rcq != scq)
- uobj_put_obj_read(rcq);
+ rdma_lookup_put_uobject(&rcq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (srq)
- uobj_put_obj_read(srq);
+ rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (ind_tbl)
uobj_put_obj_read(ind_tbl);
@@ -1570,7 +1585,7 @@ static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs)
struct ib_xrcd *xrcd;
struct ib_uobject *uninitialized_var(xrcd_uobj);
struct ib_qp *qp;
- struct ib_qp_open_attr attr;
+ struct ib_qp_open_attr attr = {};
int ret;
struct ib_device *ib_dev;
@@ -1596,11 +1611,9 @@ static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs)
}
attr.event_handler = ib_uverbs_qp_event_handler;
- attr.qp_context = attrs->ufile;
attr.qp_num = cmd.qpn;
attr.qp_type = cmd.qp_type;
- obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
INIT_LIST_HEAD(&obj->mcast_list);
@@ -1623,10 +1636,11 @@ static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs)
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
atomic_inc(&obj->uxrcd->refcnt);
- qp->uobject = &obj->uevent.uobject;
+ qp->uobject = obj;
uobj_put_read(xrcd_uobj);
- return uobj_alloc_commit(&obj->uevent.uobject, attrs);
+ rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
+ return 0;
err_destroy:
ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs));
@@ -1687,7 +1701,8 @@ static int ib_uverbs_query_qp(struct uverbs_attr_bundle *attrs)
ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr);
- uobj_put_obj_read(qp);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (ret)
goto out;
@@ -1924,7 +1939,8 @@ static int modify_qp(struct uverbs_attr_bundle *attrs,
&attrs->driver_udata);
release_qp:
- uobj_put_obj_read(qp);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
out:
kfree(attr);
@@ -2188,7 +2204,8 @@ static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs)
ret = ret2;
out_put:
- uobj_put_obj_read(qp);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
while (wr) {
if (is_ud && ud_wr(wr)->ah)
@@ -2330,7 +2347,8 @@ static int ib_uverbs_post_recv(struct uverbs_attr_bundle *attrs)
resp.bad_wr = 0;
ret = qp->device->ops.post_recv(qp->real_qp, wr, &bad_wr);
- uobj_put_obj_read(qp);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (ret) {
for (next = wr; next; next = next->next) {
++resp.bad_wr;
@@ -2380,7 +2398,8 @@ static int ib_uverbs_post_srq_recv(struct uverbs_attr_bundle *attrs)
resp.bad_wr = 0;
ret = srq->device->ops.post_srq_recv(srq, wr, &bad_wr);
- uobj_put_obj_read(srq);
+ rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (ret)
for (next = wr; next; next = next->next) {
@@ -2468,7 +2487,8 @@ static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs)
goto err_copy;
uobj_put_obj_read(pd);
- return uobj_alloc_commit(uobj, attrs);
+ rdma_alloc_commit_uobject(uobj, attrs);
+ return 0;
err_copy:
rdma_destroy_ah_user(ah, RDMA_DESTROY_AH_SLEEPABLE,
@@ -2510,7 +2530,7 @@ static int ib_uverbs_attach_mcast(struct uverbs_attr_bundle *attrs)
if (!qp)
return -EINVAL;
- obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+ obj = qp->uobject;
mutex_lock(&obj->mcast_lock);
list_for_each_entry(mcast, &obj->mcast_list, list)
@@ -2537,7 +2557,8 @@ static int ib_uverbs_attach_mcast(struct uverbs_attr_bundle *attrs)
out_put:
mutex_unlock(&obj->mcast_lock);
- uobj_put_obj_read(qp);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
return ret;
}
@@ -2559,7 +2580,7 @@ static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs)
if (!qp)
return -EINVAL;
- obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+ obj = qp->uobject;
mutex_lock(&obj->mcast_lock);
list_for_each_entry(mcast, &obj->mcast_list, list)
@@ -2580,7 +2601,8 @@ static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs)
out_put:
mutex_unlock(&obj->mcast_lock);
- uobj_put_obj_read(qp);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
return ret;
}
@@ -2723,12 +2745,6 @@ static int kern_spec_to_ib_spec_action(struct uverbs_attr_bundle *attrs,
return 0;
}
-static size_t kern_spec_filter_sz(const struct ib_uverbs_flow_spec_hdr *spec)
-{
- /* Returns user space filter size, includes padding */
- return (spec->size - sizeof(struct ib_uverbs_flow_spec_hdr)) / 2;
-}
-
static ssize_t spec_filter_size(const void *kern_spec_filter, u16 kern_filter_size,
u16 ib_real_filter_sz)
{
@@ -2872,11 +2888,16 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
union ib_flow_spec *ib_spec)
{
- ssize_t kern_filter_sz;
+ size_t kern_filter_sz;
void *kern_spec_mask;
void *kern_spec_val;
- kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr);
+ if (check_sub_overflow((size_t)kern_spec->hdr.size,
+ sizeof(struct ib_uverbs_flow_spec_hdr),
+ &kern_filter_sz))
+ return -EINVAL;
+
+ kern_filter_sz /= 2;
kern_spec_val = (void *)kern_spec +
sizeof(struct ib_uverbs_flow_spec_hdr);
@@ -2946,7 +2967,6 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs)
wq_init_attr.wq_type = cmd.wq_type;
wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
wq_init_attr.create_flags = cmd.create_flags;
- obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
wq = pd->device->ops.create_wq(pd, &wq_init_attr, &attrs->driver_udata);
@@ -2955,7 +2975,7 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs)
goto err_put_cq;
}
- wq->uobject = &obj->uevent.uobject;
+ wq->uobject = obj;
obj->uevent.uobject.object = wq;
wq->wq_type = wq_init_attr.wq_type;
wq->cq = cq;
@@ -2965,7 +2985,7 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs)
atomic_set(&wq->usecnt, 0);
atomic_inc(&pd->usecnt);
atomic_inc(&cq->usecnt);
- wq->uobject = &obj->uevent.uobject;
+ wq->uobject = obj;
obj->uevent.uobject.object = wq;
memset(&resp, 0, sizeof(resp));
@@ -2979,13 +2999,16 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs)
goto err_copy;
uobj_put_obj_read(pd);
- uobj_put_obj_read(cq);
- return uobj_alloc_commit(&obj->uevent.uobject, attrs);
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
+ return 0;
err_copy:
ib_destroy_wq(wq, uverbs_get_cleared_udata(attrs));
err_put_cq:
- uobj_put_obj_read(cq);
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
err_put_pd:
uobj_put_obj_read(pd);
err_uobj:
@@ -3051,7 +3074,8 @@ static int ib_uverbs_ex_modify_wq(struct uverbs_attr_bundle *attrs)
}
ret = wq->device->ops.modify_wq(wq, &wq_attr, cmd.attr_mask,
&attrs->driver_udata);
- uobj_put_obj_read(wq);
+ rdma_lookup_put_uobject(&wq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
return ret;
}
@@ -3152,9 +3176,11 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
kfree(wqs_handles);
for (j = 0; j < num_read_wqs; j++)
- uobj_put_obj_read(wqs[j]);
+ rdma_lookup_put_uobject(&wqs[j]->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
- return uobj_alloc_commit(uobj, attrs);
+ rdma_alloc_commit_uobject(uobj, attrs);
+ return 0;
err_copy:
ib_destroy_rwq_ind_table(rwq_ind_tbl);
@@ -3162,7 +3188,8 @@ err_uobj:
uobj_alloc_abort(uobj, attrs);
put_wqs:
for (j = 0; j < num_read_wqs; j++)
- uobj_put_obj_read(wqs[j]);
+ rdma_lookup_put_uobject(&wqs[j]->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
err_free:
kfree(wqs_handles);
kfree(wqs);
@@ -3328,11 +3355,13 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
if (err)
goto err_copy;
- uobj_put_obj_read(qp);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
kfree(flow_attr);
if (cmd.flow_attr.num_of_specs)
kfree(kern_flow_attr);
- return uobj_alloc_commit(uobj, attrs);
+ rdma_alloc_commit_uobject(uobj, attrs);
+ return 0;
err_copy:
if (!qp->device->ops.destroy_flow(flow_id))
atomic_dec(&qp->usecnt);
@@ -3341,7 +3370,8 @@ err_free:
err_free_flow_attr:
kfree(flow_attr);
err_put:
- uobj_put_obj_read(qp);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
err_uobj:
uobj_alloc_abort(uobj, attrs);
err_free_attr:
@@ -3426,7 +3456,6 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs,
attr.attr.max_sge = cmd->max_sge;
attr.attr.srq_limit = cmd->srq_limit;
- obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
srq = rdma_zalloc_drv_obj(ib_dev, ib_srq);
@@ -3438,7 +3467,7 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs,
srq->device = pd->device;
srq->pd = pd;
srq->srq_type = cmd->srq_type;
- srq->uobject = &obj->uevent.uobject;
+ srq->uobject = obj;
srq->event_handler = attr.event_handler;
srq->srq_context = attr.srq_context;
@@ -3477,14 +3506,17 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs,
uobj_put_read(xrcd_uobj);
if (ib_srq_has_cq(cmd->srq_type))
- uobj_put_obj_read(attr.ext.cq);
+ rdma_lookup_put_uobject(&attr.ext.cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
uobj_put_obj_read(pd);
- return uobj_alloc_commit(&obj->uevent.uobject, attrs);
+ rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
+ return 0;
err_copy:
ib_destroy_srq_user(srq, uverbs_get_cleared_udata(attrs));
-
+ /* It was released in ib_destroy_srq_user */
+ srq = NULL;
err_free:
kfree(srq);
err_put:
@@ -3492,7 +3524,8 @@ err_put:
err_put_cq:
if (ib_srq_has_cq(cmd->srq_type))
- uobj_put_obj_read(attr.ext.cq);
+ rdma_lookup_put_uobject(&attr.ext.cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
err_put_xrcd:
if (cmd->srq_type == IB_SRQT_XRC) {
@@ -3560,7 +3593,8 @@ static int ib_uverbs_modify_srq(struct uverbs_attr_bundle *attrs)
ret = srq->device->ops.modify_srq(srq, &attr, cmd.attr_mask,
&attrs->driver_udata);
- uobj_put_obj_read(srq);
+ rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
return ret;
}
@@ -3583,7 +3617,8 @@ static int ib_uverbs_query_srq(struct uverbs_attr_bundle *attrs)
ret = ib_query_srq(srq, &attr);
- uobj_put_obj_read(srq);
+ rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (ret)
return ret;
@@ -3708,8 +3743,8 @@ static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs)
ret = rdma_set_cq_moderation(cq, cmd.attr.cq_count, cmd.attr.cq_period);
- uobj_put_obj_read(cq);
-
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
return ret;
}
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 61758201d9b2..538affbc517e 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -220,24 +220,17 @@ static int uverbs_process_idrs_array(struct bundle_priv *pbundle,
return ret;
}
-static int uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi,
- struct uverbs_objs_arr_attr *attr,
- bool commit, struct uverbs_attr_bundle *attrs)
+static void uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi,
+ struct uverbs_objs_arr_attr *attr,
+ bool commit,
+ struct uverbs_attr_bundle *attrs)
{
const struct uverbs_attr_spec *spec = &attr_uapi->spec;
- int current_ret;
- int ret = 0;
size_t i;
- for (i = 0; i != attr->len; i++) {
- current_ret = uverbs_finalize_object(attr->uobjects[i],
- spec->u2.objs_arr.access,
- commit, attrs);
- if (!ret)
- ret = current_ret;
- }
-
- return ret;
+ for (i = 0; i != attr->len; i++)
+ uverbs_finalize_object(attr->uobjects[i],
+ spec->u2.objs_arr.access, commit, attrs);
}
static int uverbs_process_attr(struct bundle_priv *pbundle,
@@ -495,26 +488,22 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle,
return ret;
}
-static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
+static void bundle_destroy(struct bundle_priv *pbundle, bool commit)
{
unsigned int key_bitmap_len = pbundle->method_elm->key_bitmap_len;
struct bundle_alloc_head *memblock;
unsigned int i;
- int ret = 0;
/* fast path for simple uobjects */
i = -1;
while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len,
i + 1)) < key_bitmap_len) {
struct uverbs_attr *attr = &pbundle->bundle.attrs[i];
- int current_ret;
- current_ret = uverbs_finalize_object(
+ uverbs_finalize_object(
attr->obj_attr.uobject,
attr->obj_attr.attr_elm->spec.u.obj.access, commit,
&pbundle->bundle);
- if (!ret)
- ret = current_ret;
}
i = -1;
@@ -523,7 +512,6 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
struct uverbs_attr *attr = &pbundle->bundle.attrs[i];
const struct uverbs_api_attr *attr_uapi;
void __rcu **slot;
- int current_ret;
slot = uapi_get_attr_for_method(
pbundle,
@@ -534,11 +522,8 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
attr_uapi = rcu_dereference_protected(*slot, true);
if (attr_uapi->spec.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) {
- current_ret = uverbs_free_idrs_array(
- attr_uapi, &attr->objs_arr_attr, commit,
- &pbundle->bundle);
- if (!ret)
- ret = current_ret;
+ uverbs_free_idrs_array(attr_uapi, &attr->objs_arr_attr,
+ commit, &pbundle->bundle);
}
}
@@ -548,8 +533,6 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
memblock = memblock->next;
kvfree(tmp);
}
-
- return ret;
}
static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
@@ -562,7 +545,6 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
struct bundle_priv *pbundle;
struct bundle_priv onstack;
void __rcu **slot;
- int destroy_ret;
int ret;
if (unlikely(hdr->driver_id != uapi->driver_id))
@@ -610,10 +592,7 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
memset(pbundle->spec_finalize, 0, sizeof(pbundle->spec_finalize));
ret = ib_uverbs_run_method(pbundle, hdr->num_attrs);
- destroy_ret = bundle_destroy(pbundle, ret == 0);
- if (unlikely(destroy_ret && !ret))
- return destroy_ret;
-
+ bundle_destroy(pbundle, ret == 0);
return ret;
}
@@ -795,6 +774,9 @@ int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
{
const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx);
+ if (IS_ERR(attr))
+ return PTR_ERR(attr);
+
if (size < attr->ptr_attr.len) {
if (clear_user(u64_to_user_ptr(attr->ptr_attr.data) + size,
attr->ptr_attr.len - size))
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 11c13c1381cf..2d4083bf4a04 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -120,20 +120,13 @@ static void ib_uverbs_release_dev(struct device *device)
uverbs_destroy_api(dev->uapi);
cleanup_srcu_struct(&dev->disassociate_srcu);
+ mutex_destroy(&dev->lists_mutex);
+ mutex_destroy(&dev->xrcd_tree_mutex);
kfree(dev);
}
-static void ib_uverbs_release_async_event_file(struct kref *ref)
-{
- struct ib_uverbs_async_event_file *file =
- container_of(ref, struct ib_uverbs_async_event_file, ref);
-
- kfree(file);
-}
-
-void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
- struct ib_uverbs_completion_event_file *ev_file,
- struct ib_ucq_object *uobj)
+void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file,
+ struct ib_ucq_object *uobj)
{
struct ib_uverbs_event *evt, *tmp;
@@ -148,25 +141,24 @@ void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
uverbs_uobject_put(&ev_file->uobj);
}
- spin_lock_irq(&file->async_file->ev_queue.lock);
- list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
- list_del(&evt->list);
- kfree(evt);
- }
- spin_unlock_irq(&file->async_file->ev_queue.lock);
+ ib_uverbs_release_uevent(&uobj->uevent);
}
-void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
- struct ib_uevent_object *uobj)
+void ib_uverbs_release_uevent(struct ib_uevent_object *uobj)
{
+ struct ib_uverbs_async_event_file *async_file =
+ READ_ONCE(uobj->uobject.ufile->async_file);
struct ib_uverbs_event *evt, *tmp;
- spin_lock_irq(&file->async_file->ev_queue.lock);
+ if (!async_file)
+ return;
+
+ spin_lock_irq(&async_file->ev_queue.lock);
list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
- spin_unlock_irq(&file->async_file->ev_queue.lock);
+ spin_unlock_irq(&async_file->ev_queue.lock);
}
void ib_uverbs_detach_umcast(struct ib_qp *qp,
@@ -206,17 +198,17 @@ void ib_uverbs_release_file(struct kref *ref)
ib_uverbs_comp_dev(file->device);
if (file->async_file)
- kref_put(&file->async_file->ref,
- ib_uverbs_release_async_event_file);
+ uverbs_uobject_put(&file->async_file->uobj);
put_device(&file->device->dev);
if (file->disassociate_page)
__free_pages(file->disassociate_page, 0);
+ mutex_destroy(&file->umap_lock);
+ mutex_destroy(&file->ucontext_lock);
kfree(file);
}
static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
- struct ib_uverbs_file *uverbs_file,
struct file *filp, char __user *buf,
size_t count, loff_t *pos,
size_t eventsz)
@@ -234,19 +226,16 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
if (wait_event_interruptible(ev_queue->poll_wait,
(!list_empty(&ev_queue->event_list) ||
- /* The barriers built into wait_event_interruptible()
- * and wake_up() guarentee this will see the null set
- * without using RCU
- */
- !uverbs_file->device->ib_dev)))
+ ev_queue->is_closed)))
return -ERESTARTSYS;
+ spin_lock_irq(&ev_queue->lock);
+
/* If device was disassociated and no event exists set an error */
- if (list_empty(&ev_queue->event_list) &&
- !uverbs_file->device->ib_dev)
+ if (list_empty(&ev_queue->event_list) && ev_queue->is_closed) {
+ spin_unlock_irq(&ev_queue->lock);
return -EIO;
-
- spin_lock_irq(&ev_queue->lock);
+ }
}
event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);
@@ -281,8 +270,7 @@ static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf,
{
struct ib_uverbs_async_event_file *file = filp->private_data;
- return ib_uverbs_event_read(&file->ev_queue, file->uverbs_file, filp,
- buf, count, pos,
+ return ib_uverbs_event_read(&file->ev_queue, filp, buf, count, pos,
sizeof(struct ib_uverbs_async_event_desc));
}
@@ -292,9 +280,8 @@ static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
struct ib_uverbs_completion_event_file *comp_ev_file =
filp->private_data;
- return ib_uverbs_event_read(&comp_ev_file->ev_queue,
- comp_ev_file->uobj.ufile, filp,
- buf, count, pos,
+ return ib_uverbs_event_read(&comp_ev_file->ev_queue, filp, buf, count,
+ pos,
sizeof(struct ib_uverbs_comp_event_desc));
}
@@ -317,7 +304,9 @@ static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
static __poll_t ib_uverbs_async_event_poll(struct file *filp,
struct poll_table_struct *wait)
{
- return ib_uverbs_event_poll(filp->private_data, filp, wait);
+ struct ib_uverbs_async_event_file *file = filp->private_data;
+
+ return ib_uverbs_event_poll(&file->ev_queue, filp, wait);
}
static __poll_t ib_uverbs_comp_event_poll(struct file *filp,
@@ -331,9 +320,9 @@ static __poll_t ib_uverbs_comp_event_poll(struct file *filp,
static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on)
{
- struct ib_uverbs_event_queue *ev_queue = filp->private_data;
+ struct ib_uverbs_async_event_file *file = filp->private_data;
- return fasync_helper(fd, filp, on, &ev_queue->async_queue);
+ return fasync_helper(fd, filp, on, &file->ev_queue.async_queue);
}
static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
@@ -344,70 +333,20 @@ static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue);
}
-static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp)
-{
- struct ib_uverbs_async_event_file *file = filp->private_data;
- struct ib_uverbs_file *uverbs_file = file->uverbs_file;
- struct ib_uverbs_event *entry, *tmp;
- int closed_already = 0;
-
- mutex_lock(&uverbs_file->device->lists_mutex);
- spin_lock_irq(&file->ev_queue.lock);
- closed_already = file->ev_queue.is_closed;
- file->ev_queue.is_closed = 1;
- list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
- if (entry->counter)
- list_del(&entry->obj_list);
- kfree(entry);
- }
- spin_unlock_irq(&file->ev_queue.lock);
- if (!closed_already) {
- list_del(&file->list);
- ib_unregister_event_handler(&uverbs_file->event_handler);
- }
- mutex_unlock(&uverbs_file->device->lists_mutex);
-
- kref_put(&uverbs_file->ref, ib_uverbs_release_file);
- kref_put(&file->ref, ib_uverbs_release_async_event_file);
-
- return 0;
-}
-
-static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp)
-{
- struct ib_uobject *uobj = filp->private_data;
- struct ib_uverbs_completion_event_file *file = container_of(
- uobj, struct ib_uverbs_completion_event_file, uobj);
- struct ib_uverbs_event *entry, *tmp;
-
- spin_lock_irq(&file->ev_queue.lock);
- list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
- if (entry->counter)
- list_del(&entry->obj_list);
- kfree(entry);
- }
- file->ev_queue.is_closed = 1;
- spin_unlock_irq(&file->ev_queue.lock);
-
- uverbs_close_fd(filp);
-
- return 0;
-}
-
const struct file_operations uverbs_event_fops = {
.owner = THIS_MODULE,
.read = ib_uverbs_comp_event_read,
.poll = ib_uverbs_comp_event_poll,
- .release = ib_uverbs_comp_event_close,
+ .release = uverbs_uobject_fd_release,
.fasync = ib_uverbs_comp_event_fasync,
.llseek = no_llseek,
};
-static const struct file_operations uverbs_async_event_fops = {
+const struct file_operations uverbs_async_event_fops = {
.owner = THIS_MODULE,
.read = ib_uverbs_async_event_read,
.poll = ib_uverbs_async_event_poll,
- .release = ib_uverbs_async_event_close,
+ .release = uverbs_uobject_fd_release,
.fasync = ib_uverbs_async_event_fasync,
.llseek = no_llseek,
};
@@ -434,9 +373,9 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
return;
}
- uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
+ uobj = cq->uobject;
- entry->desc.comp.cq_handle = cq->uobject->user_handle;
+ entry->desc.comp.cq_handle = cq->uobject->uevent.uobject.user_handle;
entry->counter = &uobj->comp_events_reported;
list_add_tail(&entry->list, &ev_queue->event_list);
@@ -447,102 +386,82 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN);
}
-static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
- __u64 element, __u64 event,
- struct list_head *obj_list,
- u32 *counter)
+static void
+ib_uverbs_async_handler(struct ib_uverbs_async_event_file *async_file,
+ __u64 element, __u64 event, struct list_head *obj_list,
+ u32 *counter)
{
struct ib_uverbs_event *entry;
unsigned long flags;
- spin_lock_irqsave(&file->async_file->ev_queue.lock, flags);
- if (file->async_file->ev_queue.is_closed) {
- spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
+ if (!async_file)
+ return;
+
+ spin_lock_irqsave(&async_file->ev_queue.lock, flags);
+ if (async_file->ev_queue.is_closed) {
+ spin_unlock_irqrestore(&async_file->ev_queue.lock, flags);
return;
}
entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
if (!entry) {
- spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
+ spin_unlock_irqrestore(&async_file->ev_queue.lock, flags);
return;
}
- entry->desc.async.element = element;
+ entry->desc.async.element = element;
entry->desc.async.event_type = event;
- entry->desc.async.reserved = 0;
- entry->counter = counter;
+ entry->desc.async.reserved = 0;
+ entry->counter = counter;
- list_add_tail(&entry->list, &file->async_file->ev_queue.event_list);
+ list_add_tail(&entry->list, &async_file->ev_queue.event_list);
if (obj_list)
list_add_tail(&entry->obj_list, obj_list);
- spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
+ spin_unlock_irqrestore(&async_file->ev_queue.lock, flags);
- wake_up_interruptible(&file->async_file->ev_queue.poll_wait);
- kill_fasync(&file->async_file->ev_queue.async_queue, SIGIO, POLL_IN);
+ wake_up_interruptible(&async_file->ev_queue.poll_wait);
+ kill_fasync(&async_file->ev_queue.async_queue, SIGIO, POLL_IN);
}
-void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
+static void uverbs_uobj_event(struct ib_uevent_object *eobj,
+ struct ib_event *event)
{
- struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
- struct ib_ucq_object, uobject);
+ ib_uverbs_async_handler(READ_ONCE(eobj->uobject.ufile->async_file),
+ eobj->uobject.user_handle, event->event,
+ &eobj->event_list, &eobj->events_reported);
+}
- ib_uverbs_async_handler(uobj->uobject.ufile, uobj->uobject.user_handle,
- event->event, &uobj->async_list,
- &uobj->async_events_reported);
+void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
+{
+ uverbs_uobj_event(&event->element.cq->uobject->uevent, event);
}
void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
{
- struct ib_uevent_object *uobj;
-
/* for XRC target qp's, check that qp is live */
if (!event->element.qp->uobject)
return;
- uobj = container_of(event->element.qp->uobject,
- struct ib_uevent_object, uobject);
-
- ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
- event->event, &uobj->event_list,
- &uobj->events_reported);
+ uverbs_uobj_event(&event->element.qp->uobject->uevent, event);
}
void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr)
{
- struct ib_uevent_object *uobj = container_of(event->element.wq->uobject,
- struct ib_uevent_object, uobject);
-
- ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
- event->event, &uobj->event_list,
- &uobj->events_reported);
+ uverbs_uobj_event(&event->element.wq->uobject->uevent, event);
}
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
{
- struct ib_uevent_object *uobj;
-
- uobj = container_of(event->element.srq->uobject,
- struct ib_uevent_object, uobject);
-
- ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
- event->event, &uobj->event_list,
- &uobj->events_reported);
-}
-
-void ib_uverbs_event_handler(struct ib_event_handler *handler,
- struct ib_event *event)
-{
- struct ib_uverbs_file *file =
- container_of(handler, struct ib_uverbs_file, event_handler);
-
- ib_uverbs_async_handler(file, event->element.port_num, event->event,
- NULL, NULL);
+ uverbs_uobj_event(&event->element.srq->uobject->uevent, event);
}
-void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file)
+static void ib_uverbs_event_handler(struct ib_event_handler *handler,
+ struct ib_event *event)
{
- kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file);
- file->async_file = NULL;
+ ib_uverbs_async_handler(
+ container_of(handler, struct ib_uverbs_async_event_file,
+ event_handler),
+ event->element.port_num, event->event, NULL, NULL);
}
void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue)
@@ -554,45 +473,26 @@ void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue)
ev_queue->async_queue = NULL;
}
-struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
- struct ib_device *ib_dev)
+void ib_uverbs_init_async_event_file(
+ struct ib_uverbs_async_event_file *async_file)
{
- struct ib_uverbs_async_event_file *ev_file;
- struct file *filp;
-
- ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL);
- if (!ev_file)
- return ERR_PTR(-ENOMEM);
-
- ib_uverbs_init_event_queue(&ev_file->ev_queue);
- ev_file->uverbs_file = uverbs_file;
- kref_get(&ev_file->uverbs_file->ref);
- kref_init(&ev_file->ref);
- filp = anon_inode_getfile("[infinibandevent]", &uverbs_async_event_fops,
- ev_file, O_RDONLY);
- if (IS_ERR(filp))
- goto err_put_refs;
-
- mutex_lock(&uverbs_file->device->lists_mutex);
- list_add_tail(&ev_file->list,
- &uverbs_file->device->uverbs_events_file_list);
- mutex_unlock(&uverbs_file->device->lists_mutex);
-
- WARN_ON(uverbs_file->async_file);
- uverbs_file->async_file = ev_file;
- kref_get(&uverbs_file->async_file->ref);
- INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
- ib_dev,
- ib_uverbs_event_handler);
- ib_register_event_handler(&uverbs_file->event_handler);
- /* At that point async file stuff was fully set */
+ struct ib_uverbs_file *uverbs_file = async_file->uobj.ufile;
+ struct ib_device *ib_dev = async_file->uobj.context->device;
+
+ ib_uverbs_init_event_queue(&async_file->ev_queue);
- return filp;
+ /* The first async_event_file becomes the default one for the file. */
+ mutex_lock(&uverbs_file->ucontext_lock);
+ if (!uverbs_file->async_file) {
+ /* Pairs with the put in ib_uverbs_release_file */
+ uverbs_uobject_get(&async_file->uobj);
+ smp_store_release(&uverbs_file->async_file, async_file);
+ }
+ mutex_unlock(&uverbs_file->ucontext_lock);
-err_put_refs:
- kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file);
- kref_put(&ev_file->ref, ib_uverbs_release_async_event_file);
- return filp;
+ INIT_IB_EVENT_HANDLER(&async_file->event_handler, ib_dev,
+ ib_uverbs_event_handler);
+ ib_register_event_handler(&async_file->event_handler);
}
static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
@@ -768,6 +668,8 @@ out_unlock:
return (ret) ? : count;
}
+static const struct vm_operations_struct rdma_umap_ops;
+
static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct ib_uverbs_file *file = filp->private_data;
@@ -781,7 +683,7 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
ret = PTR_ERR(ucontext);
goto out;
}
-
+ vma->vm_ops = &rdma_umap_ops;
ret = ucontext->device->ops.mmap(ucontext, vma);
out:
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
@@ -789,38 +691,6 @@ out:
}
/*
- * Each time we map IO memory into user space this keeps track of the mapping.
- * When the device is hot-unplugged we 'zap' the mmaps in user space to point
- * to the zero page and allow the hot unplug to proceed.
- *
- * This is necessary for cases like PCI physical hot unplug as the actual BAR
- * memory may vanish after this and access to it from userspace could MCE.
- *
- * RDMA drivers supporting disassociation must have their user space designed
- * to cope in some way with their IO pages going to the zero page.
- */
-struct rdma_umap_priv {
- struct vm_area_struct *vma;
- struct list_head list;
-};
-
-static const struct vm_operations_struct rdma_umap_ops;
-
-static void rdma_umap_priv_init(struct rdma_umap_priv *priv,
- struct vm_area_struct *vma)
-{
- struct ib_uverbs_file *ufile = vma->vm_file->private_data;
-
- priv->vma = vma;
- vma->vm_private_data = priv;
- vma->vm_ops = &rdma_umap_ops;
-
- mutex_lock(&ufile->umap_lock);
- list_add(&priv->list, &ufile->umaps);
- mutex_unlock(&ufile->umap_lock);
-}
-
-/*
* The VMA has been dup'd, initialize the vm_private_data with a new tracking
* struct
*/
@@ -845,7 +715,7 @@ static void rdma_umap_open(struct vm_area_struct *vma)
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
goto out_unlock;
- rdma_umap_priv_init(priv, vma);
+ rdma_umap_priv_init(priv, vma, opriv->entry);
up_read(&ufile->hw_destroy_rwsem);
return;
@@ -876,6 +746,9 @@ static void rdma_umap_close(struct vm_area_struct *vma)
* this point.
*/
mutex_lock(&ufile->umap_lock);
+ if (priv->entry)
+ rdma_user_mmap_entry_put(priv->entry);
+
list_del(&priv->list);
mutex_unlock(&ufile->umap_lock);
kfree(priv);
@@ -927,44 +800,6 @@ static const struct vm_operations_struct rdma_umap_ops = {
.fault = rdma_umap_fault,
};
-/*
- * Map IO memory into a process. This is to be called by drivers as part of
- * their mmap() functions if they wish to send something like PCI-E BAR memory
- * to userspace.
- */
-int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
- unsigned long pfn, unsigned long size, pgprot_t prot)
-{
- struct ib_uverbs_file *ufile = ucontext->ufile;
- struct rdma_umap_priv *priv;
-
- if (!(vma->vm_flags & VM_SHARED))
- return -EINVAL;
-
- if (vma->vm_end - vma->vm_start != size)
- return -EINVAL;
-
- /* Driver is using this wrong, must be called by ib_uverbs_mmap */
- if (WARN_ON(!vma->vm_file ||
- vma->vm_file->private_data != ufile))
- return -EINVAL;
- lockdep_assert_held(&ufile->device->disassociate_srcu);
-
- priv = kzalloc(sizeof(*priv), GFP_KERNEL);
- if (!priv)
- return -ENOMEM;
-
- vma->vm_page_prot = prot;
- if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
- kfree(priv);
- return -EAGAIN;
- }
-
- rdma_umap_priv_init(priv, vma);
- return 0;
-}
-EXPORT_SYMBOL(rdma_user_mmap_io);
-
void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
{
struct rdma_umap_priv *priv, *next_priv;
@@ -1014,6 +849,11 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
zap_vma_ptes(vma, vma->vm_start,
vma->vm_end - vma->vm_start);
+
+ if (priv->entry) {
+ rdma_user_mmap_entry_put(priv->entry);
+ priv->entry = NULL;
+ }
}
mutex_unlock(&ufile->umap_lock);
skip_mm:
@@ -1135,7 +975,7 @@ static const struct file_operations uverbs_fops = {
.release = ib_uverbs_close,
.llseek = no_llseek,
.unlocked_ioctl = ib_uverbs_ioctl,
- .compat_ioctl = ib_uverbs_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static const struct file_operations uverbs_mmap_fops = {
@@ -1146,7 +986,7 @@ static const struct file_operations uverbs_mmap_fops = {
.release = ib_uverbs_close,
.llseek = no_llseek,
.unlocked_ioctl = ib_uverbs_ioctl,
- .compat_ioctl = ib_uverbs_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static int ib_uverbs_get_nl_info(struct ib_device *ibdev, void *client_data,
@@ -1281,7 +1121,6 @@ static void ib_uverbs_add_one(struct ib_device *device)
mutex_init(&uverbs_dev->xrcd_tree_mutex);
mutex_init(&uverbs_dev->lists_mutex);
INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
- INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
rcu_assign_pointer(uverbs_dev->ib_dev, device);
uverbs_dev->num_comp_vectors = device->num_comp_vectors;
@@ -1326,14 +1165,9 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
struct ib_device *ib_dev)
{
struct ib_uverbs_file *file;
- struct ib_uverbs_async_event_file *event_file;
- struct ib_event event;
/* Pending running commands to terminate */
uverbs_disassociate_api_pre(uverbs_dev);
- event.event = IB_EVENT_DEVICE_FATAL;
- event.element.port_num = 0;
- event.device = ib_dev;
mutex_lock(&uverbs_dev->lists_mutex);
while (!list_empty(&uverbs_dev->uverbs_file_list)) {
@@ -1349,31 +1183,14 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
*/
mutex_unlock(&uverbs_dev->lists_mutex);
- ib_uverbs_event_handler(&file->event_handler, &event);
+ ib_uverbs_async_handler(READ_ONCE(file->async_file), 0,
+ IB_EVENT_DEVICE_FATAL, NULL, NULL);
+
uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE);
kref_put(&file->ref, ib_uverbs_release_file);
mutex_lock(&uverbs_dev->lists_mutex);
}
-
- while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
- event_file = list_first_entry(&uverbs_dev->
- uverbs_events_file_list,
- struct ib_uverbs_async_event_file,
- list);
- spin_lock_irq(&event_file->ev_queue.lock);
- event_file->ev_queue.is_closed = 1;
- spin_unlock_irq(&event_file->ev_queue.lock);
-
- list_del(&event_file->list);
- ib_unregister_event_handler(
- &event_file->uverbs_file->event_handler);
- event_file->uverbs_file->event_handler.device =
- NULL;
-
- wake_up_interruptible(&event_file->ev_queue.poll_wait);
- kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN);
- }
mutex_unlock(&uverbs_dev->lists_mutex);
uverbs_disassociate_api(uverbs_dev->uapi);
@@ -1487,6 +1304,7 @@ static void __exit ib_uverbs_cleanup(void)
IB_UVERBS_NUM_FIXED_MINOR);
unregister_chrdev_region(dynamic_uverbs_dev,
IB_UVERBS_NUM_DYNAMIC_MINOR);
+ mmu_notifier_synchronize();
}
module_init(ib_uverbs_init);
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index 35b2e2c640cc..3abfc63225cb 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -105,7 +105,7 @@ static int uverbs_free_qp(struct ib_uobject *uobject,
if (uqp->uxrcd)
atomic_dec(&uqp->uxrcd->refcnt);
- ib_uverbs_release_uevent(attrs->ufile, &uqp->uevent);
+ ib_uverbs_release_uevent(&uqp->uevent);
return ret;
}
@@ -138,7 +138,7 @@ static int uverbs_free_wq(struct ib_uobject *uobject,
if (ib_is_destroy_retryable(ret, why, uobject))
return ret;
- ib_uverbs_release_uevent(attrs->ufile, &uwq->uevent);
+ ib_uverbs_release_uevent(&uwq->uevent);
return ret;
}
@@ -163,7 +163,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject,
atomic_dec(&us->uxrcd->refcnt);
}
- ib_uverbs_release_uevent(attrs->ufile, uevent);
+ ib_uverbs_release_uevent(uevent);
return ret;
}
@@ -202,24 +202,41 @@ static int uverbs_free_pd(struct ib_uobject *uobject,
return 0;
}
-static int uverbs_hot_unplug_completion_event_file(struct ib_uobject *uobj,
- enum rdma_remove_reason why)
+void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue)
{
- struct ib_uverbs_completion_event_file *comp_event_file =
- container_of(uobj, struct ib_uverbs_completion_event_file,
- uobj);
- struct ib_uverbs_event_queue *event_queue = &comp_event_file->ev_queue;
+ struct ib_uverbs_event *entry, *tmp;
spin_lock_irq(&event_queue->lock);
+ /*
+ * The user must ensure that no new items are added to the event_list
+ * once is_closed is set.
+ */
event_queue->is_closed = 1;
spin_unlock_irq(&event_queue->lock);
+ wake_up_interruptible(&event_queue->poll_wait);
+ kill_fasync(&event_queue->async_queue, SIGIO, POLL_IN);
- if (why == RDMA_REMOVE_DRIVER_REMOVE) {
- wake_up_interruptible(&event_queue->poll_wait);
- kill_fasync(&event_queue->async_queue, SIGIO, POLL_IN);
+ spin_lock_irq(&event_queue->lock);
+ list_for_each_entry_safe(entry, tmp, &event_queue->event_list, list) {
+ if (entry->counter)
+ list_del(&entry->obj_list);
+ list_del(&entry->list);
+ kfree(entry);
}
+ spin_unlock_irq(&event_queue->lock);
+}
+
+static int
+uverbs_completion_event_file_destroy_uobj(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ struct ib_uverbs_completion_event_file *file =
+ container_of(uobj, struct ib_uverbs_completion_event_file,
+ uobj);
+
+ ib_uverbs_free_event_queue(&file->ev_queue);
return 0;
-};
+}
int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs)
{
@@ -230,7 +247,7 @@ EXPORT_SYMBOL(uverbs_destroy_def_handler);
DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_COMP_CHANNEL,
UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file),
- uverbs_hot_unplug_completion_event_file,
+ uverbs_completion_event_file_destroy_uobj,
&uverbs_event_fops,
"[infinibandevent]",
O_RDONLY));
diff --git a/drivers/infiniband/core/uverbs_std_types_async_fd.c b/drivers/infiniband/core/uverbs_std_types_async_fd.c
new file mode 100644
index 000000000000..82ec0806b34b
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_async_fd.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include <rdma/uverbs_ioctl.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int UVERBS_HANDLER(UVERBS_METHOD_ASYNC_EVENT_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_METHOD_ASYNC_EVENT_ALLOC);
+
+ ib_uverbs_init_async_event_file(
+ container_of(uobj, struct ib_uverbs_async_event_file, uobj));
+ return 0;
+}
+
+static int uverbs_async_event_destroy_uobj(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ struct ib_uverbs_async_event_file *event_file =
+ container_of(uobj, struct ib_uverbs_async_event_file, uobj);
+
+ ib_unregister_event_handler(&event_file->event_handler);
+ ib_uverbs_free_event_queue(&event_file->ev_queue);
+ return 0;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_ASYNC_EVENT_ALLOC,
+ UVERBS_ATTR_FD(UVERBS_ATTR_ASYNC_EVENT_ALLOC_FD_HANDLE,
+ UVERBS_OBJECT_ASYNC_EVENT,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_ASYNC_EVENT,
+ UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_async_event_file),
+ uverbs_async_event_destroy_uobj,
+ &uverbs_async_event_fops,
+ "[infinibandevent]",
+ O_RDONLY),
+ &UVERBS_METHOD(UVERBS_METHOD_ASYNC_EVENT_ALLOC));
+
+const struct uapi_definition uverbs_def_obj_async_fd[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_ASYNC_EVENT),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index e39fe6a8aac4..da4110a0eea2 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -41,7 +41,7 @@ static int uverbs_free_cq(struct ib_uobject *uobject,
struct ib_cq *cq = uobject->object;
struct ib_uverbs_event_queue *ev_queue = cq->cq_context;
struct ib_ucq_object *ucq =
- container_of(uobject, struct ib_ucq_object, uobject);
+ container_of(uobject, struct ib_ucq_object, uevent.uobject);
int ret;
ret = ib_destroy_cq_user(cq, &attrs->driver_udata);
@@ -49,7 +49,6 @@ static int uverbs_free_cq(struct ib_uobject *uobject,
return ret;
ib_uverbs_release_ucq(
- attrs->ufile,
ev_queue ? container_of(ev_queue,
struct ib_uverbs_completion_event_file,
ev_queue) :
@@ -63,7 +62,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
{
struct ib_ucq_object *obj = container_of(
uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE),
- typeof(*obj), uobject);
+ typeof(*obj), uevent.uobject);
struct ib_device *ib_dev = attrs->context->device;
int ret;
u64 user_handle;
@@ -106,10 +105,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
goto err_event_file;
}
- obj->comp_events_reported = 0;
- obj->async_events_reported = 0;
INIT_LIST_HEAD(&obj->comp_list);
- INIT_LIST_HEAD(&obj->async_list);
+ INIT_LIST_HEAD(&obj->uevent.event_list);
cq = rdma_zalloc_drv_obj(ib_dev, ib_cq);
if (!cq) {
@@ -118,7 +115,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
}
cq->device = ib_dev;
- cq->uobject = &obj->uobject;
+ cq->uobject = obj;
cq->comp_handler = ib_uverbs_comp_handler;
cq->event_handler = ib_uverbs_cq_event_handler;
cq->cq_context = ev_file ? &ev_file->ev_queue : NULL;
@@ -129,8 +126,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
if (ret)
goto err_free;
- obj->uobject.object = cq;
- obj->uobject.user_handle = user_handle;
+ obj->uevent.uobject.object = cq;
+ obj->uevent.uobject.user_handle = user_handle;
rdma_restrack_uadd(&cq->res);
ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe,
@@ -182,10 +179,10 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(
struct ib_uobject *uobj =
uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE);
struct ib_ucq_object *obj =
- container_of(uobj, struct ib_ucq_object, uobject);
+ container_of(uobj, struct ib_ucq_object, uevent.uobject);
struct ib_uverbs_destroy_cq_resp resp = {
.comp_events_reported = obj->comp_events_reported,
- .async_events_reported = obj->async_events_reported
+ .async_events_reported = obj->uevent.events_reported
};
return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_CQ_RESP, &resp,
diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c
index 2a3f2f01028d..ae4a59d6f9b1 100644
--- a/drivers/infiniband/core/uverbs_std_types_device.c
+++ b/drivers/infiniband/core/uverbs_std_types_device.c
@@ -200,6 +200,43 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_PORT)(
&resp, sizeof(resp));
}
+static int UVERBS_HANDLER(UVERBS_METHOD_GET_CONTEXT)(
+ struct uverbs_attr_bundle *attrs)
+{
+ u32 num_comp = attrs->ufile->device->num_comp_vectors;
+ u64 core_support = IB_UVERBS_CORE_SUPPORT_OPTIONAL_MR_ACCESS;
+ int ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS,
+ &num_comp, sizeof(num_comp));
+ if (IS_UVERBS_COPY_ERR(ret))
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT,
+ &core_support, sizeof(core_support));
+ if (IS_UVERBS_COPY_ERR(ret))
+ return ret;
+
+ ret = ib_alloc_ucontext(attrs);
+ if (ret)
+ return ret;
+ ret = ib_init_ucontext(attrs);
+ if (ret) {
+ kfree(attrs->context);
+ attrs->context = NULL;
+ return ret;
+ }
+ return 0;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_GET_CONTEXT,
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS,
+ UVERBS_ATTR_TYPE(u32), UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT,
+ UVERBS_ATTR_TYPE(u64), UA_OPTIONAL),
+ UVERBS_ATTR_UHW());
+
DECLARE_UVERBS_NAMED_METHOD(
UVERBS_METHOD_INFO_HANDLES,
/* Also includes any device specific object ids */
@@ -220,6 +257,7 @@ DECLARE_UVERBS_NAMED_METHOD(
UA_MANDATORY));
DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE,
+ &UVERBS_METHOD(UVERBS_METHOD_GET_CONTEXT),
&UVERBS_METHOD(UVERBS_METHOD_INVOKE_WRITE),
&UVERBS_METHOD(UVERBS_METHOD_INFO_HANDLES),
&UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT));
diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
index 00c547887132..3f121ac31e0a 100644
--- a/drivers/infiniband/core/uverbs_uapi.c
+++ b/drivers/infiniband/core/uverbs_uapi.c
@@ -195,9 +195,9 @@ static int uapi_merge_obj_tree(struct uverbs_api *uapi,
* disassociation, and the FD types require the driver to use
* struct file_operations.owner to prevent the driver module
* code from unloading while the file is open. This provides
- * enough safety that uverbs_close_fd() will continue to work.
- * Drivers using FD are responsible to handle disassociation of
- * the device on their own.
+ * enough safety that uverbs_uobject_fd_release() will
+ * continue to work. Drivers using FD are responsible to
+ * handle disassociation of the device on their own.
*/
if (WARN_ON(is_driver &&
obj->type_attrs->type_class != &uverbs_idr_class &&
@@ -626,6 +626,7 @@ void uverbs_destroy_api(struct uverbs_api *uapi)
}
static const struct uapi_definition uverbs_core_api[] = {
+ UAPI_DEF_CHAIN(uverbs_def_obj_async_fd),
UAPI_DEF_CHAIN(uverbs_def_obj_counters),
UAPI_DEF_CHAIN(uverbs_def_obj_cq),
UAPI_DEF_CHAIN(uverbs_def_obj_device),
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 92349bf37589..3ebae3b65c28 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -52,6 +52,9 @@
#include <rdma/rw.h>
#include "core_priv.h"
+#include <trace/events/rdma_core.h>
+
+#include <trace/events/rdma_core.h>
static int ib_resolve_eth_dmac(struct ib_device *device,
struct rdma_ah_attr *ah_attr);
@@ -244,6 +247,8 @@ EXPORT_SYMBOL(rdma_port_get_link_layer);
/**
* ib_alloc_pd - Allocates an unused protection domain.
* @device: The device on which to allocate the protection domain.
+ * @flags: protection domain flags
+ * @caller: caller's build-time module name
*
* A protection domain object provides an association between QPs, shared
* receive queues, address handles, memory regions, and memory windows.
@@ -662,16 +667,17 @@ static bool find_gid_index(const union ib_gid *gid,
void *context)
{
struct find_gid_index_context *ctx = context;
+ u16 vlan_id = 0xffff;
+ int ret;
if (ctx->gid_type != gid_attr->gid_type)
return false;
- if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) ||
- (is_vlan_dev(gid_attr->ndev) &&
- vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id))
+ ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL);
+ if (ret)
return false;
- return true;
+ return ctx->vlan_id == vlan_id;
}
static const struct ib_gid_attr *
@@ -1050,11 +1056,11 @@ static void __ib_shared_qp_event_handler(struct ib_event *event, void *context)
struct ib_qp *qp = context;
unsigned long flags;
- spin_lock_irqsave(&qp->device->event_handler_lock, flags);
+ spin_lock_irqsave(&qp->device->qp_open_list_lock, flags);
list_for_each_entry(event->element.qp, &qp->open_list, open_list)
if (event->element.qp->event_handler)
event->element.qp->event_handler(event, event->element.qp->qp_context);
- spin_unlock_irqrestore(&qp->device->event_handler_lock, flags);
+ spin_unlock_irqrestore(&qp->device->qp_open_list_lock, flags);
}
static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp)
@@ -1091,9 +1097,9 @@ static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp,
qp->qp_num = real_qp->qp_num;
qp->qp_type = real_qp->qp_type;
- spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
+ spin_lock_irqsave(&real_qp->device->qp_open_list_lock, flags);
list_add(&qp->open_list, &real_qp->open_list);
- spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
+ spin_unlock_irqrestore(&real_qp->device->qp_open_list_lock, flags);
return qp;
}
@@ -1821,9 +1827,9 @@ int ib_close_qp(struct ib_qp *qp)
if (real_qp == qp)
return -EINVAL;
- spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
+ spin_lock_irqsave(&real_qp->device->qp_open_list_lock, flags);
list_del(&qp->open_list);
- spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
+ spin_unlock_irqrestore(&real_qp->device->qp_open_list_lock, flags);
atomic_dec(&real_qp->usecnt);
if (qp->qp_sec)
@@ -1987,6 +1993,47 @@ EXPORT_SYMBOL(ib_resize_cq);
/* Memory regions */
+struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags)
+{
+ struct ib_mr *mr;
+
+ if (access_flags & IB_ACCESS_ON_DEMAND) {
+ if (!(pd->device->attrs.device_cap_flags &
+ IB_DEVICE_ON_DEMAND_PAGING)) {
+ pr_debug("ODP support not available\n");
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ mr = pd->device->ops.reg_user_mr(pd, start, length, virt_addr,
+ access_flags, NULL);
+
+ if (IS_ERR(mr))
+ return mr;
+
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->dm = NULL;
+ atomic_inc(&pd->usecnt);
+ mr->res.type = RDMA_RESTRACK_MR;
+ rdma_restrack_kadd(&mr->res);
+
+ return mr;
+}
+EXPORT_SYMBOL(ib_reg_user_mr);
+
+int ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
+ u32 flags, struct ib_sge *sg_list, u32 num_sge)
+{
+ if (!pd->device->ops.advise_mr)
+ return -EOPNOTSUPP;
+
+ return pd->device->ops.advise_mr(pd, advice, flags, sg_list, num_sge,
+ NULL);
+}
+EXPORT_SYMBOL(ib_advise_mr);
+
int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata)
{
struct ib_pd *pd = mr->pd;
@@ -1994,6 +2041,7 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata)
struct ib_sig_attrs *sig_attrs = mr->sig_attrs;
int ret;
+ trace_mr_dereg(mr);
rdma_restrack_del(&mr->res);
ret = mr->device->ops.dereg_mr(mr, udata);
if (!ret) {
@@ -2025,11 +2073,16 @@ struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type,
{
struct ib_mr *mr;
- if (!pd->device->ops.alloc_mr)
- return ERR_PTR(-EOPNOTSUPP);
+ if (!pd->device->ops.alloc_mr) {
+ mr = ERR_PTR(-EOPNOTSUPP);
+ goto out;
+ }
- if (WARN_ON_ONCE(mr_type == IB_MR_TYPE_INTEGRITY))
- return ERR_PTR(-EINVAL);
+ if (mr_type == IB_MR_TYPE_INTEGRITY) {
+ WARN_ON_ONCE(1);
+ mr = ERR_PTR(-EINVAL);
+ goto out;
+ }
mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg, udata);
if (!IS_ERR(mr)) {
@@ -2045,6 +2098,8 @@ struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type,
mr->sig_attrs = NULL;
}
+out:
+ trace_mr_alloc(pd, mr_type, max_num_sg, mr);
return mr;
}
EXPORT_SYMBOL(ib_alloc_mr_user);
@@ -2069,21 +2124,27 @@ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd,
struct ib_sig_attrs *sig_attrs;
if (!pd->device->ops.alloc_mr_integrity ||
- !pd->device->ops.map_mr_sg_pi)
- return ERR_PTR(-EOPNOTSUPP);
+ !pd->device->ops.map_mr_sg_pi) {
+ mr = ERR_PTR(-EOPNOTSUPP);
+ goto out;
+ }
- if (!max_num_meta_sg)
- return ERR_PTR(-EINVAL);
+ if (!max_num_meta_sg) {
+ mr = ERR_PTR(-EINVAL);
+ goto out;
+ }
sig_attrs = kzalloc(sizeof(struct ib_sig_attrs), GFP_KERNEL);
- if (!sig_attrs)
- return ERR_PTR(-ENOMEM);
+ if (!sig_attrs) {
+ mr = ERR_PTR(-ENOMEM);
+ goto out;
+ }
mr = pd->device->ops.alloc_mr_integrity(pd, max_num_data_sg,
max_num_meta_sg);
if (IS_ERR(mr)) {
kfree(sig_attrs);
- return mr;
+ goto out;
}
mr->device = pd->device;
@@ -2097,6 +2158,8 @@ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd,
mr->type = IB_MR_TYPE_INTEGRITY;
mr->sig_attrs = sig_attrs;
+out:
+ trace_mr_integ_alloc(pd, max_num_data_sg, max_num_meta_sg, mr);
return mr;
}
EXPORT_SYMBOL(ib_alloc_mr_integrity);
@@ -2259,6 +2322,7 @@ int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
if (ret)
return ret;
}
+ mutex_destroy(&xrcd->tgt_qp_mutex);
return xrcd->device->ops.dealloc_xrcd(xrcd, udata);
}
@@ -2457,6 +2521,16 @@ int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
}
EXPORT_SYMBOL(ib_set_vf_guid);
+int ib_get_vf_guid(struct ib_device *device, int vf, u8 port,
+ struct ifla_vf_guid *node_guid,
+ struct ifla_vf_guid *port_guid)
+{
+ if (!device->ops.get_vf_guid)
+ return -EOPNOTSUPP;
+
+ return device->ops.get_vf_guid(device, vf, port, node_guid, port_guid);
+}
+EXPORT_SYMBOL(ib_get_vf_guid);
/**
* ib_map_mr_sg_pi() - Map the dma mapped SG lists for PI (protection
* information) and set an appropriate memory region for registration.
@@ -2730,6 +2804,7 @@ void ib_drain_sq(struct ib_qp *qp)
qp->device->ops.drain_sq(qp);
else
__ib_drain_sq(qp);
+ trace_cq_drain_complete(qp->send_cq);
}
EXPORT_SYMBOL(ib_drain_sq);
@@ -2758,6 +2833,7 @@ void ib_drain_rq(struct ib_qp *qp)
qp->device->ops.drain_rq(qp);
else
__ib_drain_rq(qp);
+ trace_cq_drain_complete(qp->recv_cq);
}
EXPORT_SYMBOL(ib_drain_rq);
OpenPOWER on IntegriCloud