Diffstat (limited to 'drivers/infiniband/core'): 44 files changed, 3031 insertions, 3145 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 09881bd5f12d..d1b14887960e 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -11,7 +11,8 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ device.o fmr_pool.o cache.o netlink.o \ roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \ multicast.o mad.o smi.o agent.o mad_rmpp.o \ - nldev.o restrack.o counters.o + nldev.o restrack.o counters.o ib_core_uverbs.o \ + trace.o ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o @@ -20,7 +21,8 @@ ib_cm-y := cm.o iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o -rdma_cm-y := cma.o +CFLAGS_cma_trace.o += -I$(src) +rdma_cm-y := cma.o cma_trace.o rdma_cm-$(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) += cma_configfs.o @@ -33,6 +35,7 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ uverbs_std_types_cq.o \ uverbs_std_types_flow_action.o uverbs_std_types_dm.o \ uverbs_std_types_mr.o uverbs_std_types_counters.o \ - uverbs_uapi.o uverbs_std_types_device.o + uverbs_uapi.o uverbs_std_types_device.o \ + uverbs_std_types_async_fd.o ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 9b76a8fcdd24..1753a9801b70 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -139,7 +139,7 @@ int ib_nl_handle_ip_res_resp(struct sk_buff *skb, if (ib_nl_is_good_ip_resp(nlh)) ib_nl_process_good_ip_rsep(nlh); - return skb->len; + return 0; } static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr, @@ -183,7 +183,7 @@ static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr, /* Repair the nlmsg header length */ nlmsg_end(skb, nlh); - rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, GFP_KERNEL); + rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, GFP_KERNEL); /* Make the request retry, so when we get the response from userspace * we will have something. 
@@ -352,7 +352,7 @@ static bool has_gateway(const struct dst_entry *dst, sa_family_t family) if (family == AF_INET) { rt = container_of(dst, struct rtable, dst); - return rt->rt_gw_family == AF_INET; + return rt->rt_uses_gateway; } rt6 = container_of(dst, struct rt6_info, dst); @@ -421,16 +421,15 @@ static int addr6_resolve(struct sockaddr *src_sock, (const struct sockaddr_in6 *)dst_sock; struct flowi6 fl6; struct dst_entry *dst; - int ret; memset(&fl6, 0, sizeof fl6); fl6.daddr = dst_in->sin6_addr; fl6.saddr = src_in->sin6_addr; fl6.flowi6_oif = addr->bound_dev_if; - ret = ipv6_stub->ipv6_dst_lookup(addr->net, NULL, &dst, &fl6); - if (ret < 0) - return ret; + dst = ipv6_stub->ipv6_dst_lookup_flow(addr->net, NULL, &fl6, NULL); + if (IS_ERR(dst)) + return PTR_ERR(dst); if (ipv6_addr_any(&src_in->sin6_addr)) src_in->sin6_addr = fl6.saddr; diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 18e476b3ced0..17bfedd24cc3 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -51,9 +51,8 @@ struct ib_pkey_cache { struct ib_update_work { struct work_struct work; - struct ib_device *device; - u8 port_num; - bool enforce_security; + struct ib_event event; + bool enforce_security; }; union ib_gid zgid; @@ -130,7 +129,7 @@ static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port) event.element.port_num = port; event.event = IB_EVENT_GID_CHANGE; - ib_dispatch_event(&event); + ib_dispatch_event_clients(&event); } static const char * const gid_type_str[] = { @@ -810,6 +809,7 @@ static void release_gid_table(struct ib_device *device, if (leak) return; + mutex_destroy(&table->lock); kfree(table->data_vec); kfree(table); } @@ -818,22 +818,16 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port, struct ib_gid_table *table) { int i; - bool deleted = false; if (!table) return; mutex_lock(&table->lock); for (i = 0; i < table->sz; ++i) { - if (is_gid_entry_valid(table->data_vec[i])) { + if (is_gid_entry_valid(table->data_vec[i])) del_gid(ib_dev, port, table, i); - deleted = true; - } } mutex_unlock(&table->lock); - - if (deleted) - dispatch_gid_change_event(ib_dev, port); } void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, @@ -1039,7 +1033,7 @@ int ib_get_cached_pkey(struct ib_device *device, if (!rdma_is_port_valid(device, port_num)) return -EINVAL; - read_lock_irqsave(&device->cache.lock, flags); + read_lock_irqsave(&device->cache_lock, flags); cache = device->port_data[port_num].cache.pkey; @@ -1048,7 +1042,7 @@ int ib_get_cached_pkey(struct ib_device *device, else *pkey = cache->table[index]; - read_unlock_irqrestore(&device->cache.lock, flags); + read_unlock_irqrestore(&device->cache_lock, flags); return ret; } @@ -1063,9 +1057,9 @@ int ib_get_cached_subnet_prefix(struct ib_device *device, if (!rdma_is_port_valid(device, port_num)) return -EINVAL; - read_lock_irqsave(&device->cache.lock, flags); + read_lock_irqsave(&device->cache_lock, flags); *sn_pfx = device->port_data[port_num].cache.subnet_prefix; - read_unlock_irqrestore(&device->cache.lock, flags); + read_unlock_irqrestore(&device->cache_lock, flags); return 0; } @@ -1085,7 +1079,7 @@ int ib_find_cached_pkey(struct ib_device *device, if (!rdma_is_port_valid(device, port_num)) return -EINVAL; - read_lock_irqsave(&device->cache.lock, flags); + read_lock_irqsave(&device->cache_lock, flags); cache = device->port_data[port_num].cache.pkey; @@ -1106,7 +1100,7 @@ int ib_find_cached_pkey(struct ib_device *device, ret = 0; } - 
read_unlock_irqrestore(&device->cache.lock, flags); + read_unlock_irqrestore(&device->cache_lock, flags); return ret; } @@ -1125,7 +1119,7 @@ int ib_find_exact_cached_pkey(struct ib_device *device, if (!rdma_is_port_valid(device, port_num)) return -EINVAL; - read_lock_irqsave(&device->cache.lock, flags); + read_lock_irqsave(&device->cache_lock, flags); cache = device->port_data[port_num].cache.pkey; @@ -1138,7 +1132,7 @@ int ib_find_exact_cached_pkey(struct ib_device *device, break; } - read_unlock_irqrestore(&device->cache.lock, flags); + read_unlock_irqrestore(&device->cache_lock, flags); return ret; } @@ -1154,9 +1148,9 @@ int ib_get_cached_lmc(struct ib_device *device, if (!rdma_is_port_valid(device, port_num)) return -EINVAL; - read_lock_irqsave(&device->cache.lock, flags); + read_lock_irqsave(&device->cache_lock, flags); *lmc = device->port_data[port_num].cache.lmc; - read_unlock_irqrestore(&device->cache.lock, flags); + read_unlock_irqrestore(&device->cache_lock, flags); return ret; } @@ -1172,9 +1166,9 @@ int ib_get_cached_port_state(struct ib_device *device, if (!rdma_is_port_valid(device, port_num)) return -EINVAL; - read_lock_irqsave(&device->cache.lock, flags); + read_lock_irqsave(&device->cache_lock, flags); *port_state = device->port_data[port_num].cache.port_state; - read_unlock_irqrestore(&device->cache.lock, flags); + read_unlock_irqrestore(&device->cache_lock, flags); return ret; } @@ -1386,9 +1380,8 @@ err: return ret; } -static void ib_cache_update(struct ib_device *device, - u8 port, - bool enforce_security) +static int +ib_cache_update(struct ib_device *device, u8 port, bool enforce_security) { struct ib_port_attr *tprops = NULL; struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache; @@ -1396,11 +1389,11 @@ static void ib_cache_update(struct ib_device *device, int ret; if (!rdma_is_port_valid(device, port)) - return; + return -EINVAL; tprops = kmalloc(sizeof *tprops, GFP_KERNEL); if (!tprops) - return; + return -ENOMEM; ret = ib_query_port(device, port, tprops); if (ret) { @@ -1418,8 +1411,10 @@ static void ib_cache_update(struct ib_device *device, pkey_cache = kmalloc(struct_size(pkey_cache, table, tprops->pkey_tbl_len), GFP_KERNEL); - if (!pkey_cache) + if (!pkey_cache) { + ret = -ENOMEM; goto err; + } pkey_cache->table_len = tprops->pkey_tbl_len; @@ -1433,7 +1428,7 @@ static void ib_cache_update(struct ib_device *device, } } - write_lock_irq(&device->cache.lock); + write_lock_irq(&device->cache_lock); old_pkey_cache = device->port_data[port].cache.pkey; @@ -1442,7 +1437,7 @@ static void ib_cache_update(struct ib_device *device, device->port_data[port].cache.port_state = tprops->state; device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix; - write_unlock_irq(&device->cache.lock); + write_unlock_irq(&device->cache_lock); if (enforce_security) ib_security_cache_change(device, @@ -1451,57 +1446,91 @@ static void ib_cache_update(struct ib_device *device, kfree(old_pkey_cache); kfree(tprops); - return; + return 0; err: kfree(pkey_cache); kfree(tprops); + return ret; +} + +static void ib_cache_event_task(struct work_struct *_work) +{ + struct ib_update_work *work = + container_of(_work, struct ib_update_work, work); + int ret; + + /* Before distributing the cache update event, first sync + * the cache. + */ + ret = ib_cache_update(work->event.device, work->event.element.port_num, + work->enforce_security); + + /* GID event is notified already for individual GID entries by + * dispatch_gid_change_event(). Hence, notifiy for rest of the + * events. 
+ */ + if (!ret && work->event.event != IB_EVENT_GID_CHANGE) + ib_dispatch_event_clients(&work->event); + + kfree(work); } -static void ib_cache_task(struct work_struct *_work) +static void ib_generic_event_task(struct work_struct *_work) { struct ib_update_work *work = container_of(_work, struct ib_update_work, work); - ib_cache_update(work->device, - work->port_num, - work->enforce_security); + ib_dispatch_event_clients(&work->event); kfree(work); } -static void ib_cache_event(struct ib_event_handler *handler, - struct ib_event *event) +static bool is_cache_update_event(const struct ib_event *event) +{ + return (event->event == IB_EVENT_PORT_ERR || + event->event == IB_EVENT_PORT_ACTIVE || + event->event == IB_EVENT_LID_CHANGE || + event->event == IB_EVENT_PKEY_CHANGE || + event->event == IB_EVENT_CLIENT_REREGISTER || + event->event == IB_EVENT_GID_CHANGE); +} + +/** + * ib_dispatch_event - Dispatch an asynchronous event + * @event:Event to dispatch + * + * Low-level drivers must call ib_dispatch_event() to dispatch the + * event to all registered event handlers when an asynchronous event + * occurs. + */ +void ib_dispatch_event(const struct ib_event *event) { struct ib_update_work *work; - if (event->event == IB_EVENT_PORT_ERR || - event->event == IB_EVENT_PORT_ACTIVE || - event->event == IB_EVENT_LID_CHANGE || - event->event == IB_EVENT_PKEY_CHANGE || - event->event == IB_EVENT_CLIENT_REREGISTER || - event->event == IB_EVENT_GID_CHANGE) { - work = kmalloc(sizeof *work, GFP_ATOMIC); - if (work) { - INIT_WORK(&work->work, ib_cache_task); - work->device = event->device; - work->port_num = event->element.port_num; - if (event->event == IB_EVENT_PKEY_CHANGE || - event->event == IB_EVENT_GID_CHANGE) - work->enforce_security = true; - else - work->enforce_security = false; - - queue_work(ib_wq, &work->work); - } - } + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return; + + if (is_cache_update_event(event)) + INIT_WORK(&work->work, ib_cache_event_task); + else + INIT_WORK(&work->work, ib_generic_event_task); + + work->event = *event; + if (event->event == IB_EVENT_PKEY_CHANGE || + event->event == IB_EVENT_GID_CHANGE) + work->enforce_security = true; + + queue_work(ib_wq, &work->work); } +EXPORT_SYMBOL(ib_dispatch_event); int ib_cache_setup_one(struct ib_device *device) { unsigned int p; int err; - rwlock_init(&device->cache.lock); + rwlock_init(&device->cache_lock); err = gid_table_setup_one(device); if (err) @@ -1510,9 +1539,6 @@ int ib_cache_setup_one(struct ib_device *device) rdma_for_each_port (device, p) ib_cache_update(device, p, true); - INIT_IB_EVENT_HANDLER(&device->cache.event_handler, - device, ib_cache_event); - ib_register_event_handler(&device->cache.event_handler); return 0; } @@ -1534,14 +1560,12 @@ void ib_cache_release_one(struct ib_device *device) void ib_cache_cleanup_one(struct ib_device *device) { - /* The cleanup function unregisters the event handler, - * waits for all in-progress workqueue elements and cleans - * up the GID cache. This function should be called after - * the device was removed from the devices list and all - * clients were removed, so the cache exists but is + /* The cleanup function waits for all in-progress workqueue + * elements and cleans up the GID cache. This function should be + * called after the device was removed from the devices list and + * all clients were removed, so the cache exists but is * non-functional and shouldn't be updated anymore. 
*/ - ib_unregister_event_handler(&device->cache.event_handler); flush_workqueue(ib_wq); gid_table_cleanup_one(device); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index da10e6ccb43c..68cc1b2d6824 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1,36 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2004-2007 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. */ #include <linux/completion.h> @@ -246,7 +220,7 @@ struct cm_work { }; struct cm_timewait_info { - struct cm_work work; /* Must be first. */ + struct cm_work work; struct list_head list; struct rb_node remote_qp_node; struct rb_node remote_id_node; @@ -263,10 +237,11 @@ struct cm_id_private { struct rb_node sidr_id_node; spinlock_t lock; /* Do not acquire inside cm.lock */ struct completion comp; - atomic_t refcount; + refcount_t refcount; /* Number of clients sharing this ib_cm_id. Only valid for listeners. * Protected by the cm.lock spinlock. 
*/ int listen_sharecount; + struct rcu_head rcu; struct ib_mad_send_buf *msg; struct cm_timewait_info *timewait_info; @@ -308,7 +283,7 @@ static void cm_work_handler(struct work_struct *work); static inline void cm_deref_id(struct cm_id_private *cm_id_priv) { - if (atomic_dec_and_test(&cm_id_priv->refcount)) + if (refcount_dec_and_test(&cm_id_priv->refcount)) complete(&cm_id_priv->comp); } @@ -365,7 +340,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, m->ah = ah; m->retries = cm_id_priv->max_cm_retries; - atomic_inc(&cm_id_priv->refcount); + refcount_inc(&cm_id_priv->refcount); m->context[0] = cm_id_priv; *msg = m; @@ -619,28 +594,16 @@ static void cm_free_id(__be32 local_id) xa_erase_irq(&cm.local_id_table, cm_local_id(local_id)); } -static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id) +static struct cm_id_private *cm_acquire_id(__be32 local_id, __be32 remote_id) { struct cm_id_private *cm_id_priv; + rcu_read_lock(); cm_id_priv = xa_load(&cm.local_id_table, cm_local_id(local_id)); - if (cm_id_priv) { - if (cm_id_priv->id.remote_id == remote_id) - atomic_inc(&cm_id_priv->refcount); - else - cm_id_priv = NULL; - } - - return cm_id_priv; -} - -static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id) -{ - struct cm_id_private *cm_id_priv; - - spin_lock_irq(&cm.lock); - cm_id_priv = cm_get_id(local_id, remote_id); - spin_unlock_irq(&cm.lock); + if (!cm_id_priv || cm_id_priv->id.remote_id != remote_id || + !refcount_inc_not_zero(&cm_id_priv->refcount)) + cm_id_priv = NULL; + rcu_read_unlock(); return cm_id_priv; } @@ -883,7 +846,7 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device, INIT_LIST_HEAD(&cm_id_priv->prim_list); INIT_LIST_HEAD(&cm_id_priv->altr_list); atomic_set(&cm_id_priv->work_count, -1); - atomic_set(&cm_id_priv->refcount, 1); + refcount_set(&cm_id_priv->refcount, 1); return &cm_id_priv->id; error: @@ -1115,7 +1078,7 @@ retest: rdma_destroy_ah_attr(&cm_id_priv->av.ah_attr); rdma_destroy_ah_attr(&cm_id_priv->alt_av.ah_attr); kfree(cm_id_priv->private_data); - kfree(cm_id_priv); + kfree_rcu(cm_id_priv, rcu); } void ib_destroy_cm_id(struct ib_cm_id *cm_id) @@ -1230,7 +1193,7 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, spin_unlock_irqrestore(&cm.lock, flags); return ERR_PTR(-EINVAL); } - atomic_inc(&cm_id_priv->refcount); + refcount_inc(&cm_id_priv->refcount); ++cm_id_priv->listen_sharecount; spin_unlock_irqrestore(&cm.lock, flags); @@ -1288,54 +1251,72 @@ static void cm_format_req(struct cm_req_msg *req_msg, cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID, cm_form_tid(cm_id_priv)); - req_msg->local_comm_id = cm_id_priv->id.local_id; - req_msg->service_id = param->service_id; - req_msg->local_ca_guid = cm_id_priv->id.device->node_guid; - cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num)); - cm_req_set_init_depth(req_msg, param->initiator_depth); - cm_req_set_remote_resp_timeout(req_msg, - param->remote_cm_response_timeout); + IBA_SET(CM_REQ_LOCAL_COMM_ID, req_msg, + be32_to_cpu(cm_id_priv->id.local_id)); + IBA_SET(CM_REQ_SERVICE_ID, req_msg, be64_to_cpu(param->service_id)); + IBA_SET(CM_REQ_LOCAL_CA_GUID, req_msg, + be64_to_cpu(cm_id_priv->id.device->node_guid)); + IBA_SET(CM_REQ_LOCAL_QPN, req_msg, param->qp_num); + IBA_SET(CM_REQ_INITIATOR_DEPTH, req_msg, param->initiator_depth); + IBA_SET(CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT, req_msg, + param->remote_cm_response_timeout); cm_req_set_qp_type(req_msg, param->qp_type); - cm_req_set_flow_ctrl(req_msg, param->flow_control); - 
cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn)); - cm_req_set_local_resp_timeout(req_msg, - param->local_cm_response_timeout); - req_msg->pkey = param->primary_path->pkey; - cm_req_set_path_mtu(req_msg, param->primary_path->mtu); - cm_req_set_max_cm_retries(req_msg, param->max_cm_retries); + IBA_SET(CM_REQ_END_TO_END_FLOW_CONTROL, req_msg, param->flow_control); + IBA_SET(CM_REQ_STARTING_PSN, req_msg, param->starting_psn); + IBA_SET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg, + param->local_cm_response_timeout); + IBA_SET(CM_REQ_PARTITION_KEY, req_msg, + be16_to_cpu(param->primary_path->pkey)); + IBA_SET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg, + param->primary_path->mtu); + IBA_SET(CM_REQ_MAX_CM_RETRIES, req_msg, param->max_cm_retries); if (param->qp_type != IB_QPT_XRC_INI) { - cm_req_set_resp_res(req_msg, param->responder_resources); - cm_req_set_retry_count(req_msg, param->retry_count); - cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count); - cm_req_set_srq(req_msg, param->srq); + IBA_SET(CM_REQ_RESPONDER_RESOURCES, req_msg, + param->responder_resources); + IBA_SET(CM_REQ_RETRY_COUNT, req_msg, param->retry_count); + IBA_SET(CM_REQ_RNR_RETRY_COUNT, req_msg, + param->rnr_retry_count); + IBA_SET(CM_REQ_SRQ, req_msg, param->srq); } - req_msg->primary_local_gid = pri_path->sgid; - req_msg->primary_remote_gid = pri_path->dgid; + *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg) = + pri_path->sgid; + *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg) = + pri_path->dgid; if (pri_ext) { - req_msg->primary_local_gid.global.interface_id - = OPA_MAKE_ID(be32_to_cpu(pri_path->opa.slid)); - req_msg->primary_remote_gid.global.interface_id - = OPA_MAKE_ID(be32_to_cpu(pri_path->opa.dlid)); + IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg) + ->global.interface_id = + OPA_MAKE_ID(be32_to_cpu(pri_path->opa.slid)); + IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg) + ->global.interface_id = + OPA_MAKE_ID(be32_to_cpu(pri_path->opa.dlid)); } if (pri_path->hop_limit <= 1) { - req_msg->primary_local_lid = pri_ext ? 0 : - htons(ntohl(sa_path_get_slid(pri_path))); - req_msg->primary_remote_lid = pri_ext ? 0 : - htons(ntohl(sa_path_get_dlid(pri_path))); + IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg, + be16_to_cpu(pri_ext ? 0 : + htons(ntohl(sa_path_get_slid( + pri_path))))); + IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg, + be16_to_cpu(pri_ext ? 
0 : + htons(ntohl(sa_path_get_dlid( + pri_path))))); } else { /* Work-around until there's a way to obtain remote LID info */ - req_msg->primary_local_lid = IB_LID_PERMISSIVE; - req_msg->primary_remote_lid = IB_LID_PERMISSIVE; + IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg, + be16_to_cpu(IB_LID_PERMISSIVE)); + IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg, + be16_to_cpu(IB_LID_PERMISSIVE)); } - cm_req_set_primary_flow_label(req_msg, pri_path->flow_label); - cm_req_set_primary_packet_rate(req_msg, pri_path->rate); - req_msg->primary_traffic_class = pri_path->traffic_class; - req_msg->primary_hop_limit = pri_path->hop_limit; - cm_req_set_primary_sl(req_msg, pri_path->sl); - cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1)); - cm_req_set_primary_local_ack_timeout(req_msg, + IBA_SET(CM_REQ_PRIMARY_FLOW_LABEL, req_msg, + be32_to_cpu(pri_path->flow_label)); + IBA_SET(CM_REQ_PRIMARY_PACKET_RATE, req_msg, pri_path->rate); + IBA_SET(CM_REQ_PRIMARY_TRAFFIC_CLASS, req_msg, pri_path->traffic_class); + IBA_SET(CM_REQ_PRIMARY_HOP_LIMIT, req_msg, pri_path->hop_limit); + IBA_SET(CM_REQ_PRIMARY_SL, req_msg, pri_path->sl); + IBA_SET(CM_REQ_PRIMARY_SUBNET_LOCAL, req_msg, + (pri_path->hop_limit <= 1)); + IBA_SET(CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT, req_msg, cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, pri_path->packet_life_time)); @@ -1346,38 +1327,55 @@ static void cm_format_req(struct cm_req_msg *req_msg, alt_ext = opa_is_extended_lid(alt_path->opa.dlid, alt_path->opa.slid); - req_msg->alt_local_gid = alt_path->sgid; - req_msg->alt_remote_gid = alt_path->dgid; + *IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg) = + alt_path->sgid; + *IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg) = + alt_path->dgid; if (alt_ext) { - req_msg->alt_local_gid.global.interface_id - = OPA_MAKE_ID(be32_to_cpu(alt_path->opa.slid)); - req_msg->alt_remote_gid.global.interface_id - = OPA_MAKE_ID(be32_to_cpu(alt_path->opa.dlid)); + IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID, + req_msg) + ->global.interface_id = + OPA_MAKE_ID(be32_to_cpu(alt_path->opa.slid)); + IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_REMOTE_PORT_GID, + req_msg) + ->global.interface_id = + OPA_MAKE_ID(be32_to_cpu(alt_path->opa.dlid)); } if (alt_path->hop_limit <= 1) { - req_msg->alt_local_lid = alt_ext ? 0 : - htons(ntohl(sa_path_get_slid(alt_path))); - req_msg->alt_remote_lid = alt_ext ? 0 : - htons(ntohl(sa_path_get_dlid(alt_path))); + IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg, + be16_to_cpu( + alt_ext ? 0 : + htons(ntohl(sa_path_get_slid( + alt_path))))); + IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg, + be16_to_cpu( + alt_ext ? 
0 : + htons(ntohl(sa_path_get_dlid( + alt_path))))); } else { - req_msg->alt_local_lid = IB_LID_PERMISSIVE; - req_msg->alt_remote_lid = IB_LID_PERMISSIVE; + IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg, + be16_to_cpu(IB_LID_PERMISSIVE)); + IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg, + be16_to_cpu(IB_LID_PERMISSIVE)); } - cm_req_set_alt_flow_label(req_msg, - alt_path->flow_label); - cm_req_set_alt_packet_rate(req_msg, alt_path->rate); - req_msg->alt_traffic_class = alt_path->traffic_class; - req_msg->alt_hop_limit = alt_path->hop_limit; - cm_req_set_alt_sl(req_msg, alt_path->sl); - cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1)); - cm_req_set_alt_local_ack_timeout(req_msg, + IBA_SET(CM_REQ_ALTERNATE_FLOW_LABEL, req_msg, + be32_to_cpu(alt_path->flow_label)); + IBA_SET(CM_REQ_ALTERNATE_PACKET_RATE, req_msg, alt_path->rate); + IBA_SET(CM_REQ_ALTERNATE_TRAFFIC_CLASS, req_msg, + alt_path->traffic_class); + IBA_SET(CM_REQ_ALTERNATE_HOP_LIMIT, req_msg, + alt_path->hop_limit); + IBA_SET(CM_REQ_ALTERNATE_SL, req_msg, alt_path->sl); + IBA_SET(CM_REQ_ALTERNATE_SUBNET_LOCAL, req_msg, + (alt_path->hop_limit <= 1)); + IBA_SET(CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT, req_msg, cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, alt_path->packet_life_time)); } if (param->private_data && param->private_data_len) - memcpy(req_msg->private_data, param->private_data, - param->private_data_len); + IBA_SET_MEM(CM_REQ_PRIVATE_DATA, req_msg, param->private_data, + param->private_data_len); } static int cm_validate_req_param(struct ib_cm_req_param *param) @@ -1469,8 +1467,8 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms; cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT; - cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg); - cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg); + cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg)); + cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg)); spin_lock_irqsave(&cm_id_priv->lock, flags); ret = ib_post_send_mad(cm_id_priv->msg, NULL); @@ -1508,14 +1506,16 @@ static int cm_issue_rej(struct cm_port *port, rej_msg = (struct cm_rej_msg *) msg->mad; cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid); - rej_msg->remote_comm_id = rcv_msg->local_comm_id; - rej_msg->local_comm_id = rcv_msg->remote_comm_id; - cm_rej_set_msg_rejected(rej_msg, msg_rejected); - rej_msg->reason = cpu_to_be16(reason); + IBA_SET(CM_REJ_REMOTE_COMM_ID, rej_msg, + IBA_GET(CM_REJ_LOCAL_COMM_ID, rcv_msg)); + IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg, + IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg)); + IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, msg_rejected); + IBA_SET(CM_REJ_REASON, rej_msg, reason); if (ari && ari_length) { - cm_rej_set_reject_info_len(rej_msg, ari_length); - memcpy(rej_msg->ari, ari, ari_length); + IBA_SET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg, ari_length); + IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length); } ret = ib_post_send_mad(msg, NULL); @@ -1525,18 +1525,12 @@ static int cm_issue_rej(struct cm_port *port, return ret; } -static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid, - __be32 local_qpn, __be32 remote_qpn) -{ - return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) || - ((local_ca_guid == remote_ca_guid) && - (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn)))); -} - static bool cm_req_has_alt_path(struct cm_req_msg *req_msg) { - return ((req_msg->alt_local_lid) || - 
(ib_is_opa_gid(&req_msg->alt_local_gid))); + return ((cpu_to_be16( + IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg))) || + (ib_is_opa_gid(IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID, + req_msg)))); } static void cm_path_set_rec_type(struct ib_device *ib_device, u8 port_num, @@ -1556,14 +1550,18 @@ static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg, if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) { sa_path_set_dlid(primary_path, - ntohs(req_msg->primary_local_lid)); + IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID, + req_msg)); sa_path_set_slid(primary_path, - ntohs(req_msg->primary_remote_lid)); + IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID, + req_msg)); } else { - lid = opa_get_lid_from_gid(&req_msg->primary_local_gid); + lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR( + CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg)); sa_path_set_dlid(primary_path, lid); - lid = opa_get_lid_from_gid(&req_msg->primary_remote_gid); + lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR( + CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg)); sa_path_set_slid(primary_path, lid); } @@ -1571,13 +1569,19 @@ static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg, return; if (alt_path->rec_type != SA_PATH_REC_TYPE_OPA) { - sa_path_set_dlid(alt_path, ntohs(req_msg->alt_local_lid)); - sa_path_set_slid(alt_path, ntohs(req_msg->alt_remote_lid)); + sa_path_set_dlid(alt_path, + IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, + req_msg)); + sa_path_set_slid(alt_path, + IBA_GET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, + req_msg)); } else { - lid = opa_get_lid_from_gid(&req_msg->alt_local_gid); + lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR( + CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg)); sa_path_set_dlid(alt_path, lid); - lid = opa_get_lid_from_gid(&req_msg->alt_remote_gid); + lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR( + CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg)); sa_path_set_slid(alt_path, lid); } } @@ -1586,44 +1590,58 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg, struct sa_path_rec *primary_path, struct sa_path_rec *alt_path) { - primary_path->dgid = req_msg->primary_local_gid; - primary_path->sgid = req_msg->primary_remote_gid; - primary_path->flow_label = cm_req_get_primary_flow_label(req_msg); - primary_path->hop_limit = req_msg->primary_hop_limit; - primary_path->traffic_class = req_msg->primary_traffic_class; + primary_path->dgid = + *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg); + primary_path->sgid = + *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg); + primary_path->flow_label = + cpu_to_be32(IBA_GET(CM_REQ_PRIMARY_FLOW_LABEL, req_msg)); + primary_path->hop_limit = IBA_GET(CM_REQ_PRIMARY_HOP_LIMIT, req_msg); + primary_path->traffic_class = + IBA_GET(CM_REQ_PRIMARY_TRAFFIC_CLASS, req_msg); primary_path->reversible = 1; - primary_path->pkey = req_msg->pkey; - primary_path->sl = cm_req_get_primary_sl(req_msg); + primary_path->pkey = + cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg)); + primary_path->sl = IBA_GET(CM_REQ_PRIMARY_SL, req_msg); primary_path->mtu_selector = IB_SA_EQ; - primary_path->mtu = cm_req_get_path_mtu(req_msg); + primary_path->mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg); primary_path->rate_selector = IB_SA_EQ; - primary_path->rate = cm_req_get_primary_packet_rate(req_msg); + primary_path->rate = IBA_GET(CM_REQ_PRIMARY_PACKET_RATE, req_msg); primary_path->packet_life_time_selector = IB_SA_EQ; primary_path->packet_life_time = - cm_req_get_primary_local_ack_timeout(req_msg); + IBA_GET(CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT, req_msg); primary_path->packet_life_time -= 
(primary_path->packet_life_time > 0); - primary_path->service_id = req_msg->service_id; + primary_path->service_id = + cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg)); if (sa_path_is_roce(primary_path)) primary_path->roce.route_resolved = false; if (cm_req_has_alt_path(req_msg)) { - alt_path->dgid = req_msg->alt_local_gid; - alt_path->sgid = req_msg->alt_remote_gid; - alt_path->flow_label = cm_req_get_alt_flow_label(req_msg); - alt_path->hop_limit = req_msg->alt_hop_limit; - alt_path->traffic_class = req_msg->alt_traffic_class; + alt_path->dgid = *IBA_GET_MEM_PTR( + CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg); + alt_path->sgid = *IBA_GET_MEM_PTR( + CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg); + alt_path->flow_label = cpu_to_be32( + IBA_GET(CM_REQ_ALTERNATE_FLOW_LABEL, req_msg)); + alt_path->hop_limit = + IBA_GET(CM_REQ_ALTERNATE_HOP_LIMIT, req_msg); + alt_path->traffic_class = + IBA_GET(CM_REQ_ALTERNATE_TRAFFIC_CLASS, req_msg); alt_path->reversible = 1; - alt_path->pkey = req_msg->pkey; - alt_path->sl = cm_req_get_alt_sl(req_msg); + alt_path->pkey = + cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg)); + alt_path->sl = IBA_GET(CM_REQ_ALTERNATE_SL, req_msg); alt_path->mtu_selector = IB_SA_EQ; - alt_path->mtu = cm_req_get_path_mtu(req_msg); + alt_path->mtu = + IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg); alt_path->rate_selector = IB_SA_EQ; - alt_path->rate = cm_req_get_alt_packet_rate(req_msg); + alt_path->rate = IBA_GET(CM_REQ_ALTERNATE_PACKET_RATE, req_msg); alt_path->packet_life_time_selector = IB_SA_EQ; alt_path->packet_life_time = - cm_req_get_alt_local_ack_timeout(req_msg); + IBA_GET(CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT, req_msg); alt_path->packet_life_time -= (alt_path->packet_life_time > 0); - alt_path->service_id = req_msg->service_id; + alt_path->service_id = + cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg)); if (sa_path_is_roce(alt_path)) alt_path->roce.route_resolved = false; @@ -1698,23 +1716,25 @@ static void cm_format_req_event(struct cm_work *work, } else { param->alternate_path = NULL; } - param->remote_ca_guid = req_msg->local_ca_guid; - param->remote_qkey = be32_to_cpu(req_msg->local_qkey); - param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg)); + param->remote_ca_guid = + cpu_to_be64(IBA_GET(CM_REQ_LOCAL_CA_GUID, req_msg)); + param->remote_qkey = IBA_GET(CM_REQ_LOCAL_Q_KEY, req_msg); + param->remote_qpn = IBA_GET(CM_REQ_LOCAL_QPN, req_msg); param->qp_type = cm_req_get_qp_type(req_msg); - param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg)); - param->responder_resources = cm_req_get_init_depth(req_msg); - param->initiator_depth = cm_req_get_resp_res(req_msg); + param->starting_psn = IBA_GET(CM_REQ_STARTING_PSN, req_msg); + param->responder_resources = IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg); + param->initiator_depth = IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg); param->local_cm_response_timeout = - cm_req_get_remote_resp_timeout(req_msg); - param->flow_control = cm_req_get_flow_ctrl(req_msg); + IBA_GET(CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT, req_msg); + param->flow_control = IBA_GET(CM_REQ_END_TO_END_FLOW_CONTROL, req_msg); param->remote_cm_response_timeout = - cm_req_get_local_resp_timeout(req_msg); - param->retry_count = cm_req_get_retry_count(req_msg); - param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); - param->srq = cm_req_get_srq(req_msg); + IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg); + param->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg); + param->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg); + 
param->srq = IBA_GET(CM_REQ_SRQ, req_msg); param->ppath_sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr; - work->cm_event.private_data = &req_msg->private_data; + work->cm_event.private_data = + IBA_GET_MEM_PTR(CM_REQ_PRIVATE_DATA, req_msg); } static void cm_process_work(struct cm_id_private *cm_id_priv, @@ -1748,13 +1768,16 @@ static void cm_format_mra(struct cm_mra_msg *mra_msg, const void *private_data, u8 private_data_len) { cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid); - cm_mra_set_msg_mraed(mra_msg, msg_mraed); - mra_msg->local_comm_id = cm_id_priv->id.local_id; - mra_msg->remote_comm_id = cm_id_priv->id.remote_id; - cm_mra_set_service_timeout(mra_msg, service_timeout); + IBA_SET(CM_MRA_MESSAGE_MRAED, mra_msg, msg_mraed); + IBA_SET(CM_MRA_LOCAL_COMM_ID, mra_msg, + be32_to_cpu(cm_id_priv->id.local_id)); + IBA_SET(CM_MRA_REMOTE_COMM_ID, mra_msg, + be32_to_cpu(cm_id_priv->id.remote_id)); + IBA_SET(CM_MRA_SERVICE_TIMEOUT, mra_msg, service_timeout); if (private_data && private_data_len) - memcpy(mra_msg->private_data, private_data, private_data_len); + IBA_SET_MEM(CM_MRA_PRIVATE_DATA, mra_msg, private_data, + private_data_len); } static void cm_format_rej(struct cm_rej_msg *rej_msg, @@ -1766,36 +1789,42 @@ static void cm_format_rej(struct cm_rej_msg *rej_msg, u8 private_data_len) { cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid); - rej_msg->remote_comm_id = cm_id_priv->id.remote_id; + IBA_SET(CM_REJ_REMOTE_COMM_ID, rej_msg, + be32_to_cpu(cm_id_priv->id.remote_id)); switch(cm_id_priv->id.state) { case IB_CM_REQ_RCVD: - rej_msg->local_comm_id = 0; - cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ); + IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg, be32_to_cpu(0)); + IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REQ); break; case IB_CM_MRA_REQ_SENT: - rej_msg->local_comm_id = cm_id_priv->id.local_id; - cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ); + IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg, + be32_to_cpu(cm_id_priv->id.local_id)); + IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REQ); break; case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: - rej_msg->local_comm_id = cm_id_priv->id.local_id; - cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP); + IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg, + be32_to_cpu(cm_id_priv->id.local_id)); + IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REP); break; default: - rej_msg->local_comm_id = cm_id_priv->id.local_id; - cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER); + IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg, + be32_to_cpu(cm_id_priv->id.local_id)); + IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, + CM_MSG_RESPONSE_OTHER); break; } - rej_msg->reason = cpu_to_be16(reason); + IBA_SET(CM_REJ_REASON, rej_msg, reason); if (ari && ari_length) { - cm_rej_set_reject_info_len(rej_msg, ari_length); - memcpy(rej_msg->ari, ari, ari_length); + IBA_SET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg, ari_length); + IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length); } if (private_data && private_data_len) - memcpy(rej_msg->private_data, private_data, private_data_len); + IBA_SET_MEM(CM_REJ_PRIVATE_DATA, rej_msg, private_data, + private_data_len); } static void cm_dup_req_handler(struct cm_work *work, @@ -1855,7 +1884,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, spin_lock_irq(&cm.lock); timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info); if (timewait_info) { - cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, + cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id, 
timewait_info->work.remote_id); spin_unlock_irq(&cm.lock); if (cur_cm_id_priv) { @@ -1869,7 +1898,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info); if (timewait_info) { cm_cleanup_timewait(cm_id_priv->timewait_info); - cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, + cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id, timewait_info->work.remote_id); spin_unlock_irq(&cm.lock); @@ -1885,8 +1914,9 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, } /* Find matching listen request. */ - listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device, - req_msg->service_id); + listen_cm_id_priv = cm_find_listen( + cm_id_priv->id.device, + cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg))); if (!listen_cm_id_priv) { cm_cleanup_timewait(cm_id_priv->timewait_info); spin_unlock_irq(&cm.lock); @@ -1895,8 +1925,8 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, NULL, 0); goto out; } - atomic_inc(&listen_cm_id_priv->refcount); - atomic_inc(&cm_id_priv->refcount); + refcount_inc(&listen_cm_id_priv->refcount); + refcount_inc(&cm_id_priv->refcount); cm_id_priv->id.state = IB_CM_REQ_RCVD; atomic_inc(&cm_id_priv->work_count); spin_unlock_irq(&cm.lock); @@ -1911,24 +1941,32 @@ out: */ static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc) { - if (!cm_req_get_primary_subnet_local(req_msg)) { - if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) { - req_msg->primary_local_lid = ib_lid_be16(wc->slid); - cm_req_set_primary_sl(req_msg, wc->sl); + if (!IBA_GET(CM_REQ_PRIMARY_SUBNET_LOCAL, req_msg)) { + if (cpu_to_be16(IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID, + req_msg)) == IB_LID_PERMISSIVE) { + IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg, + be16_to_cpu(ib_lid_be16(wc->slid))); + IBA_SET(CM_REQ_PRIMARY_SL, req_msg, wc->sl); } - if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE) - req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits); + if (cpu_to_be16(IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID, + req_msg)) == IB_LID_PERMISSIVE) + IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg, + wc->dlid_path_bits); } - if (!cm_req_get_alt_subnet_local(req_msg)) { - if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) { - req_msg->alt_local_lid = ib_lid_be16(wc->slid); - cm_req_set_alt_sl(req_msg, wc->sl); + if (!IBA_GET(CM_REQ_ALTERNATE_SUBNET_LOCAL, req_msg)) { + if (cpu_to_be16(IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, + req_msg)) == IB_LID_PERMISSIVE) { + IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg, + be16_to_cpu(ib_lid_be16(wc->slid))); + IBA_SET(CM_REQ_ALTERNATE_SL, req_msg, wc->sl); } - if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE) - req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits); + if (cpu_to_be16(IBA_GET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, + req_msg)) == IB_LID_PERMISSIVE) + IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg, + wc->dlid_path_bits); } } @@ -1948,7 +1986,8 @@ static int cm_req_handler(struct cm_work *work) return PTR_ERR(cm_id); cm_id_priv = container_of(cm_id, struct cm_id_private, id); - cm_id_priv->id.remote_id = req_msg->local_comm_id; + cm_id_priv->id.remote_id = + cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg)); ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc, work->mad_recv_wc->recv_buf.grh, &cm_id_priv->av); @@ -1960,9 +1999,12 @@ static int cm_req_handler(struct cm_work *work) ret = PTR_ERR(cm_id_priv->timewait_info); goto destroy; } - cm_id_priv->timewait_info->work.remote_id = 
req_msg->local_comm_id; - cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid; - cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg); + cm_id_priv->timewait_info->work.remote_id = + cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg)); + cm_id_priv->timewait_info->remote_ca_guid = + cpu_to_be64(IBA_GET(CM_REQ_LOCAL_CA_GUID, req_msg)); + cm_id_priv->timewait_info->remote_qpn = + cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg)); listen_cm_id_priv = cm_match_req(work, cm_id_priv); if (!listen_cm_id_priv) { @@ -1974,7 +2016,8 @@ static int cm_req_handler(struct cm_work *work) cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler; cm_id_priv->id.context = listen_cm_id_priv->id.context; - cm_id_priv->id.service_id = req_msg->service_id; + cm_id_priv->id.service_id = + cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg)); cm_id_priv->id.service_mask = ~cpu_to_be64(0); cm_process_routed_req(req_msg, work->mad_recv_wc->wc); @@ -1991,10 +2034,11 @@ static int cm_req_handler(struct cm_work *work) work->path[0].rec_type = sa_conv_gid_to_pathrec_type(gid_attr->gid_type); } else { - cm_path_set_rec_type(work->port->cm_dev->ib_device, - work->port->port_num, - &work->path[0], - &req_msg->primary_local_gid); + cm_path_set_rec_type( + work->port->cm_dev->ib_device, work->port->port_num, + &work->path[0], + IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, + req_msg)); } if (cm_req_has_alt_path(req_msg)) work->path[1].rec_type = work->path[0].rec_type; @@ -2034,16 +2078,19 @@ static int cm_req_handler(struct cm_work *work) } cm_id_priv->tid = req_msg->hdr.tid; cm_id_priv->timeout_ms = cm_convert_to_ms( - cm_req_get_local_resp_timeout(req_msg)); - cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg); - cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg); - cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg); - cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg); - cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg); - cm_id_priv->pkey = req_msg->pkey; - cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg); - cm_id_priv->retry_count = cm_req_get_retry_count(req_msg); - cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); + IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg)); + cm_id_priv->max_cm_retries = IBA_GET(CM_REQ_MAX_CM_RETRIES, req_msg); + cm_id_priv->remote_qpn = + cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg)); + cm_id_priv->initiator_depth = + IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg); + cm_id_priv->responder_resources = + IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg); + cm_id_priv->path_mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg); + cm_id_priv->pkey = cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg)); + cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg)); + cm_id_priv->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg); + cm_id_priv->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg); cm_id_priv->qp_type = cm_req_get_qp_type(req_msg); cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id); @@ -2052,7 +2099,7 @@ static int cm_req_handler(struct cm_work *work) return 0; rejected: - atomic_dec(&cm_id_priv->refcount); + refcount_dec(&cm_id_priv->refcount); cm_deref_id(listen_cm_id_priv); free_timeinfo: kfree(cm_id_priv->timewait_info); @@ -2066,29 +2113,35 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg, struct ib_cm_rep_param *param) { cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid); - rep_msg->local_comm_id = cm_id_priv->id.local_id; - 
rep_msg->remote_comm_id = cm_id_priv->id.remote_id; - cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn)); - rep_msg->resp_resources = param->responder_resources; - cm_rep_set_target_ack_delay(rep_msg, - cm_id_priv->av.port->cm_dev->ack_delay); - cm_rep_set_failover(rep_msg, param->failover_accepted); - cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count); - rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid; + IBA_SET(CM_REP_LOCAL_COMM_ID, rep_msg, + be32_to_cpu(cm_id_priv->id.local_id)); + IBA_SET(CM_REP_REMOTE_COMM_ID, rep_msg, + be32_to_cpu(cm_id_priv->id.remote_id)); + IBA_SET(CM_REP_STARTING_PSN, rep_msg, param->starting_psn); + IBA_SET(CM_REP_RESPONDER_RESOURCES, rep_msg, + param->responder_resources); + IBA_SET(CM_REP_TARGET_ACK_DELAY, rep_msg, + cm_id_priv->av.port->cm_dev->ack_delay); + IBA_SET(CM_REP_FAILOVER_ACCEPTED, rep_msg, param->failover_accepted); + IBA_SET(CM_REP_RNR_RETRY_COUNT, rep_msg, param->rnr_retry_count); + IBA_SET(CM_REP_LOCAL_CA_GUID, rep_msg, + be64_to_cpu(cm_id_priv->id.device->node_guid)); if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) { - rep_msg->initiator_depth = param->initiator_depth; - cm_rep_set_flow_ctrl(rep_msg, param->flow_control); - cm_rep_set_srq(rep_msg, param->srq); - cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num)); + IBA_SET(CM_REP_INITIATOR_DEPTH, rep_msg, + param->initiator_depth); + IBA_SET(CM_REP_END_TO_END_FLOW_CONTROL, rep_msg, + param->flow_control); + IBA_SET(CM_REP_SRQ, rep_msg, param->srq); + IBA_SET(CM_REP_LOCAL_QPN, rep_msg, param->qp_num); } else { - cm_rep_set_srq(rep_msg, 1); - cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num)); + IBA_SET(CM_REP_SRQ, rep_msg, 1); + IBA_SET(CM_REP_LOCAL_EE_CONTEXT_NUMBER, rep_msg, param->qp_num); } if (param->private_data && param->private_data_len) - memcpy(rep_msg->private_data, param->private_data, - param->private_data_len); + IBA_SET_MEM(CM_REP_PRIVATE_DATA, rep_msg, param->private_data, + param->private_data_len); } int ib_send_cm_rep(struct ib_cm_id *cm_id, @@ -2134,7 +2187,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, cm_id_priv->msg = msg; cm_id_priv->initiator_depth = param->initiator_depth; cm_id_priv->responder_resources = param->responder_resources; - cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg); + cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg)); cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF); out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -2148,11 +2201,14 @@ static void cm_format_rtu(struct cm_rtu_msg *rtu_msg, u8 private_data_len) { cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid); - rtu_msg->local_comm_id = cm_id_priv->id.local_id; - rtu_msg->remote_comm_id = cm_id_priv->id.remote_id; + IBA_SET(CM_RTU_LOCAL_COMM_ID, rtu_msg, + be32_to_cpu(cm_id_priv->id.local_id)); + IBA_SET(CM_RTU_REMOTE_COMM_ID, rtu_msg, + be32_to_cpu(cm_id_priv->id.remote_id)); if (private_data && private_data_len) - memcpy(rtu_msg->private_data, private_data, private_data_len); + IBA_SET_MEM(CM_RTU_PRIVATE_DATA, rtu_msg, private_data, + private_data_len); } int ib_send_cm_rtu(struct ib_cm_id *cm_id, @@ -2215,18 +2271,20 @@ static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type) rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.rep_rcvd; - param->remote_ca_guid = rep_msg->local_ca_guid; - param->remote_qkey = be32_to_cpu(rep_msg->local_qkey); + param->remote_ca_guid = + cpu_to_be64(IBA_GET(CM_REP_LOCAL_CA_GUID, rep_msg)); + 
param->remote_qkey = IBA_GET(CM_REP_LOCAL_Q_KEY, rep_msg); param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type)); - param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg)); - param->responder_resources = rep_msg->initiator_depth; - param->initiator_depth = rep_msg->resp_resources; - param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg); - param->failover_accepted = cm_rep_get_failover(rep_msg); - param->flow_control = cm_rep_get_flow_ctrl(rep_msg); - param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg); - param->srq = cm_rep_get_srq(rep_msg); - work->cm_event.private_data = &rep_msg->private_data; + param->starting_psn = IBA_GET(CM_REP_STARTING_PSN, rep_msg); + param->responder_resources = IBA_GET(CM_REP_INITIATOR_DEPTH, rep_msg); + param->initiator_depth = IBA_GET(CM_REP_RESPONDER_RESOURCES, rep_msg); + param->target_ack_delay = IBA_GET(CM_REP_TARGET_ACK_DELAY, rep_msg); + param->failover_accepted = IBA_GET(CM_REP_FAILOVER_ACCEPTED, rep_msg); + param->flow_control = IBA_GET(CM_REP_END_TO_END_FLOW_CONTROL, rep_msg); + param->rnr_retry_count = IBA_GET(CM_REP_RNR_RETRY_COUNT, rep_msg); + param->srq = IBA_GET(CM_REP_SRQ, rep_msg); + work->cm_event.private_data = + IBA_GET_MEM_PTR(CM_REP_PRIVATE_DATA, rep_msg); } static void cm_dup_rep_handler(struct cm_work *work) @@ -2237,8 +2295,9 @@ static void cm_dup_rep_handler(struct cm_work *work) int ret; rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad; - cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, - rep_msg->local_comm_id); + cm_id_priv = cm_acquire_id( + cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)), + cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg))); if (!cm_id_priv) return; @@ -2282,11 +2341,12 @@ static int cm_rep_handler(struct cm_work *work) struct cm_timewait_info *timewait_info; rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0); + cm_id_priv = cm_acquire_id( + cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)), 0); if (!cm_id_priv) { cm_dup_rep_handler(work); pr_debug("%s: remote_comm_id %d, no cm_id_priv\n", __func__, - be32_to_cpu(rep_msg->remote_comm_id)); + IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)); return -EINVAL; } @@ -2300,15 +2360,18 @@ static int cm_rep_handler(struct cm_work *work) default: spin_unlock_irq(&cm_id_priv->lock); ret = -EINVAL; - pr_debug("%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n", - __func__, cm_id_priv->id.state, - be32_to_cpu(rep_msg->local_comm_id), - be32_to_cpu(rep_msg->remote_comm_id)); + pr_debug( + "%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n", + __func__, cm_id_priv->id.state, + IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg), + IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)); goto error; } - cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id; - cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid; + cm_id_priv->timewait_info->work.remote_id = + cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg)); + cm_id_priv->timewait_info->remote_ca_guid = + cpu_to_be64(IBA_GET(CM_REP_LOCAL_CA_GUID, rep_msg)); cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type); spin_lock(&cm.lock); @@ -2318,7 +2381,7 @@ static int cm_rep_handler(struct cm_work *work) spin_unlock_irq(&cm_id_priv->lock); ret = -EINVAL; pr_debug("%s: Failed to insert remote id %d\n", __func__, - be32_to_cpu(rep_msg->remote_comm_id)); + IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)); goto error; } /* Check for a stale 
connection. */ @@ -2327,7 +2390,7 @@ static int cm_rep_handler(struct cm_work *work) rb_erase(&cm_id_priv->timewait_info->remote_id_node, &cm.remote_id_table); cm_id_priv->timewait_info->inserted_remote_id = 0; - cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, + cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id, timewait_info->work.remote_id); spin_unlock(&cm.lock); @@ -2336,9 +2399,10 @@ static int cm_rep_handler(struct cm_work *work) IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP, NULL, 0); ret = -EINVAL; - pr_debug("%s: Stale connection. local_comm_id %d, remote_comm_id %d\n", - __func__, be32_to_cpu(rep_msg->local_comm_id), - be32_to_cpu(rep_msg->remote_comm_id)); + pr_debug( + "%s: Stale connection. local_comm_id %d, remote_comm_id %d\n", + __func__, IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg), + IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)); if (cur_cm_id_priv) { cm_id = &cur_cm_id_priv->id; @@ -2351,13 +2415,17 @@ static int cm_rep_handler(struct cm_work *work) spin_unlock(&cm.lock); cm_id_priv->id.state = IB_CM_REP_RCVD; - cm_id_priv->id.remote_id = rep_msg->local_comm_id; + cm_id_priv->id.remote_id = + cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg)); cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type); - cm_id_priv->initiator_depth = rep_msg->resp_resources; - cm_id_priv->responder_resources = rep_msg->initiator_depth; - cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg); - cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg); - cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg); + cm_id_priv->initiator_depth = + IBA_GET(CM_REP_RESPONDER_RESOURCES, rep_msg); + cm_id_priv->responder_resources = + IBA_GET(CM_REP_INITIATOR_DEPTH, rep_msg); + cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg)); + cm_id_priv->rnr_retry_count = IBA_GET(CM_REP_RNR_RETRY_COUNT, rep_msg); + cm_id_priv->target_ack_delay = + IBA_GET(CM_REP_TARGET_ACK_DELAY, rep_msg); cm_id_priv->av.timeout = cm_ack_timeout(cm_id_priv->target_ack_delay, cm_id_priv->av.timeout - 1); @@ -2423,12 +2491,14 @@ static int cm_rtu_handler(struct cm_work *work) int ret; rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id, - rtu_msg->local_comm_id); + cm_id_priv = cm_acquire_id( + cpu_to_be32(IBA_GET(CM_RTU_REMOTE_COMM_ID, rtu_msg)), + cpu_to_be32(IBA_GET(CM_RTU_LOCAL_COMM_ID, rtu_msg))); if (!cm_id_priv) return -EINVAL; - work->cm_event.private_data = &rtu_msg->private_data; + work->cm_event.private_data = + IBA_GET_MEM_PTR(CM_RTU_PRIVATE_DATA, rtu_msg); spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_REP_SENT && @@ -2463,12 +2533,16 @@ static void cm_format_dreq(struct cm_dreq_msg *dreq_msg, { cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID, cm_form_tid(cm_id_priv)); - dreq_msg->local_comm_id = cm_id_priv->id.local_id; - dreq_msg->remote_comm_id = cm_id_priv->id.remote_id; - cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn); + IBA_SET(CM_DREQ_LOCAL_COMM_ID, dreq_msg, + be32_to_cpu(cm_id_priv->id.local_id)); + IBA_SET(CM_DREQ_REMOTE_COMM_ID, dreq_msg, + be32_to_cpu(cm_id_priv->id.remote_id)); + IBA_SET(CM_DREQ_REMOTE_QPN_EECN, dreq_msg, + be32_to_cpu(cm_id_priv->remote_qpn)); if (private_data && private_data_len) - memcpy(dreq_msg->private_data, private_data, private_data_len); + IBA_SET_MEM(CM_DREQ_PRIVATE_DATA, dreq_msg, private_data, + private_data_len); } int ib_send_cm_dreq(struct ib_cm_id *cm_id, @@ -2528,11 +2602,14 @@ static void cm_format_drep(struct 
cm_drep_msg *drep_msg, u8 private_data_len) { cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid); - drep_msg->local_comm_id = cm_id_priv->id.local_id; - drep_msg->remote_comm_id = cm_id_priv->id.remote_id; + IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg, + be32_to_cpu(cm_id_priv->id.local_id)); + IBA_SET(CM_DREP_REMOTE_COMM_ID, drep_msg, + be32_to_cpu(cm_id_priv->id.remote_id)); if (private_data && private_data_len) - memcpy(drep_msg->private_data, private_data, private_data_len); + IBA_SET_MEM(CM_DREP_PRIVATE_DATA, drep_msg, private_data, + private_data_len); } int ib_send_cm_drep(struct ib_cm_id *cm_id, @@ -2600,8 +2677,10 @@ static int cm_issue_drep(struct cm_port *port, drep_msg = (struct cm_drep_msg *) msg->mad; cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid); - drep_msg->remote_comm_id = dreq_msg->local_comm_id; - drep_msg->local_comm_id = dreq_msg->remote_comm_id; + IBA_SET(CM_DREP_REMOTE_COMM_ID, drep_msg, + IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg)); + IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg, + IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)); ret = ib_post_send_mad(msg, NULL); if (ret) @@ -2618,22 +2697,26 @@ static int cm_dreq_handler(struct cm_work *work) int ret; dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id, - dreq_msg->local_comm_id); + cm_id_priv = cm_acquire_id( + cpu_to_be32(IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)), + cpu_to_be32(IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg))); if (!cm_id_priv) { atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_DREQ_COUNTER]); cm_issue_drep(work->port, work->mad_recv_wc); - pr_debug("%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n", - __func__, be32_to_cpu(dreq_msg->local_comm_id), - be32_to_cpu(dreq_msg->remote_comm_id)); + pr_debug( + "%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n", + __func__, IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg), + IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)); return -EINVAL; } - work->cm_event.private_data = &dreq_msg->private_data; + work->cm_event.private_data = + IBA_GET_MEM_PTR(CM_DREQ_PRIVATE_DATA, dreq_msg); spin_lock_irq(&cm_id_priv->lock); - if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg)) + if (cm_id_priv->local_qpn != + cpu_to_be32(IBA_GET(CM_DREQ_REMOTE_QPN_EECN, dreq_msg))) goto unlock; switch (cm_id_priv->id.state) { @@ -2699,12 +2782,14 @@ static int cm_drep_handler(struct cm_work *work) int ret; drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id, - drep_msg->local_comm_id); + cm_id_priv = cm_acquire_id( + cpu_to_be32(IBA_GET(CM_DREP_REMOTE_COMM_ID, drep_msg)), + cpu_to_be32(IBA_GET(CM_DREP_LOCAL_COMM_ID, drep_msg))); if (!cm_id_priv) return -EINVAL; - work->cm_event.private_data = &drep_msg->private_data; + work->cm_event.private_data = + IBA_GET_MEM_PTR(CM_DREP_PRIVATE_DATA, drep_msg); spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_DREQ_SENT && @@ -2800,10 +2885,11 @@ static void cm_format_rej_event(struct cm_work *work) rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.rej_rcvd; - param->ari = rej_msg->ari; - param->ari_length = cm_rej_get_reject_info_len(rej_msg); - param->reason = __be16_to_cpu(rej_msg->reason); - work->cm_event.private_data = &rej_msg->private_data; + param->ari = IBA_GET_MEM_PTR(CM_REJ_ARI, rej_msg); + param->ari_length = IBA_GET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg); + param->reason = 
IBA_GET(CM_REJ_REASON, rej_msg); + work->cm_event.private_data = + IBA_GET_MEM_PTR(CM_REJ_PRIVATE_DATA, rej_msg); } static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg) @@ -2812,29 +2898,29 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg) struct cm_id_private *cm_id_priv; __be32 remote_id; - remote_id = rej_msg->local_comm_id; + remote_id = cpu_to_be32(IBA_GET(CM_REJ_LOCAL_COMM_ID, rej_msg)); - if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) { + if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_TIMEOUT) { spin_lock_irq(&cm.lock); - timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari), - remote_id); + timewait_info = cm_find_remote_id( + *((__be64 *)IBA_GET_MEM_PTR(CM_REJ_ARI, rej_msg)), + remote_id); if (!timewait_info) { spin_unlock_irq(&cm.lock); return NULL; } - cm_id_priv = xa_load(&cm.local_id_table, - cm_local_id(timewait_info->work.local_id)); - if (cm_id_priv) { - if (cm_id_priv->id.remote_id == remote_id) - atomic_inc(&cm_id_priv->refcount); - else - cm_id_priv = NULL; - } + cm_id_priv = + cm_acquire_id(timewait_info->work.local_id, remote_id); spin_unlock_irq(&cm.lock); - } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ) - cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0); + } else if (IBA_GET(CM_REJ_MESSAGE_REJECTED, rej_msg) == + CM_MSG_RESPONSE_REQ) + cm_id_priv = cm_acquire_id( + cpu_to_be32(IBA_GET(CM_REJ_REMOTE_COMM_ID, rej_msg)), + 0); else - cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id); + cm_id_priv = cm_acquire_id( + cpu_to_be32(IBA_GET(CM_REJ_REMOTE_COMM_ID, rej_msg)), + remote_id); return cm_id_priv; } @@ -2862,7 +2948,7 @@ static int cm_rej_handler(struct cm_work *work) /* fall through */ case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: - if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN) + if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_STALE_CONN) cm_enter_timewait(cm_id_priv); else cm_reset_to_idle(cm_id_priv); @@ -2992,13 +3078,16 @@ EXPORT_SYMBOL(ib_send_cm_mra); static struct cm_id_private * cm_acquire_mraed_id(struct cm_mra_msg *mra_msg) { - switch (cm_mra_get_msg_mraed(mra_msg)) { + switch (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg)) { case CM_MSG_RESPONSE_REQ: - return cm_acquire_id(mra_msg->remote_comm_id, 0); + return cm_acquire_id( + cpu_to_be32(IBA_GET(CM_MRA_REMOTE_COMM_ID, mra_msg)), + 0); case CM_MSG_RESPONSE_REP: case CM_MSG_RESPONSE_OTHER: - return cm_acquire_id(mra_msg->remote_comm_id, - mra_msg->local_comm_id); + return cm_acquire_id( + cpu_to_be32(IBA_GET(CM_MRA_REMOTE_COMM_ID, mra_msg)), + cpu_to_be32(IBA_GET(CM_MRA_LOCAL_COMM_ID, mra_msg))); default: return NULL; } @@ -3015,30 +3104,34 @@ static int cm_mra_handler(struct cm_work *work) if (!cm_id_priv) return -EINVAL; - work->cm_event.private_data = &mra_msg->private_data; + work->cm_event.private_data = + IBA_GET_MEM_PTR(CM_MRA_PRIVATE_DATA, mra_msg); work->cm_event.param.mra_rcvd.service_timeout = - cm_mra_get_service_timeout(mra_msg); - timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) + + IBA_GET(CM_MRA_SERVICE_TIMEOUT, mra_msg); + timeout = cm_convert_to_ms(IBA_GET(CM_MRA_SERVICE_TIMEOUT, mra_msg)) + cm_convert_to_ms(cm_id_priv->av.timeout); spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { case IB_CM_REQ_SENT: - if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ || + if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) != + CM_MSG_RESPONSE_REQ || ib_modify_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg, timeout)) goto out; 
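/*
 * (Editorial aside, not part of the patch.)  The hunks above and below all
 * make the same substitution: the old hand-rolled cm_*_get()/cm_*_set()
 * helpers and direct struct-field access are replaced by IBA_GET(),
 * IBA_SET(), IBA_GET_MEM_PTR() and IBA_SET_MEM(), which are driven by the
 * IBTA vol. 1 chapter 12 field definitions in <rdma/ibta_vol1_c12.h> and
 * traffic in CPU-order values (hence the cpu_to_be32()/be32_to_cpu()
 * wrappers at the call sites).  The sketch below is NOT the kernel
 * implementation; it is a minimal stand-alone illustration, assuming a
 * hypothetical (bit offset, width) field description with width <= 32, of
 * how such MSB-first wire-format accessors can be written.
 */
#include <stdint.h>

/* Read a 'width'-bit field that starts 'bit_off' bits into the message.
 * Bits are numbered MSB-first, as in the IBTA MAD layouts; the result is
 * returned right-justified in CPU byte order. */
static uint32_t iba_get_sketch(const uint8_t *msg, unsigned int bit_off,
			       unsigned int width)
{
	unsigned int first = bit_off / 8;
	unsigned int last = (bit_off + width - 1) / 8;
	uint64_t acc = 0;

	while (first <= last)
		acc = (acc << 8) | msg[first++];	/* big-endian byte stream */

	acc >>= 7 - ((bit_off + width - 1) % 8);	/* drop bits after the field */
	return width < 32 ? acc & ((1u << width) - 1) : (uint32_t)acc;
}

/* Write 'val' (CPU order, right-justified) into the same field. */
static void iba_set_sketch(uint8_t *msg, unsigned int bit_off,
			   unsigned int width, uint32_t val)
{
	unsigned int i;

	for (i = 0; i < width; i++) {	/* one bit at a time: slow but clear */
		unsigned int bit = bit_off + i;
		uint8_t mask = 1u << (7 - bit % 8);

		if (val & (1u << (width - 1 - i)))
			msg[bit / 8] |= mask;
		else
			msg[bit / 8] &= ~mask;
	}
}

/*
 * Usage note: because accessors of this kind work on CPU-order integers,
 * call sites that still keep big-endian fields (for example cm_id remote
 * and local IDs) convert at the boundary, which is exactly the
 * cpu_to_be32(IBA_GET(...)) and IBA_SET(..., be32_to_cpu(...)) pattern
 * visible throughout this patch.
 */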
cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD; break; case IB_CM_REP_SENT: - if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP || + if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) != + CM_MSG_RESPONSE_REP || ib_modify_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.state = IB_CM_MRA_REP_RCVD; break; case IB_CM_ESTABLISHED: - if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER || + if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) != + CM_MSG_RESPONSE_OTHER || cm_id_priv->id.lap_state != IB_CM_LAP_SENT || ib_modify_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg, timeout)) { @@ -3080,117 +3173,23 @@ out: return -EINVAL; } -static void cm_format_lap(struct cm_lap_msg *lap_msg, - struct cm_id_private *cm_id_priv, - struct sa_path_rec *alternate_path, - const void *private_data, - u8 private_data_len) -{ - bool alt_ext = false; - - if (alternate_path->rec_type == SA_PATH_REC_TYPE_OPA) - alt_ext = opa_is_extended_lid(alternate_path->opa.dlid, - alternate_path->opa.slid); - cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID, - cm_form_tid(cm_id_priv)); - lap_msg->local_comm_id = cm_id_priv->id.local_id; - lap_msg->remote_comm_id = cm_id_priv->id.remote_id; - cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn); - /* todo: need remote CM response timeout */ - cm_lap_set_remote_resp_timeout(lap_msg, 0x1F); - lap_msg->alt_local_lid = - htons(ntohl(sa_path_get_slid(alternate_path))); - lap_msg->alt_remote_lid = - htons(ntohl(sa_path_get_dlid(alternate_path))); - lap_msg->alt_local_gid = alternate_path->sgid; - lap_msg->alt_remote_gid = alternate_path->dgid; - if (alt_ext) { - lap_msg->alt_local_gid.global.interface_id - = OPA_MAKE_ID(be32_to_cpu(alternate_path->opa.slid)); - lap_msg->alt_remote_gid.global.interface_id - = OPA_MAKE_ID(be32_to_cpu(alternate_path->opa.dlid)); - } - cm_lap_set_flow_label(lap_msg, alternate_path->flow_label); - cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class); - lap_msg->alt_hop_limit = alternate_path->hop_limit; - cm_lap_set_packet_rate(lap_msg, alternate_path->rate); - cm_lap_set_sl(lap_msg, alternate_path->sl); - cm_lap_set_subnet_local(lap_msg, 1); /* local only... 
*/ - cm_lap_set_local_ack_timeout(lap_msg, - cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, - alternate_path->packet_life_time)); - - if (private_data && private_data_len) - memcpy(lap_msg->private_data, private_data, private_data_len); -} - -int ib_send_cm_lap(struct ib_cm_id *cm_id, - struct sa_path_rec *alternate_path, - const void *private_data, - u8 private_data_len) -{ - struct cm_id_private *cm_id_priv; - struct ib_mad_send_buf *msg; - unsigned long flags; - int ret; - - if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE) - return -EINVAL; - - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id->state != IB_CM_ESTABLISHED || - (cm_id->lap_state != IB_CM_LAP_UNINIT && - cm_id->lap_state != IB_CM_LAP_IDLE)) { - ret = -EINVAL; - goto out; - } - - ret = cm_init_av_by_path(alternate_path, NULL, &cm_id_priv->alt_av, - cm_id_priv); - if (ret) - goto out; - cm_id_priv->alt_av.timeout = - cm_ack_timeout(cm_id_priv->target_ack_delay, - cm_id_priv->alt_av.timeout - 1); - - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - goto out; - - cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv, - alternate_path, private_data, private_data_len); - msg->timeout_ms = cm_id_priv->timeout_ms; - msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED; - - ret = ib_post_send_mad(msg, NULL); - if (ret) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - cm_free_msg(msg); - return ret; - } - - cm_id->lap_state = IB_CM_LAP_SENT; - cm_id_priv->msg = msg; - -out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); - return ret; -} -EXPORT_SYMBOL(ib_send_cm_lap); - static void cm_format_path_lid_from_lap(struct cm_lap_msg *lap_msg, struct sa_path_rec *path) { u32 lid; if (path->rec_type != SA_PATH_REC_TYPE_OPA) { - sa_path_set_dlid(path, ntohs(lap_msg->alt_local_lid)); - sa_path_set_slid(path, ntohs(lap_msg->alt_remote_lid)); + sa_path_set_dlid(path, IBA_GET(CM_LAP_ALTERNATE_LOCAL_PORT_LID, + lap_msg)); + sa_path_set_slid(path, IBA_GET(CM_LAP_ALTERNATE_REMOTE_PORT_LID, + lap_msg)); } else { - lid = opa_get_lid_from_gid(&lap_msg->alt_local_gid); + lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR( + CM_LAP_ALTERNATE_LOCAL_PORT_GID, lap_msg)); sa_path_set_dlid(path, lid); - lid = opa_get_lid_from_gid(&lap_msg->alt_remote_gid); + lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR( + CM_LAP_ALTERNATE_REMOTE_PORT_GID, lap_msg)); sa_path_set_slid(path, lid); } } @@ -3199,20 +3198,23 @@ static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv, struct sa_path_rec *path, struct cm_lap_msg *lap_msg) { - path->dgid = lap_msg->alt_local_gid; - path->sgid = lap_msg->alt_remote_gid; - path->flow_label = cm_lap_get_flow_label(lap_msg); - path->hop_limit = lap_msg->alt_hop_limit; - path->traffic_class = cm_lap_get_traffic_class(lap_msg); + path->dgid = *IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_LOCAL_PORT_GID, lap_msg); + path->sgid = + *IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_REMOTE_PORT_GID, lap_msg); + path->flow_label = + cpu_to_be32(IBA_GET(CM_LAP_ALTERNATE_FLOW_LABEL, lap_msg)); + path->hop_limit = IBA_GET(CM_LAP_ALTERNATE_HOP_LIMIT, lap_msg); + path->traffic_class = IBA_GET(CM_LAP_ALTERNATE_TRAFFIC_CLASS, lap_msg); path->reversible = 1; path->pkey = cm_id_priv->pkey; - path->sl = cm_lap_get_sl(lap_msg); + path->sl = IBA_GET(CM_LAP_ALTERNATE_SL, lap_msg); path->mtu_selector = IB_SA_EQ; path->mtu = cm_id_priv->path_mtu; path->rate_selector = IB_SA_EQ; - path->rate = cm_lap_get_packet_rate(lap_msg); + path->rate = 
IBA_GET(CM_LAP_ALTERNATE_PACKET_RATE, lap_msg); path->packet_life_time_selector = IB_SA_EQ; - path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg); + path->packet_life_time = + IBA_GET(CM_LAP_ALTERNATE_LOCAL_ACK_TIMEOUT, lap_msg); path->packet_life_time -= (path->packet_life_time > 0); cm_format_path_lid_from_lap(lap_msg, path); } @@ -3234,20 +3236,22 @@ static int cm_lap_handler(struct cm_work *work) /* todo: verify LAP request and send reject APR if invalid. */ lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id, - lap_msg->local_comm_id); + cm_id_priv = cm_acquire_id( + cpu_to_be32(IBA_GET(CM_LAP_REMOTE_COMM_ID, lap_msg)), + cpu_to_be32(IBA_GET(CM_LAP_LOCAL_COMM_ID, lap_msg))); if (!cm_id_priv) return -EINVAL; param = &work->cm_event.param.lap_rcvd; memset(&work->path[0], 0, sizeof(work->path[1])); cm_path_set_rec_type(work->port->cm_dev->ib_device, - work->port->port_num, - &work->path[0], - &lap_msg->alt_local_gid); + work->port->port_num, &work->path[0], + IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_LOCAL_PORT_GID, + lap_msg)); param->alternate_path = &work->path[0]; cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg); - work->cm_event.private_data = &lap_msg->private_data; + work->cm_event.private_data = + IBA_GET_MEM_PTR(CM_LAP_PRIVATE_DATA, lap_msg); spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_ESTABLISHED) @@ -3312,72 +3316,6 @@ deref: cm_deref_id(cm_id_priv); return -EINVAL; } -static void cm_format_apr(struct cm_apr_msg *apr_msg, - struct cm_id_private *cm_id_priv, - enum ib_cm_apr_status status, - void *info, - u8 info_length, - const void *private_data, - u8 private_data_len) -{ - cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid); - apr_msg->local_comm_id = cm_id_priv->id.local_id; - apr_msg->remote_comm_id = cm_id_priv->id.remote_id; - apr_msg->ap_status = (u8) status; - - if (info && info_length) { - apr_msg->info_length = info_length; - memcpy(apr_msg->info, info, info_length); - } - - if (private_data && private_data_len) - memcpy(apr_msg->private_data, private_data, private_data_len); -} - -int ib_send_cm_apr(struct ib_cm_id *cm_id, - enum ib_cm_apr_status status, - void *info, - u8 info_length, - const void *private_data, - u8 private_data_len) -{ - struct cm_id_private *cm_id_priv; - struct ib_mad_send_buf *msg; - unsigned long flags; - int ret; - - if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) || - (info && info_length > IB_CM_APR_INFO_LENGTH)) - return -EINVAL; - - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id->state != IB_CM_ESTABLISHED || - (cm_id->lap_state != IB_CM_LAP_RCVD && - cm_id->lap_state != IB_CM_MRA_LAP_SENT)) { - ret = -EINVAL; - goto out; - } - - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - goto out; - - cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status, - info, info_length, private_data, private_data_len); - ret = ib_post_send_mad(msg, NULL); - if (ret) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - cm_free_msg(msg); - return ret; - } - - cm_id->lap_state = IB_CM_LAP_IDLE; -out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); - return ret; -} -EXPORT_SYMBOL(ib_send_cm_apr); - static int cm_apr_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; @@ -3392,15 +3330,20 @@ static int cm_apr_handler(struct cm_work *work) return -EINVAL; apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad; - 
cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id, - apr_msg->local_comm_id); + cm_id_priv = cm_acquire_id( + cpu_to_be32(IBA_GET(CM_APR_REMOTE_COMM_ID, apr_msg)), + cpu_to_be32(IBA_GET(CM_APR_LOCAL_COMM_ID, apr_msg))); if (!cm_id_priv) return -EINVAL; /* Unmatched reply. */ - work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status; - work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info; - work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length; - work->cm_event.private_data = &apr_msg->private_data; + work->cm_event.param.apr_rcvd.ap_status = + IBA_GET(CM_APR_AR_STATUS, apr_msg); + work->cm_event.param.apr_rcvd.apr_info = + IBA_GET_MEM_PTR(CM_APR_ADDITIONAL_INFORMATION, apr_msg); + work->cm_event.param.apr_rcvd.info_len = + IBA_GET(CM_APR_ADDITIONAL_INFORMATION_LENGTH, apr_msg); + work->cm_event.private_data = + IBA_GET_MEM_PTR(CM_APR_PRIVATE_DATA, apr_msg); spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_ESTABLISHED || @@ -3434,7 +3377,7 @@ static int cm_timewait_handler(struct cm_work *work) struct cm_id_private *cm_id_priv; int ret; - timewait_info = (struct cm_timewait_info *)work; + timewait_info = container_of(work, struct cm_timewait_info, work); spin_lock_irq(&cm.lock); list_del(&timewait_info->list); spin_unlock_irq(&cm.lock); @@ -3472,13 +3415,16 @@ static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg, { cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID, cm_form_tid(cm_id_priv)); - sidr_req_msg->request_id = cm_id_priv->id.local_id; - sidr_req_msg->pkey = param->path->pkey; - sidr_req_msg->service_id = param->service_id; + IBA_SET(CM_SIDR_REQ_REQUESTID, sidr_req_msg, + be32_to_cpu(cm_id_priv->id.local_id)); + IBA_SET(CM_SIDR_REQ_PARTITION_KEY, sidr_req_msg, + be16_to_cpu(param->path->pkey)); + IBA_SET(CM_SIDR_REQ_SERVICEID, sidr_req_msg, + be64_to_cpu(param->service_id)); if (param->private_data && param->private_data_len) - memcpy(sidr_req_msg->private_data, param->private_data, - param->private_data_len); + IBA_SET_MEM(CM_SIDR_REQ_PRIVATE_DATA, sidr_req_msg, + param->private_data, param->private_data_len); } int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, @@ -3542,13 +3488,15 @@ static void cm_format_sidr_req_event(struct cm_work *work, sidr_req_msg = (struct cm_sidr_req_msg *) work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.sidr_req_rcvd; - param->pkey = __be16_to_cpu(sidr_req_msg->pkey); + param->pkey = IBA_GET(CM_SIDR_REQ_PARTITION_KEY, sidr_req_msg); param->listen_id = listen_id; - param->service_id = sidr_req_msg->service_id; + param->service_id = + cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg)); param->bth_pkey = cm_get_bth_pkey(work); param->port = work->port->port_num; param->sgid_attr = rx_cm_id->av.ah_attr.grh.sgid_attr; - work->cm_event.private_data = &sidr_req_msg->private_data; + work->cm_event.private_data = + IBA_GET_MEM_PTR(CM_SIDR_REQ_PRIVATE_DATA, sidr_req_msg); } static int cm_sidr_req_handler(struct cm_work *work) @@ -3576,7 +3524,8 @@ static int cm_sidr_req_handler(struct cm_work *work) if (ret) goto out; - cm_id_priv->id.remote_id = sidr_req_msg->request_id; + cm_id_priv->id.remote_id = + cpu_to_be32(IBA_GET(CM_SIDR_REQ_REQUESTID, sidr_req_msg)); cm_id_priv->tid = sidr_req_msg->hdr.tid; atomic_inc(&cm_id_priv->work_count); @@ -3589,20 +3538,22 @@ static int cm_sidr_req_handler(struct cm_work *work) goto out; /* Duplicate message. 
*/ } cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD; - cur_cm_id_priv = cm_find_listen(cm_id->device, - sidr_req_msg->service_id); + cur_cm_id_priv = cm_find_listen( + cm_id->device, + cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg))); if (!cur_cm_id_priv) { spin_unlock_irq(&cm.lock); cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED); goto out; /* No match. */ } - atomic_inc(&cur_cm_id_priv->refcount); - atomic_inc(&cm_id_priv->refcount); + refcount_inc(&cur_cm_id_priv->refcount); + refcount_inc(&cm_id_priv->refcount); spin_unlock_irq(&cm.lock); cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler; cm_id_priv->id.context = cur_cm_id_priv->id.context; - cm_id_priv->id.service_id = sidr_req_msg->service_id; + cm_id_priv->id.service_id = + cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg)); cm_id_priv->id.service_mask = ~cpu_to_be64(0); cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id); @@ -3620,18 +3571,21 @@ static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg, { cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID, cm_id_priv->tid); - sidr_rep_msg->request_id = cm_id_priv->id.remote_id; - sidr_rep_msg->status = param->status; - cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num)); - sidr_rep_msg->service_id = cm_id_priv->id.service_id; - sidr_rep_msg->qkey = cpu_to_be32(param->qkey); + IBA_SET(CM_SIDR_REP_REQUESTID, sidr_rep_msg, + be32_to_cpu(cm_id_priv->id.remote_id)); + IBA_SET(CM_SIDR_REP_STATUS, sidr_rep_msg, param->status); + IBA_SET(CM_SIDR_REP_QPN, sidr_rep_msg, param->qp_num); + IBA_SET(CM_SIDR_REP_SERVICEID, sidr_rep_msg, + be64_to_cpu(cm_id_priv->id.service_id)); + IBA_SET(CM_SIDR_REP_Q_KEY, sidr_rep_msg, param->qkey); if (param->info && param->info_length) - memcpy(sidr_rep_msg->info, param->info, param->info_length); + IBA_SET_MEM(CM_SIDR_REP_ADDITIONAL_INFORMATION, sidr_rep_msg, + param->info, param->info_length); if (param->private_data && param->private_data_len) - memcpy(sidr_rep_msg->private_data, param->private_data, - param->private_data_len); + IBA_SET_MEM(CM_SIDR_REP_PRIVATE_DATA, sidr_rep_msg, + param->private_data, param->private_data_len); } int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, @@ -3691,13 +3645,16 @@ static void cm_format_sidr_rep_event(struct cm_work *work, sidr_rep_msg = (struct cm_sidr_rep_msg *) work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.sidr_rep_rcvd; - param->status = sidr_rep_msg->status; - param->qkey = be32_to_cpu(sidr_rep_msg->qkey); - param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg)); - param->info = &sidr_rep_msg->info; - param->info_len = sidr_rep_msg->info_length; + param->status = IBA_GET(CM_SIDR_REP_STATUS, sidr_rep_msg); + param->qkey = IBA_GET(CM_SIDR_REP_Q_KEY, sidr_rep_msg); + param->qpn = IBA_GET(CM_SIDR_REP_QPN, sidr_rep_msg); + param->info = IBA_GET_MEM_PTR(CM_SIDR_REP_ADDITIONAL_INFORMATION, + sidr_rep_msg); + param->info_len = IBA_GET(CM_SIDR_REP_ADDITIONAL_INFORMATION_LENGTH, + sidr_rep_msg); param->sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr; - work->cm_event.private_data = &sidr_rep_msg->private_data; + work->cm_event.private_data = + IBA_GET_MEM_PTR(CM_SIDR_REP_PRIVATE_DATA, sidr_rep_msg); } static int cm_sidr_rep_handler(struct cm_work *work) @@ -3707,7 +3664,8 @@ static int cm_sidr_rep_handler(struct cm_work *work) sidr_rep_msg = (struct cm_sidr_rep_msg *) work->mad_recv_wc->recv_buf.mad; - cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0); + cm_id_priv = cm_acquire_id( + cpu_to_be32(IBA_GET(CM_SIDR_REP_REQUESTID, 
sidr_rep_msg)), 0); if (!cm_id_priv) return -EINVAL; /* Unmatched reply. */ @@ -4399,6 +4357,7 @@ error2: error1: port_modify.set_port_cap_mask = 0; port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; + kfree(port); while (--i) { if (!rdma_cap_ib_cm(ib_device, i)) continue; @@ -4407,6 +4366,7 @@ error1: ib_modify_port(ib_device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); + kfree(port); } free: kfree(cm_dev); @@ -4460,6 +4420,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) spin_unlock_irq(&cm.state_lock); ib_unregister_mad_agent(cur_mad_agent); cm_remove_port_fs(port); + kfree(port); } kfree(cm_dev); diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 3d16d614aff6..0cc40656b5c5 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -1,39 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2004, 2011 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING the madirectory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use source and binary forms, with or - * withmodification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retathe above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHWARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS THE - * SOFTWARE. + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. */ -#if !defined(CM_MSGS_H) +#ifndef CM_MSGS_H #define CM_MSGS_H +#include <rdma/ibta_vol1_c12.h> #include <rdma/ib_mad.h> #include <rdma/ib_cm.h> @@ -44,120 +19,14 @@ #define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */ -struct cm_req_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 rsvd4; - __be64 service_id; - __be64 local_ca_guid; - __be32 rsvd24; - __be32 local_qkey; - /* local QPN:24, responder resources:8 */ - __be32 offset32; - /* local EECN:24, initiator depth:8 */ - __be32 offset36; - /* - * remote EECN:24, remote CM response timeout:5, - * transport service type:2, end-to-end flow control:1 - */ - __be32 offset40; - /* starting PSN:24, local CM response timeout:5, retry count:3 */ - __be32 offset44; - __be16 pkey; - /* path MTU:4, RDC exists:1, RNR retry count:3. 
*/ - u8 offset50; - /* max CM Retries:4, SRQ:1, extended transport type:3 */ - u8 offset51; - - __be16 primary_local_lid; - __be16 primary_remote_lid; - union ib_gid primary_local_gid; - union ib_gid primary_remote_gid; - /* flow label:20, rsvd:6, packet rate:6 */ - __be32 primary_offset88; - u8 primary_traffic_class; - u8 primary_hop_limit; - /* SL:4, subnet local:1, rsvd:3 */ - u8 primary_offset94; - /* local ACK timeout:5, rsvd:3 */ - u8 primary_offset95; - - __be16 alt_local_lid; - __be16 alt_remote_lid; - union ib_gid alt_local_gid; - union ib_gid alt_remote_gid; - /* flow label:20, rsvd:6, packet rate:6 */ - __be32 alt_offset132; - u8 alt_traffic_class; - u8 alt_hop_limit; - /* SL:4, subnet local:1, rsvd:3 */ - u8 alt_offset138; - /* local ACK timeout:5, rsvd:3 */ - u8 alt_offset139; - - u32 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE / sizeof(u32)]; - -} __packed; - -static inline __be32 cm_req_get_local_qpn(struct cm_req_msg *req_msg) -{ - return cpu_to_be32(be32_to_cpu(req_msg->offset32) >> 8); -} - -static inline void cm_req_set_local_qpn(struct cm_req_msg *req_msg, __be32 qpn) -{ - req_msg->offset32 = cpu_to_be32((be32_to_cpu(qpn) << 8) | - (be32_to_cpu(req_msg->offset32) & - 0x000000FF)); -} - -static inline u8 cm_req_get_resp_res(struct cm_req_msg *req_msg) -{ - return (u8) be32_to_cpu(req_msg->offset32); -} - -static inline void cm_req_set_resp_res(struct cm_req_msg *req_msg, u8 resp_res) -{ - req_msg->offset32 = cpu_to_be32(resp_res | - (be32_to_cpu(req_msg->offset32) & - 0xFFFFFF00)); -} - -static inline u8 cm_req_get_init_depth(struct cm_req_msg *req_msg) -{ - return (u8) be32_to_cpu(req_msg->offset36); -} - -static inline void cm_req_set_init_depth(struct cm_req_msg *req_msg, - u8 init_depth) -{ - req_msg->offset36 = cpu_to_be32(init_depth | - (be32_to_cpu(req_msg->offset36) & - 0xFFFFFF00)); -} - -static inline u8 cm_req_get_remote_resp_timeout(struct cm_req_msg *req_msg) -{ - return (u8) ((be32_to_cpu(req_msg->offset40) & 0xF8) >> 3); -} - -static inline void cm_req_set_remote_resp_timeout(struct cm_req_msg *req_msg, - u8 resp_timeout) -{ - req_msg->offset40 = cpu_to_be32((resp_timeout << 3) | - (be32_to_cpu(req_msg->offset40) & - 0xFFFFFF07)); -} - static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg) { - u8 transport_type = (u8) (be32_to_cpu(req_msg->offset40) & 0x06) >> 1; + u8 transport_type = IBA_GET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg); switch(transport_type) { case 0: return IB_QPT_RC; case 1: return IB_QPT_UC; case 3: - switch (req_msg->offset51 & 0x7) { + switch (IBA_GET(CM_REQ_EXTENDED_TRANSPORT_TYPE, req_msg)) { case 1: return IB_QPT_XRC_TGT; default: return 0; } @@ -170,240 +39,17 @@ static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg, { switch(qp_type) { case IB_QPT_UC: - req_msg->offset40 = cpu_to_be32((be32_to_cpu( - req_msg->offset40) & - 0xFFFFFFF9) | 0x2); + IBA_SET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg, 1); break; case IB_QPT_XRC_INI: - req_msg->offset40 = cpu_to_be32((be32_to_cpu( - req_msg->offset40) & - 0xFFFFFFF9) | 0x6); - req_msg->offset51 = (req_msg->offset51 & 0xF8) | 1; + IBA_SET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg, 3); + IBA_SET(CM_REQ_EXTENDED_TRANSPORT_TYPE, req_msg, 1); break; default: - req_msg->offset40 = cpu_to_be32(be32_to_cpu( - req_msg->offset40) & - 0xFFFFFFF9); + IBA_SET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg, 0); } } -static inline u8 cm_req_get_flow_ctrl(struct cm_req_msg *req_msg) -{ - return be32_to_cpu(req_msg->offset40) & 0x1; -} - -static inline void cm_req_set_flow_ctrl(struct 
cm_req_msg *req_msg, - u8 flow_ctrl) -{ - req_msg->offset40 = cpu_to_be32((flow_ctrl & 0x1) | - (be32_to_cpu(req_msg->offset40) & - 0xFFFFFFFE)); -} - -static inline __be32 cm_req_get_starting_psn(struct cm_req_msg *req_msg) -{ - return cpu_to_be32(be32_to_cpu(req_msg->offset44) >> 8); -} - -static inline void cm_req_set_starting_psn(struct cm_req_msg *req_msg, - __be32 starting_psn) -{ - req_msg->offset44 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) | - (be32_to_cpu(req_msg->offset44) & 0x000000FF)); -} - -static inline u8 cm_req_get_local_resp_timeout(struct cm_req_msg *req_msg) -{ - return (u8) ((be32_to_cpu(req_msg->offset44) & 0xF8) >> 3); -} - -static inline void cm_req_set_local_resp_timeout(struct cm_req_msg *req_msg, - u8 resp_timeout) -{ - req_msg->offset44 = cpu_to_be32((resp_timeout << 3) | - (be32_to_cpu(req_msg->offset44) & 0xFFFFFF07)); -} - -static inline u8 cm_req_get_retry_count(struct cm_req_msg *req_msg) -{ - return (u8) (be32_to_cpu(req_msg->offset44) & 0x7); -} - -static inline void cm_req_set_retry_count(struct cm_req_msg *req_msg, - u8 retry_count) -{ - req_msg->offset44 = cpu_to_be32((retry_count & 0x7) | - (be32_to_cpu(req_msg->offset44) & 0xFFFFFFF8)); -} - -static inline u8 cm_req_get_path_mtu(struct cm_req_msg *req_msg) -{ - return req_msg->offset50 >> 4; -} - -static inline void cm_req_set_path_mtu(struct cm_req_msg *req_msg, u8 path_mtu) -{ - req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF) | (path_mtu << 4)); -} - -static inline u8 cm_req_get_rnr_retry_count(struct cm_req_msg *req_msg) -{ - return req_msg->offset50 & 0x7; -} - -static inline void cm_req_set_rnr_retry_count(struct cm_req_msg *req_msg, - u8 rnr_retry_count) -{ - req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF8) | - (rnr_retry_count & 0x7)); -} - -static inline u8 cm_req_get_max_cm_retries(struct cm_req_msg *req_msg) -{ - return req_msg->offset51 >> 4; -} - -static inline void cm_req_set_max_cm_retries(struct cm_req_msg *req_msg, - u8 retries) -{ - req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF) | (retries << 4)); -} - -static inline u8 cm_req_get_srq(struct cm_req_msg *req_msg) -{ - return (req_msg->offset51 & 0x8) >> 3; -} - -static inline void cm_req_set_srq(struct cm_req_msg *req_msg, u8 srq) -{ - req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF7) | - ((srq & 0x1) << 3)); -} - -static inline __be32 cm_req_get_primary_flow_label(struct cm_req_msg *req_msg) -{ - return cpu_to_be32(be32_to_cpu(req_msg->primary_offset88) >> 12); -} - -static inline void cm_req_set_primary_flow_label(struct cm_req_msg *req_msg, - __be32 flow_label) -{ - req_msg->primary_offset88 = cpu_to_be32( - (be32_to_cpu(req_msg->primary_offset88) & - 0x00000FFF) | - (be32_to_cpu(flow_label) << 12)); -} - -static inline u8 cm_req_get_primary_packet_rate(struct cm_req_msg *req_msg) -{ - return (u8) (be32_to_cpu(req_msg->primary_offset88) & 0x3F); -} - -static inline void cm_req_set_primary_packet_rate(struct cm_req_msg *req_msg, - u8 rate) -{ - req_msg->primary_offset88 = cpu_to_be32( - (be32_to_cpu(req_msg->primary_offset88) & - 0xFFFFFFC0) | (rate & 0x3F)); -} - -static inline u8 cm_req_get_primary_sl(struct cm_req_msg *req_msg) -{ - return (u8) (req_msg->primary_offset94 >> 4); -} - -static inline void cm_req_set_primary_sl(struct cm_req_msg *req_msg, u8 sl) -{ - req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0x0F) | - (sl << 4)); -} - -static inline u8 cm_req_get_primary_subnet_local(struct cm_req_msg *req_msg) -{ - return (u8) ((req_msg->primary_offset94 & 0x08) >> 3); -} - -static inline 
void cm_req_set_primary_subnet_local(struct cm_req_msg *req_msg, - u8 subnet_local) -{ - req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0xF7) | - ((subnet_local & 0x1) << 3)); -} - -static inline u8 cm_req_get_primary_local_ack_timeout(struct cm_req_msg *req_msg) -{ - return (u8) (req_msg->primary_offset95 >> 3); -} - -static inline void cm_req_set_primary_local_ack_timeout(struct cm_req_msg *req_msg, - u8 local_ack_timeout) -{ - req_msg->primary_offset95 = (u8) ((req_msg->primary_offset95 & 0x07) | - (local_ack_timeout << 3)); -} - -static inline __be32 cm_req_get_alt_flow_label(struct cm_req_msg *req_msg) -{ - return cpu_to_be32(be32_to_cpu(req_msg->alt_offset132) >> 12); -} - -static inline void cm_req_set_alt_flow_label(struct cm_req_msg *req_msg, - __be32 flow_label) -{ - req_msg->alt_offset132 = cpu_to_be32( - (be32_to_cpu(req_msg->alt_offset132) & - 0x00000FFF) | - (be32_to_cpu(flow_label) << 12)); -} - -static inline u8 cm_req_get_alt_packet_rate(struct cm_req_msg *req_msg) -{ - return (u8) (be32_to_cpu(req_msg->alt_offset132) & 0x3F); -} - -static inline void cm_req_set_alt_packet_rate(struct cm_req_msg *req_msg, - u8 rate) -{ - req_msg->alt_offset132 = cpu_to_be32( - (be32_to_cpu(req_msg->alt_offset132) & - 0xFFFFFFC0) | (rate & 0x3F)); -} - -static inline u8 cm_req_get_alt_sl(struct cm_req_msg *req_msg) -{ - return (u8) (req_msg->alt_offset138 >> 4); -} - -static inline void cm_req_set_alt_sl(struct cm_req_msg *req_msg, u8 sl) -{ - req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0x0F) | - (sl << 4)); -} - -static inline u8 cm_req_get_alt_subnet_local(struct cm_req_msg *req_msg) -{ - return (u8) ((req_msg->alt_offset138 & 0x08) >> 3); -} - -static inline void cm_req_set_alt_subnet_local(struct cm_req_msg *req_msg, - u8 subnet_local) -{ - req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0xF7) | - ((subnet_local & 0x1) << 3)); -} - -static inline u8 cm_req_get_alt_local_ack_timeout(struct cm_req_msg *req_msg) -{ - return (u8) (req_msg->alt_offset139 >> 3); -} - -static inline void cm_req_set_alt_local_ack_timeout(struct cm_req_msg *req_msg, - u8 local_ack_timeout) -{ - req_msg->alt_offset139 = (u8) ((req_msg->alt_offset139 & 0x07) | - (local_ack_timeout << 3)); -} - /* Message REJected or MRAed */ enum cm_msg_response { CM_MSG_RESPONSE_REQ = 0x0, @@ -411,419 +57,12 @@ enum cm_msg_response { CM_MSG_RESPONSE_OTHER = 0x2 }; - struct cm_mra_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 remote_comm_id; - /* message MRAed:2, rsvd:6 */ - u8 offset8; - /* service timeout:5, rsvd:3 */ - u8 offset9; - - u8 private_data[IB_CM_MRA_PRIVATE_DATA_SIZE]; - -} __packed; - -static inline u8 cm_mra_get_msg_mraed(struct cm_mra_msg *mra_msg) -{ - return (u8) (mra_msg->offset8 >> 6); -} - -static inline void cm_mra_set_msg_mraed(struct cm_mra_msg *mra_msg, u8 msg) -{ - mra_msg->offset8 = (u8) ((mra_msg->offset8 & 0x3F) | (msg << 6)); -} - -static inline u8 cm_mra_get_service_timeout(struct cm_mra_msg *mra_msg) -{ - return (u8) (mra_msg->offset9 >> 3); -} - -static inline void cm_mra_set_service_timeout(struct cm_mra_msg *mra_msg, - u8 service_timeout) -{ - mra_msg->offset9 = (u8) ((mra_msg->offset9 & 0x07) | - (service_timeout << 3)); -} - -struct cm_rej_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 remote_comm_id; - /* message REJected:2, rsvd:6 */ - u8 offset8; - /* reject info length:7, rsvd:1. 
*/ - u8 offset9; - __be16 reason; - u8 ari[IB_CM_REJ_ARI_LENGTH]; - - u8 private_data[IB_CM_REJ_PRIVATE_DATA_SIZE]; - -} __packed; - -static inline u8 cm_rej_get_msg_rejected(struct cm_rej_msg *rej_msg) -{ - return (u8) (rej_msg->offset8 >> 6); -} - -static inline void cm_rej_set_msg_rejected(struct cm_rej_msg *rej_msg, u8 msg) -{ - rej_msg->offset8 = (u8) ((rej_msg->offset8 & 0x3F) | (msg << 6)); -} - -static inline u8 cm_rej_get_reject_info_len(struct cm_rej_msg *rej_msg) -{ - return (u8) (rej_msg->offset9 >> 1); -} - -static inline void cm_rej_set_reject_info_len(struct cm_rej_msg *rej_msg, - u8 len) -{ - rej_msg->offset9 = (u8) ((rej_msg->offset9 & 0x1) | (len << 1)); -} - -struct cm_rep_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 remote_comm_id; - __be32 local_qkey; - /* local QPN:24, rsvd:8 */ - __be32 offset12; - /* local EECN:24, rsvd:8 */ - __be32 offset16; - /* starting PSN:24 rsvd:8 */ - __be32 offset20; - u8 resp_resources; - u8 initiator_depth; - /* target ACK delay:5, failover accepted:2, end-to-end flow control:1 */ - u8 offset26; - /* RNR retry count:3, SRQ:1, rsvd:5 */ - u8 offset27; - __be64 local_ca_guid; - - u8 private_data[IB_CM_REP_PRIVATE_DATA_SIZE]; - -} __packed; - -static inline __be32 cm_rep_get_local_qpn(struct cm_rep_msg *rep_msg) -{ - return cpu_to_be32(be32_to_cpu(rep_msg->offset12) >> 8); -} - -static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, __be32 qpn) -{ - rep_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) | - (be32_to_cpu(rep_msg->offset12) & 0x000000FF)); -} - -static inline __be32 cm_rep_get_local_eecn(struct cm_rep_msg *rep_msg) -{ - return cpu_to_be32(be32_to_cpu(rep_msg->offset16) >> 8); -} - -static inline void cm_rep_set_local_eecn(struct cm_rep_msg *rep_msg, __be32 eecn) -{ - rep_msg->offset16 = cpu_to_be32((be32_to_cpu(eecn) << 8) | - (be32_to_cpu(rep_msg->offset16) & 0x000000FF)); -} - static inline __be32 cm_rep_get_qpn(struct cm_rep_msg *rep_msg, enum ib_qp_type qp_type) { return (qp_type == IB_QPT_XRC_INI) ? 
- cm_rep_get_local_eecn(rep_msg) : cm_rep_get_local_qpn(rep_msg); -} - -static inline __be32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg) -{ - return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8); -} - -static inline void cm_rep_set_starting_psn(struct cm_rep_msg *rep_msg, - __be32 starting_psn) -{ - rep_msg->offset20 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) | - (be32_to_cpu(rep_msg->offset20) & 0x000000FF)); -} - -static inline u8 cm_rep_get_target_ack_delay(struct cm_rep_msg *rep_msg) -{ - return (u8) (rep_msg->offset26 >> 3); -} - -static inline void cm_rep_set_target_ack_delay(struct cm_rep_msg *rep_msg, - u8 target_ack_delay) -{ - rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0x07) | - (target_ack_delay << 3)); -} - -static inline u8 cm_rep_get_failover(struct cm_rep_msg *rep_msg) -{ - return (u8) ((rep_msg->offset26 & 0x06) >> 1); -} - -static inline void cm_rep_set_failover(struct cm_rep_msg *rep_msg, u8 failover) -{ - rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xF9) | - ((failover & 0x3) << 1)); -} - -static inline u8 cm_rep_get_flow_ctrl(struct cm_rep_msg *rep_msg) -{ - return (u8) (rep_msg->offset26 & 0x01); -} - -static inline void cm_rep_set_flow_ctrl(struct cm_rep_msg *rep_msg, - u8 flow_ctrl) -{ - rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xFE) | - (flow_ctrl & 0x1)); -} - -static inline u8 cm_rep_get_rnr_retry_count(struct cm_rep_msg *rep_msg) -{ - return (u8) (rep_msg->offset27 >> 5); -} - -static inline void cm_rep_set_rnr_retry_count(struct cm_rep_msg *rep_msg, - u8 rnr_retry_count) -{ - rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0x1F) | - (rnr_retry_count << 5)); -} - -static inline u8 cm_rep_get_srq(struct cm_rep_msg *rep_msg) -{ - return (u8) ((rep_msg->offset27 >> 4) & 0x1); -} - -static inline void cm_rep_set_srq(struct cm_rep_msg *rep_msg, u8 srq) -{ - rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0xEF) | - ((srq & 0x1) << 4)); -} - -struct cm_rtu_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 remote_comm_id; - - u8 private_data[IB_CM_RTU_PRIVATE_DATA_SIZE]; - -} __packed; - -struct cm_dreq_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 remote_comm_id; - /* remote QPN/EECN:24, rsvd:8 */ - __be32 offset8; - - u8 private_data[IB_CM_DREQ_PRIVATE_DATA_SIZE]; - -} __packed; - -static inline __be32 cm_dreq_get_remote_qpn(struct cm_dreq_msg *dreq_msg) -{ - return cpu_to_be32(be32_to_cpu(dreq_msg->offset8) >> 8); -} - -static inline void cm_dreq_set_remote_qpn(struct cm_dreq_msg *dreq_msg, __be32 qpn) -{ - dreq_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) | - (be32_to_cpu(dreq_msg->offset8) & 0x000000FF)); -} - -struct cm_drep_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 remote_comm_id; - - u8 private_data[IB_CM_DREP_PRIVATE_DATA_SIZE]; - -} __packed; - -struct cm_lap_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 remote_comm_id; - - __be32 rsvd8; - /* remote QPN/EECN:24, remote CM response timeout:5, rsvd:3 */ - __be32 offset12; - __be32 rsvd16; - - __be16 alt_local_lid; - __be16 alt_remote_lid; - union ib_gid alt_local_gid; - union ib_gid alt_remote_gid; - /* flow label:20, rsvd:4, traffic class:8 */ - __be32 offset56; - u8 alt_hop_limit; - /* rsvd:2, packet rate:6 */ - u8 offset61; - /* SL:4, subnet local:1, rsvd:3 */ - u8 offset62; - /* local ACK timeout:5, rsvd:3 */ - u8 offset63; - - u8 private_data[IB_CM_LAP_PRIVATE_DATA_SIZE]; -} __packed; - -static inline __be32 cm_lap_get_remote_qpn(struct cm_lap_msg *lap_msg) -{ - return 
cpu_to_be32(be32_to_cpu(lap_msg->offset12) >> 8); -} - -static inline void cm_lap_set_remote_qpn(struct cm_lap_msg *lap_msg, __be32 qpn) -{ - lap_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) | - (be32_to_cpu(lap_msg->offset12) & - 0x000000FF)); -} - -static inline u8 cm_lap_get_remote_resp_timeout(struct cm_lap_msg *lap_msg) -{ - return (u8) ((be32_to_cpu(lap_msg->offset12) & 0xF8) >> 3); -} - -static inline void cm_lap_set_remote_resp_timeout(struct cm_lap_msg *lap_msg, - u8 resp_timeout) -{ - lap_msg->offset12 = cpu_to_be32((resp_timeout << 3) | - (be32_to_cpu(lap_msg->offset12) & - 0xFFFFFF07)); -} - -static inline __be32 cm_lap_get_flow_label(struct cm_lap_msg *lap_msg) -{ - return cpu_to_be32(be32_to_cpu(lap_msg->offset56) >> 12); -} - -static inline void cm_lap_set_flow_label(struct cm_lap_msg *lap_msg, - __be32 flow_label) -{ - lap_msg->offset56 = cpu_to_be32( - (be32_to_cpu(lap_msg->offset56) & 0x00000FFF) | - (be32_to_cpu(flow_label) << 12)); -} - -static inline u8 cm_lap_get_traffic_class(struct cm_lap_msg *lap_msg) -{ - return (u8) be32_to_cpu(lap_msg->offset56); -} - -static inline void cm_lap_set_traffic_class(struct cm_lap_msg *lap_msg, - u8 traffic_class) -{ - lap_msg->offset56 = cpu_to_be32(traffic_class | - (be32_to_cpu(lap_msg->offset56) & - 0xFFFFFF00)); -} - -static inline u8 cm_lap_get_packet_rate(struct cm_lap_msg *lap_msg) -{ - return lap_msg->offset61 & 0x3F; -} - -static inline void cm_lap_set_packet_rate(struct cm_lap_msg *lap_msg, - u8 packet_rate) -{ - lap_msg->offset61 = (packet_rate & 0x3F) | (lap_msg->offset61 & 0xC0); -} - -static inline u8 cm_lap_get_sl(struct cm_lap_msg *lap_msg) -{ - return lap_msg->offset62 >> 4; -} - -static inline void cm_lap_set_sl(struct cm_lap_msg *lap_msg, u8 sl) -{ - lap_msg->offset62 = (sl << 4) | (lap_msg->offset62 & 0x0F); -} - -static inline u8 cm_lap_get_subnet_local(struct cm_lap_msg *lap_msg) -{ - return (lap_msg->offset62 >> 3) & 0x1; -} - -static inline void cm_lap_set_subnet_local(struct cm_lap_msg *lap_msg, - u8 subnet_local) -{ - lap_msg->offset62 = ((subnet_local & 0x1) << 3) | - (lap_msg->offset61 & 0xF7); -} -static inline u8 cm_lap_get_local_ack_timeout(struct cm_lap_msg *lap_msg) -{ - return lap_msg->offset63 >> 3; -} - -static inline void cm_lap_set_local_ack_timeout(struct cm_lap_msg *lap_msg, - u8 local_ack_timeout) -{ - lap_msg->offset63 = (local_ack_timeout << 3) | - (lap_msg->offset63 & 0x07); -} - -struct cm_apr_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 remote_comm_id; - - u8 info_length; - u8 ap_status; - __be16 rsvd; - u8 info[IB_CM_APR_INFO_LENGTH]; - - u8 private_data[IB_CM_APR_PRIVATE_DATA_SIZE]; -} __packed; - -struct cm_sidr_req_msg { - struct ib_mad_hdr hdr; - - __be32 request_id; - __be16 pkey; - __be16 rsvd; - __be64 service_id; - - u32 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE / sizeof(u32)]; -} __packed; - -struct cm_sidr_rep_msg { - struct ib_mad_hdr hdr; - - __be32 request_id; - u8 status; - u8 info_length; - __be16 rsvd; - /* QPN:24, rsvd:8 */ - __be32 offset8; - __be64 service_id; - __be32 qkey; - u8 info[IB_CM_SIDR_REP_INFO_LENGTH]; - - u8 private_data[IB_CM_SIDR_REP_PRIVATE_DATA_SIZE]; -} __packed; - -static inline __be32 cm_sidr_rep_get_qpn(struct cm_sidr_rep_msg *sidr_rep_msg) -{ - return cpu_to_be32(be32_to_cpu(sidr_rep_msg->offset8) >> 8); -} - -static inline void cm_sidr_rep_set_qpn(struct cm_sidr_rep_msg *sidr_rep_msg, - __be32 qpn) -{ - sidr_rep_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) | - (be32_to_cpu(sidr_rep_msg->offset8) & - 
0x000000FF)); + cpu_to_be32(IBA_GET(CM_REP_LOCAL_EE_CONTEXT_NUMBER, + rep_msg)) : + cpu_to_be32(IBA_GET(CM_REP_LOCAL_QPN, rep_msg)); } #endif /* CM_MSGS_H */ diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 19f1730a4f24..72f032160c4b 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1,36 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. - * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. + * Copyright (c) 1999-2019, Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
*/ #include <linux/completion.h> @@ -63,6 +36,7 @@ #include "core_priv.h" #include "cma_priv.h" +#include "cma_trace.h" MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("Generic RDMA CM Agent"); @@ -904,6 +878,7 @@ struct rdma_cm_id *__rdma_create_id(struct net *net, id_priv->id.route.addr.dev_addr.net = get_net(net); id_priv->seq_num &= 0x00ffffff; + trace_cm_id_create(id_priv); return &id_priv->id; } EXPORT_SYMBOL(__rdma_create_id); @@ -955,27 +930,34 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (id->device != pd->device) - return -EINVAL; + if (id->device != pd->device) { + ret = -EINVAL; + goto out_err; + } qp_init_attr->port_num = id->port_num; qp = ib_create_qp(pd, qp_init_attr); - if (IS_ERR(qp)) - return PTR_ERR(qp); + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); + goto out_err; + } if (id->qp_type == IB_QPT_UD) ret = cma_init_ud_qp(id_priv, qp); else ret = cma_init_conn_qp(id_priv, qp); if (ret) - goto err; + goto out_destroy; id->qp = qp; id_priv->qp_num = qp->qp_num; id_priv->srq = (qp->srq != NULL); + trace_cm_qp_create(id_priv, pd, qp_init_attr, 0); return 0; -err: +out_destroy: ib_destroy_qp(qp); +out_err: + trace_cm_qp_create(id_priv, pd, qp_init_attr, ret); return ret; } EXPORT_SYMBOL(rdma_create_qp); @@ -985,6 +967,7 @@ void rdma_destroy_qp(struct rdma_cm_id *id) struct rdma_id_private *id_priv; id_priv = container_of(id, struct rdma_id_private, id); + trace_cm_qp_destroy(id_priv); mutex_lock(&id_priv->qp_mutex); ib_destroy_qp(id_priv->id.qp); id_priv->id.qp = NULL; @@ -1838,6 +1821,7 @@ void rdma_destroy_id(struct rdma_cm_id *id) enum rdma_cm_state state; id_priv = container_of(id, struct rdma_id_private, id); + trace_cm_id_destroy(id_priv); state = cma_exch(id_priv, RDMA_CM_DESTROYING); cma_cancel_operation(id_priv, state); @@ -1890,6 +1874,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv) if (ret) goto reject; + trace_cm_send_rtu(id_priv); ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0); if (ret) goto reject; @@ -1898,6 +1883,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv) reject: pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply. status %d\n", ret); cma_modify_qp_err(id_priv); + trace_cm_send_rej(id_priv); ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, NULL, 0); return ret; @@ -1917,6 +1903,17 @@ static void cma_set_rep_event_data(struct rdma_cm_event *event, event->param.conn.qp_num = rep_data->remote_qpn; } +static int cma_cm_event_handler(struct rdma_id_private *id_priv, + struct rdma_cm_event *event) +{ + int ret; + + trace_cm_event_handler(id_priv, event); + ret = id_priv->id.event_handler(&id_priv->id, event); + trace_cm_event_done(id_priv, event, ret); + return ret; +} + static int cma_ib_handler(struct ib_cm_id *cm_id, const struct ib_cm_event *ib_event) { @@ -1939,8 +1936,10 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, break; case IB_CM_REP_RECEIVED: if (cma_comp(id_priv, RDMA_CM_CONNECT) && - (id_priv->id.qp_type != IB_QPT_UD)) + (id_priv->id.qp_type != IB_QPT_UD)) { + trace_cm_send_mra(id_priv); ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); + } if (id_priv->id.qp) { event.status = cma_rep_recv(id_priv); event.event = event.status ? 
RDMA_CM_EVENT_CONNECT_ERROR : @@ -1985,7 +1984,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, goto out; } - ret = id_priv->id.event_handler(&id_priv->id, &event); + ret = cma_cm_event_handler(id_priv, &event); if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.ib = NULL; @@ -2146,6 +2145,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id, if (IS_ERR(listen_id)) return PTR_ERR(listen_id); + trace_cm_req_handler(listen_id, ib_event->event); if (!cma_ib_check_req_qp_type(&listen_id->id, ib_event)) { ret = -EINVAL; goto net_dev_put; @@ -2188,7 +2188,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id, * until we're done accessing it. */ atomic_inc(&conn_id->refcount); - ret = conn_id->id.event_handler(&conn_id->id, &event); + ret = cma_cm_event_handler(conn_id, &event); if (ret) goto err3; /* @@ -2197,8 +2197,10 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id, */ mutex_lock(&lock); if (cma_comp(conn_id, RDMA_CM_CONNECT) && - (conn_id->id.qp_type != IB_QPT_UD)) + (conn_id->id.qp_type != IB_QPT_UD)) { + trace_cm_send_mra(cm_id->context); ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); + } mutex_unlock(&lock); mutex_unlock(&conn_id->handler_mutex); mutex_unlock(&listen_id->handler_mutex); @@ -2313,7 +2315,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) event.status = iw_event->status; event.param.conn.private_data = iw_event->private_data; event.param.conn.private_data_len = iw_event->private_data_len; - ret = id_priv->id.event_handler(&id_priv->id, &event); + ret = cma_cm_event_handler(id_priv, &event); if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.iw = NULL; @@ -2390,15 +2392,16 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, * until we're done accessing it. */ atomic_inc(&conn_id->refcount); - ret = conn_id->id.event_handler(&conn_id->id, &event); + ret = cma_cm_event_handler(conn_id, &event); if (ret) { /* User wants to destroy the CM ID */ conn_id->cm_id.iw = NULL; cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); + mutex_unlock(&listen_id->handler_mutex); cma_deref_id(conn_id); rdma_destroy_id(&conn_id->id); - goto out; + return ret; } mutex_unlock(&conn_id->handler_mutex); @@ -2461,6 +2464,7 @@ static int cma_listen_handler(struct rdma_cm_id *id, id->context = id_priv->id.context; id->event_handler = id_priv->id.event_handler; + trace_cm_event_handler(id_priv, event); return id_priv->id.event_handler(id, event); } @@ -2530,7 +2534,9 @@ EXPORT_SYMBOL(rdma_set_service_type); * This function should be called before rdma_connect() on active side, * and on passive side before rdma_accept(). It is applicable to primary * path only. The timeout will affect the local side of the QP, it is not - * negotiated with remote side and zero disables the timer. + * negotiated with remote side and zero disables the timer. In case it is + * set before rdma_resolve_route, the value will also be used to determine + * PacketLifeTime for RoCE. 
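/*
 * (Editorial aside, not part of the patch.)  A worked example of the
 * "timeout - 1" arithmetic introduced below for RoCE PacketLifeTime: both
 * the RDMA CM ack timeout and the path-record PacketLifeTime are IBTA
 * exponents, i.e. an encoded value t means 4.096 us * 2^t.  With a
 * negligible local ACK delay, IBTA 12.7.34 gives
 *
 *     local ACK timeout ~= 2 * PacketLifeTime
 *
 * so halving the time is the same as subtracting one from the exponent.
 * For instance (values assumed purely for illustration), an ack timeout of
 * 14 means 4.096 us * 2^14 ~= 67 ms, and the code below then picks
 * packet_life_time = 13, i.e. 4.096 us * 2^13 ~= 33.5 ms.
 */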
* * Return: 0 for success */ @@ -2635,7 +2641,7 @@ static void cma_work_handler(struct work_struct *_work) if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) goto out; - if (id_priv->id.event_handler(&id_priv->id, &work->event)) { + if (cma_cm_event_handler(id_priv, &work->event)) { cma_exch(id_priv, RDMA_CM_DESTROYING); destroy = 1; } @@ -2658,7 +2664,7 @@ static void cma_ndev_work_handler(struct work_struct *_work) id_priv->state == RDMA_CM_DEVICE_REMOVAL) goto out; - if (id_priv->id.event_handler(&id_priv->id, &work->event)) { + if (cma_cm_event_handler(id_priv, &work->event)) { cma_exch(id_priv, RDMA_CM_DESTROYING); destroy = 1; } @@ -2827,22 +2833,65 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv) return 0; } -static int iboe_tos_to_sl(struct net_device *ndev, int tos) +static int get_vlan_ndev_tc(struct net_device *vlan_ndev, int prio) { - int prio; struct net_device *dev; - prio = rt_tos2priority(tos); - dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev; + dev = vlan_dev_real_dev(vlan_ndev); if (dev->num_tc) return netdev_get_prio_tc_map(dev, prio); -#if IS_ENABLED(CONFIG_VLAN_8021Q) + return (vlan_dev_get_egress_qos_mask(vlan_ndev, prio) & + VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; +} + +struct iboe_prio_tc_map { + int input_prio; + int output_tc; + bool found; +}; + +static int get_lower_vlan_dev_tc(struct net_device *dev, void *data) +{ + struct iboe_prio_tc_map *map = data; + + if (is_vlan_dev(dev)) + map->output_tc = get_vlan_ndev_tc(dev, map->input_prio); + else if (dev->num_tc) + map->output_tc = netdev_get_prio_tc_map(dev, map->input_prio); + else + map->output_tc = 0; + /* We are interested only in first level VLAN device, so always + * return 1 to stop iterating over next level devices. + */ + map->found = true; + return 1; +} + +static int iboe_tos_to_sl(struct net_device *ndev, int tos) +{ + struct iboe_prio_tc_map prio_tc_map = {}; + int prio = rt_tos2priority(tos); + + /* If VLAN device, get it directly from the VLAN netdev */ if (is_vlan_dev(ndev)) - return (vlan_dev_get_egress_qos_mask(ndev, prio) & - VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; -#endif - return 0; + return get_vlan_ndev_tc(ndev, prio); + + prio_tc_map.input_prio = prio; + rcu_read_lock(); + netdev_walk_all_lower_dev_rcu(ndev, + get_lower_vlan_dev_tc, + &prio_tc_map); + rcu_read_unlock(); + /* If map is found from lower device, use it; Otherwise + * continue with the current netdevice to get priority to tc map. + */ + if (prio_tc_map.found) + return prio_tc_map.output_tc; + else if (ndev->num_tc) + return netdev_get_prio_tc_map(ndev, prio); + else + return 0; } static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) @@ -2896,7 +2945,16 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) route->path_rec->rate = iboe_get_rate(ndev); dev_put(ndev); route->path_rec->packet_life_time_selector = IB_SA_EQ; - route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME; + /* In case ACK timeout is set, use this value to calculate + * PacketLifeTime. As per IBTA 12.7.34, + * local ACK timeout = (2 * PacketLifeTime + Local CA’s ACK delay). + * Assuming a negligible local ACK delay, we can use + * PacketLifeTime = local ACK timeout/2 + * as a reasonable approximation for RoCE networks. + */ + route->path_rec->packet_life_time = id_priv->timeout_set ? 
+ id_priv->timeout - 1 : CMA_IBOE_PACKET_LIFETIME; + if (!route->path_rec->mtu) { ret = -EINVAL; goto err2; @@ -3046,7 +3104,7 @@ static void addr_handler(int status, struct sockaddr *src_addr, if (status) pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n", status); - } else { + } else if (status) { pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status); } @@ -3061,7 +3119,7 @@ static void addr_handler(int status, struct sockaddr *src_addr, } else event.event = RDMA_CM_EVENT_ADDR_RESOLVED; - if (id_priv->id.event_handler(&id_priv->id, &event)) { + if (cma_cm_event_handler(id_priv, &event)) { cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); @@ -3090,6 +3148,7 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv) rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); + atomic_inc(&id_priv->refcount); cma_init_resolve_addr_work(work, id_priv); queue_work(cma_wq, &work->work); return 0; @@ -3116,6 +3175,7 @@ static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *) &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr)); + atomic_inc(&id_priv->refcount); cma_init_resolve_addr_work(work, id_priv); queue_work(cma_wq, &work->work); return 0; @@ -3708,7 +3768,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, goto out; } - ret = id_priv->id.event_handler(&id_priv->id, &event); + ret = cma_cm_event_handler(id_priv, &event); rdma_destroy_ah_attr(&event.param.ud.ah_attr); if (ret) { @@ -3772,6 +3832,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); req.max_cm_retries = CMA_MAX_CM_RETRIES; + trace_cm_send_sidr_req(id_priv); ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req); if (ret) { ib_destroy_cm_id(id_priv->cm_id.ib); @@ -3845,6 +3906,7 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, req.max_cm_retries = CMA_MAX_CM_RETRIES; req.srq = id_priv->srq ? 1 : 0; + trace_cm_send_req(id_priv); ret = ib_send_cm_req(id_priv->cm_id.ib, &req); out: if (ret && !IS_ERR(id)) { @@ -3958,6 +4020,7 @@ static int cma_accept_ib(struct rdma_id_private *id_priv, rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); rep.srq = id_priv->srq ? 1 : 0; + trace_cm_send_rep(id_priv); ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); out: return ret; @@ -4007,6 +4070,7 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv, rep.private_data = private_data; rep.private_data_len = private_data_len; + trace_cm_send_sidr_rep(id_priv); return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); } @@ -4092,13 +4156,15 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data, return -EINVAL; if (rdma_cap_ib_cm(id->device, id->port_num)) { - if (id->qp_type == IB_QPT_UD) + if (id->qp_type == IB_QPT_UD) { ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0, private_data, private_data_len); - else + } else { + trace_cm_send_rej(id_priv); ret = ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, private_data, private_data_len); + } } else if (rdma_cap_iw_cm(id->device, id->port_num)) { ret = iw_cm_reject(id_priv->cm_id.iw, private_data, private_data_len); @@ -4123,8 +4189,13 @@ int rdma_disconnect(struct rdma_cm_id *id) if (ret) goto out; /* Initiate or respond to a disconnect. 
*/ - if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) - ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); + trace_cm_disconnect(id_priv); + if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) { + if (!ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0)) + trace_cm_sent_drep(id_priv); + } else { + trace_cm_sent_dreq(id_priv); + } } else if (rdma_cap_iw_cm(id->device, id->port_num)) { ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); } else @@ -4190,7 +4261,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) } else event.event = RDMA_CM_EVENT_MULTICAST_ERROR; - ret = id_priv->id.event_handler(&id_priv->id, &event); + ret = cma_cm_event_handler(id_priv, &event); rdma_destroy_ah_attr(&event.param.ud.ah_attr); if (ret) { @@ -4595,6 +4666,7 @@ static void cma_add_one(struct ib_device *device) cma_listen_on_dev(id_priv, cma_dev); mutex_unlock(&lock); + trace_cm_add_one(device); return; free_gid_type: @@ -4625,7 +4697,7 @@ static int cma_remove_id_dev(struct rdma_id_private *id_priv) goto out; event.event = RDMA_CM_EVENT_DEVICE_REMOVAL; - ret = id_priv->id.event_handler(&id_priv->id, &event); + ret = cma_cm_event_handler(id_priv, &event); out: mutex_unlock(&id_priv->handler_mutex); return ret; @@ -4663,6 +4735,8 @@ static void cma_remove_one(struct ib_device *device, void *client_data) { struct cma_device *cma_dev = client_data; + trace_cm_remove_one(device); + if (!cma_dev) return; @@ -4724,13 +4798,18 @@ static int __init cma_init(void) if (ret) goto err; - cma_configfs_init(); + ret = cma_configfs_init(); + if (ret) + goto err_ib; return 0; +err_ib: + ib_unregister_client(&cma_client); err: unregister_netdevice_notifier(&cma_nb); ib_sa_unregister_client(&sa_client); + unregister_pernet_subsys(&cma_pernet_operations); err_wq: destroy_workqueue(cma_wq); return ret; diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 3ec2c415bb70..8b0b5ae22e4c 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -342,12 +342,18 @@ static struct configfs_subsystem cma_subsys = { int __init cma_configfs_init(void) { + int ret; + config_group_init(&cma_subsys.su_group); mutex_init(&cma_subsys.su_mutex); - return configfs_register_subsystem(&cma_subsys); + ret = configfs_register_subsystem(&cma_subsys); + if (ret) + mutex_destroy(&cma_subsys.su_mutex); + return ret; } void __exit cma_configfs_exit(void) { configfs_unregister_subsystem(&cma_subsys); + mutex_destroy(&cma_subsys.su_mutex); } diff --git a/drivers/infiniband/core/cma_trace.c b/drivers/infiniband/core/cma_trace.c new file mode 100644 index 000000000000..b314a281e10e --- /dev/null +++ b/drivers/infiniband/core/cma_trace.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Trace points for the RDMA Connection Manager. + * + * Author: Chuck Lever <chuck.lever@oracle.com> + * + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + */ + +#define CREATE_TRACE_POINTS + +#include <rdma/rdma_cm.h> +#include <rdma/ib_cm.h> +#include "cma_priv.h" + +#include "cma_trace.h" diff --git a/drivers/infiniband/core/cma_trace.h b/drivers/infiniband/core/cma_trace.h new file mode 100644 index 000000000000..81e36bf13159 --- /dev/null +++ b/drivers/infiniband/core/cma_trace.h @@ -0,0 +1,391 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Trace point definitions for the RDMA Connect Manager. + * + * Author: Chuck Lever <chuck.lever@oracle.com> + * + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rdma_cma + +#if !defined(_TRACE_RDMA_CMA_H) || defined(TRACE_HEADER_MULTI_READ) + +#define _TRACE_RDMA_CMA_H + +#include <linux/tracepoint.h> +#include <trace/events/rdma.h> + +/* + * enum ib_cm_event_type, from include/rdma/ib_cm.h + */ +#define IB_CM_EVENT_LIST \ + ib_cm_event(REQ_ERROR) \ + ib_cm_event(REQ_RECEIVED) \ + ib_cm_event(REP_ERROR) \ + ib_cm_event(REP_RECEIVED) \ + ib_cm_event(RTU_RECEIVED) \ + ib_cm_event(USER_ESTABLISHED) \ + ib_cm_event(DREQ_ERROR) \ + ib_cm_event(DREQ_RECEIVED) \ + ib_cm_event(DREP_RECEIVED) \ + ib_cm_event(TIMEWAIT_EXIT) \ + ib_cm_event(MRA_RECEIVED) \ + ib_cm_event(REJ_RECEIVED) \ + ib_cm_event(LAP_ERROR) \ + ib_cm_event(LAP_RECEIVED) \ + ib_cm_event(APR_RECEIVED) \ + ib_cm_event(SIDR_REQ_ERROR) \ + ib_cm_event(SIDR_REQ_RECEIVED) \ + ib_cm_event_end(SIDR_REP_RECEIVED) + +#undef ib_cm_event +#undef ib_cm_event_end + +#define ib_cm_event(x) TRACE_DEFINE_ENUM(IB_CM_##x); +#define ib_cm_event_end(x) TRACE_DEFINE_ENUM(IB_CM_##x); + +IB_CM_EVENT_LIST + +#undef ib_cm_event +#undef ib_cm_event_end + +#define ib_cm_event(x) { IB_CM_##x, #x }, +#define ib_cm_event_end(x) { IB_CM_##x, #x } + +#define rdma_show_ib_cm_event(x) \ + __print_symbolic(x, IB_CM_EVENT_LIST) + + +DECLARE_EVENT_CLASS(cma_fsm_class, + TP_PROTO( + const struct rdma_id_private *id_priv + ), + + TP_ARGS(id_priv), + + TP_STRUCT__entry( + __field(u32, cm_id) + __field(u32, tos) + __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) + __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) + ), + + TP_fast_assign( + __entry->cm_id = id_priv->res.id; + __entry->tos = id_priv->tos; + memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr, + sizeof(struct sockaddr_in6)); + memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr, + sizeof(struct sockaddr_in6)); + ), + + TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u", + __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos + ) +); + +#define DEFINE_CMA_FSM_EVENT(name) \ + DEFINE_EVENT(cma_fsm_class, cm_##name, \ + TP_PROTO( \ + const struct rdma_id_private *id_priv \ + ), \ + TP_ARGS(id_priv)) + +DEFINE_CMA_FSM_EVENT(send_rtu); +DEFINE_CMA_FSM_EVENT(send_rej); +DEFINE_CMA_FSM_EVENT(send_mra); +DEFINE_CMA_FSM_EVENT(send_sidr_req); +DEFINE_CMA_FSM_EVENT(send_sidr_rep); +DEFINE_CMA_FSM_EVENT(disconnect); +DEFINE_CMA_FSM_EVENT(sent_drep); +DEFINE_CMA_FSM_EVENT(sent_dreq); +DEFINE_CMA_FSM_EVENT(id_destroy); + +TRACE_EVENT(cm_id_create, + TP_PROTO( + const struct rdma_id_private *id_priv + ), + + TP_ARGS(id_priv), + + TP_STRUCT__entry( + __field(u32, cm_id) + ), + + TP_fast_assign( + __entry->cm_id = id_priv->res.id; + ), + + TP_printk("cm.id=%u", + __entry->cm_id + ) +); + +DECLARE_EVENT_CLASS(cma_qp_class, + TP_PROTO( + const struct rdma_id_private *id_priv + ), + + TP_ARGS(id_priv), + + TP_STRUCT__entry( + __field(u32, cm_id) + __field(u32, tos) + __field(u32, qp_num) + __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) + __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) + ), + + TP_fast_assign( + __entry->cm_id = id_priv->res.id; + __entry->tos = id_priv->tos; + __entry->qp_num = id_priv->qp_num; + memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr, + sizeof(struct sockaddr_in6)); + memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr, + sizeof(struct sockaddr_in6)); + ), + + TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u qp_num=%u", + __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos, + __entry->qp_num + ) +); + +#define 
DEFINE_CMA_QP_EVENT(name) \ + DEFINE_EVENT(cma_qp_class, cm_##name, \ + TP_PROTO( \ + const struct rdma_id_private *id_priv \ + ), \ + TP_ARGS(id_priv)) + +DEFINE_CMA_QP_EVENT(send_req); +DEFINE_CMA_QP_EVENT(send_rep); +DEFINE_CMA_QP_EVENT(qp_destroy); + +/* + * enum ib_wp_type, from include/rdma/ib_verbs.h + */ +#define IB_QP_TYPE_LIST \ + ib_qp_type(SMI) \ + ib_qp_type(GSI) \ + ib_qp_type(RC) \ + ib_qp_type(UC) \ + ib_qp_type(UD) \ + ib_qp_type(RAW_IPV6) \ + ib_qp_type(RAW_ETHERTYPE) \ + ib_qp_type(RAW_PACKET) \ + ib_qp_type(XRC_INI) \ + ib_qp_type_end(XRC_TGT) + +#undef ib_qp_type +#undef ib_qp_type_end + +#define ib_qp_type(x) TRACE_DEFINE_ENUM(IB_QPT_##x); +#define ib_qp_type_end(x) TRACE_DEFINE_ENUM(IB_QPT_##x); + +IB_QP_TYPE_LIST + +#undef ib_qp_type +#undef ib_qp_type_end + +#define ib_qp_type(x) { IB_QPT_##x, #x }, +#define ib_qp_type_end(x) { IB_QPT_##x, #x } + +#define rdma_show_qp_type(x) \ + __print_symbolic(x, IB_QP_TYPE_LIST) + + +TRACE_EVENT(cm_qp_create, + TP_PROTO( + const struct rdma_id_private *id_priv, + const struct ib_pd *pd, + const struct ib_qp_init_attr *qp_init_attr, + int rc + ), + + TP_ARGS(id_priv, pd, qp_init_attr, rc), + + TP_STRUCT__entry( + __field(u32, cm_id) + __field(u32, pd_id) + __field(u32, tos) + __field(u32, qp_num) + __field(u32, send_wr) + __field(u32, recv_wr) + __field(int, rc) + __field(unsigned long, qp_type) + __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) + __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) + ), + + TP_fast_assign( + __entry->cm_id = id_priv->res.id; + __entry->pd_id = pd->res.id; + __entry->tos = id_priv->tos; + __entry->send_wr = qp_init_attr->cap.max_send_wr; + __entry->recv_wr = qp_init_attr->cap.max_recv_wr; + __entry->rc = rc; + if (!rc) { + __entry->qp_num = id_priv->qp_num; + __entry->qp_type = id_priv->id.qp_type; + } else { + __entry->qp_num = 0; + __entry->qp_type = 0; + } + memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr, + sizeof(struct sockaddr_in6)); + memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr, + sizeof(struct sockaddr_in6)); + ), + + TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u pd.id=%u qp_type=%s" + " send_wr=%u recv_wr=%u qp_num=%u rc=%d", + __entry->cm_id, __entry->srcaddr, __entry->dstaddr, + __entry->tos, __entry->pd_id, + rdma_show_qp_type(__entry->qp_type), __entry->send_wr, + __entry->recv_wr, __entry->qp_num, __entry->rc + ) +); + +TRACE_EVENT(cm_req_handler, + TP_PROTO( + const struct rdma_id_private *id_priv, + int event + ), + + TP_ARGS(id_priv, event), + + TP_STRUCT__entry( + __field(u32, cm_id) + __field(u32, tos) + __field(unsigned long, event) + __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) + __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) + ), + + TP_fast_assign( + __entry->cm_id = id_priv->res.id; + __entry->tos = id_priv->tos; + __entry->event = event; + memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr, + sizeof(struct sockaddr_in6)); + memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr, + sizeof(struct sockaddr_in6)); + ), + + TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u %s (%lu)", + __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos, + rdma_show_ib_cm_event(__entry->event), __entry->event + ) +); + +TRACE_EVENT(cm_event_handler, + TP_PROTO( + const struct rdma_id_private *id_priv, + const struct rdma_cm_event *event + ), + + TP_ARGS(id_priv, event), + + TP_STRUCT__entry( + __field(u32, cm_id) + __field(u32, tos) + __field(unsigned long, event) + __field(int, status) + 
__array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) + __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) + ), + + TP_fast_assign( + __entry->cm_id = id_priv->res.id; + __entry->tos = id_priv->tos; + __entry->event = event->event; + __entry->status = event->status; + memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr, + sizeof(struct sockaddr_in6)); + memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr, + sizeof(struct sockaddr_in6)); + ), + + TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u %s (%lu/%d)", + __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos, + rdma_show_cm_event(__entry->event), __entry->event, + __entry->status + ) +); + +TRACE_EVENT(cm_event_done, + TP_PROTO( + const struct rdma_id_private *id_priv, + const struct rdma_cm_event *event, + int result + ), + + TP_ARGS(id_priv, event, result), + + TP_STRUCT__entry( + __field(u32, cm_id) + __field(u32, tos) + __field(unsigned long, event) + __field(int, result) + __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) + __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) + ), + + TP_fast_assign( + __entry->cm_id = id_priv->res.id; + __entry->tos = id_priv->tos; + __entry->event = event->event; + __entry->result = result; + memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr, + sizeof(struct sockaddr_in6)); + memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr, + sizeof(struct sockaddr_in6)); + ), + + TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u %s consumer returns %d", + __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos, + rdma_show_cm_event(__entry->event), __entry->result + ) +); + +DECLARE_EVENT_CLASS(cma_client_class, + TP_PROTO( + const struct ib_device *device + ), + + TP_ARGS(device), + + TP_STRUCT__entry( + __string(name, device->name) + ), + + TP_fast_assign( + __assign_str(name, device->name); + ), + + TP_printk("device name=%s", + __get_str(name) + ) +); + +#define DEFINE_CMA_CLIENT_EVENT(name) \ + DEFINE_EVENT(cma_client_class, cm_##name, \ + TP_PROTO( \ + const struct ib_device *device \ + ), \ + TP_ARGS(device)) + +DEFINE_CMA_CLIENT_EVENT(add_one); +DEFINE_CMA_CLIENT_EVENT(remove_one); + +#endif /* _TRACE_RDMA_CMA_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE cma_trace + +#include <trace/define_trace.h> diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index beee7b7e0d9a..b1457b3464d3 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -36,6 +36,8 @@ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/cgroup_rdma.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> #include <rdma/ib_verbs.h> #include <rdma/opa_addr.h> @@ -54,8 +56,26 @@ struct pkey_index_qp_list { struct list_head qp_list; }; +/** + * struct rdma_dev_net - rdma net namespace metadata for a net + * @nl_sock: Pointer to netlink socket + * @net: Pointer to owner net namespace + * @id: xarray id to identify the net namespace. 
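Stepping back to cma_trace.h above: the ib_cm_event()/ib_qp_type() list macros are expanded twice, once into TRACE_DEFINE_ENUM() stubs so tooling can resolve the enum values, and once into the { value, "name" } pairs consumed by __print_symbolic(). A small standalone sketch of that X-macro technique (the names below are invented, not part of the patch):

/* Illustration only: one list macro, two expansions. */
#include <stdio.h>

#define EVENT_LIST \
	my_event(REQ_ERROR) \
	my_event(REQ_RECEIVED) \
	my_event_end(REP_ERROR)

/* First expansion: enum members. */
#define my_event(x)	MY_##x,
#define my_event_end(x)	MY_##x
enum my_events { EVENT_LIST };
#undef my_event
#undef my_event_end

/* Second expansion: value to string pairs, as __print_symbolic() expects. */
#define my_event(x)	{ MY_##x, #x },
#define my_event_end(x)	{ MY_##x, #x }
static const struct { int val; const char *name; } my_event_names[] = {
	EVENT_LIST
};

int main(void)
{
	size_t i;

	for (i = 0; i < sizeof(my_event_names) / sizeof(my_event_names[0]); i++)
		printf("%d -> %s\n", my_event_names[i].val, my_event_names[i].name);
	return 0;
}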
+ */ +struct rdma_dev_net { + struct sock *nl_sock; + possible_net_t net; + u32 id; +}; + extern const struct attribute_group ib_dev_attr_group; extern bool ib_devices_shared_netns; +extern unsigned int rdma_dev_net_id; + +static inline struct rdma_dev_net *rdma_net_to_dev_net(struct net *net) +{ + return net_generic(net, rdma_dev_net_id); +} int ib_device_register_sysfs(struct ib_device *device); void ib_device_unregister_sysfs(struct ib_device *device); @@ -129,6 +149,7 @@ unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port); int ib_cache_setup_one(struct ib_device *device); void ib_cache_cleanup_one(struct ib_device *device); void ib_cache_release_one(struct ib_device *device); +void ib_dispatch_event_clients(struct ib_event *event); #ifdef CONFIG_CGROUP_RDMA void ib_device_register_rdmacg(struct ib_device *device); @@ -179,7 +200,7 @@ void ib_mad_cleanup(void); int ib_sa_init(void); void ib_sa_cleanup(void); -int rdma_nl_init(void); +void rdma_nl_init(void); void rdma_nl_exit(void); int ib_nl_handle_resolve_resp(struct sk_buff *skb, @@ -300,7 +321,7 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, struct ib_pd *pd, struct ib_qp_init_attr *attr, struct ib_udata *udata, - struct ib_uobject *uobj) + struct ib_uqp_object *uobj) { enum ib_qp_type qp_type = attr->qp_type; struct ib_qp *qp; @@ -365,4 +386,18 @@ void ib_port_unregister_module_stat(struct kobject *kobj); int ib_device_set_netns_put(struct sk_buff *skb, struct ib_device *dev, u32 ns_fd); + +int rdma_nl_net_init(struct rdma_dev_net *rnet); +void rdma_nl_net_exit(struct rdma_dev_net *rnet); + +struct rdma_umap_priv { + struct vm_area_struct *vma; + struct list_head list; + struct rdma_user_mmap_entry *entry; +}; + +void rdma_umap_priv_init(struct rdma_umap_priv *priv, + struct vm_area_struct *vma, + struct rdma_user_mmap_entry *entry); + #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index b79890739a2c..2257d7f7810f 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -149,13 +149,18 @@ static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, struct auto_mode_param *param = &counter->mode.param; bool match = true; - if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) - return false; - - /* Ensure that counter belong to right PID */ - if (!rdma_is_kernel_res(&counter->res) && - !rdma_is_kernel_res(&qp->res) && - (task_pid_vnr(counter->res.task) != current->pid)) + /* + * Ensure that counter belongs to the right PID. This operation can + * race with user space which kills the process and leaves QP and + * counters orphans. + * + * It is not a big deal because exitted task will leave both QP and + * counter in the same bucket of zombie process. Just ensure that + * process is still alive before procedding. 
+ * + */ + if (task_pid_nr(counter->res.task) != task_pid_nr(qp->res.task) || + !task_pid_nr(qp->res.task)) return false; if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE) @@ -231,9 +236,6 @@ static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp, rt = &dev->res[RDMA_RESTRACK_COUNTER]; xa_lock(&rt->xa); xa_for_each(&rt->xa, id, res) { - if (!rdma_is_visible_in_pid_ns(res)) - continue; - counter = container_of(res, struct rdma_counter, res); if ((counter->device != qp->device) || (counter->port != port)) goto next; @@ -284,6 +286,9 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) struct rdma_counter *counter; int ret; + if (!qp->res.valid) + return 0; + if (!rdma_is_port_valid(dev, port)) return -EINVAL; @@ -414,9 +419,6 @@ static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num) if (IS_ERR(res)) return NULL; - if (!rdma_is_visible_in_pid_ns(res)) - goto err; - qp = container_of(res, struct ib_qp, res); if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) goto err; @@ -424,7 +426,7 @@ static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num) return qp; err: - rdma_restrack_put(&qp->res); + rdma_restrack_put(res); return NULL; } @@ -447,11 +449,6 @@ static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev, if (IS_ERR(res)) return NULL; - if (!rdma_is_visible_in_pid_ns(res)) { - rdma_restrack_put(res); - return NULL; - } - counter = container_of(res, struct rdma_counter, res); kref_get(&counter->kref); rdma_restrack_put(res); @@ -465,10 +462,15 @@ static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev, int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, u32 qp_num, u32 counter_id) { + struct rdma_port_counter *port_counter; struct rdma_counter *counter; struct ib_qp *qp; int ret; + port_counter = &dev->port_data[port].port_counter; + if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO) + return -EINVAL; + qp = rdma_counter_get_qp(dev, qp_num); if (!qp) return -ENOENT; @@ -505,6 +507,7 @@ err: int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port, u32 qp_num, u32 *counter_id) { + struct rdma_port_counter *port_counter; struct rdma_counter *counter; struct ib_qp *qp; int ret; @@ -512,9 +515,13 @@ int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port, if (!rdma_is_port_valid(dev, port)) return -EINVAL; - if (!dev->port_data[port].port_counter.hstats) + port_counter = &dev->port_data[port].port_counter; + if (!port_counter->hstats) return -EOPNOTSUPP; + if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO) + return -EINVAL; + qp = rdma_counter_get_qp(dev, qp_num); if (!qp) return -ENOENT; @@ -601,7 +608,7 @@ int rdma_counter_get_mode(struct ib_device *dev, u8 port, void rdma_counter_init(struct ib_device *dev) { struct rdma_port_counter *port_counter; - u32 port; + u32 port, i; if (!dev->port_data) return; @@ -622,13 +629,12 @@ void rdma_counter_init(struct ib_device *dev) return; fail: - rdma_for_each_port(dev, port) { + for (i = port; i >= rdma_start_port(dev); i--) { port_counter = &dev->port_data[port].port_counter; kfree(port_counter->hstats); port_counter->hstats = NULL; + mutex_destroy(&port_counter->lock); } - - return; } void rdma_counter_release(struct ib_device *dev) @@ -639,5 +645,6 @@ void rdma_counter_release(struct ib_device *dev) rdma_for_each_port(dev, port) { port_counter = &dev->port_data[port].port_counter; kfree(port_counter->hstats); + mutex_destroy(&port_counter->lock); } } diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c 
index 7c599878ccf7..4f25b2400694 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -7,6 +7,8 @@ #include <linux/slab.h> #include <rdma/ib_verbs.h> +#include <trace/events/rdma_core.h> + /* # of WCs to poll for with a single call to ib_poll_cq */ #define IB_POLL_BATCH 16 #define IB_POLL_BATCH_DIRECT 8 @@ -41,6 +43,7 @@ static void ib_cq_rdma_dim_work(struct work_struct *w) dim->state = DIM_START_MEASURE; + trace_cq_modify(cq, comps, usec); cq->device->ops.modify_cq(cq, comps, usec); } @@ -65,18 +68,29 @@ static void rdma_dim_init(struct ib_cq *cq) INIT_WORK(&dim->work, ib_cq_rdma_dim_work); } +static int __poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) +{ + int rc; + + rc = ib_poll_cq(cq, num_entries, wc); + trace_cq_poll(cq, num_entries, rc); + return rc; +} + static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs, int batch) { int i, n, completed = 0; + trace_cq_process(cq); + /* * budget might be (-1) if the caller does not * want to bound this call, thus we need unsigned * minimum here. */ - while ((n = ib_poll_cq(cq, min_t(u32, batch, - budget - completed), wcs)) > 0) { + while ((n = __poll_cq(cq, min_t(u32, batch, + budget - completed), wcs)) > 0) { for (i = 0; i < n; i++) { struct ib_wc *wc = &wcs[i]; @@ -131,8 +145,10 @@ static int ib_poll_handler(struct irq_poll *iop, int budget) completed = __ib_process_cq(cq, budget, cq->wc, IB_POLL_BATCH); if (completed < budget) { irq_poll_complete(&cq->iop); - if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0) + if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0) { + trace_cq_reschedule(cq); irq_poll_sched(&cq->iop); + } } if (dim) @@ -143,6 +159,7 @@ static int ib_poll_handler(struct irq_poll *iop, int budget) static void ib_cq_completion_softirq(struct ib_cq *cq, void *private) { + trace_cq_schedule(cq); irq_poll_sched(&cq->iop); } @@ -162,6 +179,7 @@ static void ib_cq_poll_work(struct work_struct *work) static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private) { + trace_cq_schedule(cq); queue_work(cq->comp_wq, &cq->work); } @@ -239,6 +257,7 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private, goto out_destroy_cq; } + trace_cq_alloc(cq, nr_cqe, comp_vector, poll_ctx); return cq; out_destroy_cq: @@ -248,11 +267,40 @@ out_free_wc: kfree(cq->wc); out_free_cq: kfree(cq); + trace_cq_alloc_error(nr_cqe, comp_vector, poll_ctx, ret); return ERR_PTR(ret); } EXPORT_SYMBOL(__ib_alloc_cq_user); /** + * __ib_alloc_cq_any - allocate a completion queue + * @dev: device to allocate the CQ for + * @private: driver private data, accessible from cq->cq_context + * @nr_cqe: number of CQEs to allocate + * @poll_ctx: context to poll the CQ from + * @caller: module owner name + * + * Attempt to spread ULP Completion Queues over each device's interrupt + * vectors. A simple best-effort mechanism is used. + */ +struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private, + int nr_cqe, enum ib_poll_context poll_ctx, + const char *caller) +{ + static atomic_t counter; + int comp_vector = 0; + + if (dev->num_comp_vectors > 1) + comp_vector = + atomic_inc_return(&counter) % + min_t(int, dev->num_comp_vectors, num_online_cpus()); + + return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx, + caller, NULL); +} +EXPORT_SYMBOL(__ib_alloc_cq_any); + +/** * ib_free_cq_user - free a completion queue * @cq: completion queue to free. 
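The __ib_alloc_cq_any() helper added above spreads CQs across completion vectors with a global atomic counter taken modulo min(num_comp_vectors, num_online_cpus()), so ULPs no longer pile everything onto vector 0. A sketch of how a ULP might use it, assuming the corresponding ib_alloc_cq_any() wrapper in ib_verbs.h; the ULP function name is hypothetical:

#include <linux/err.h>
#include <rdma/ib_verbs.h>

/*
 * Hypothetical ULP helper: let the core choose the completion vector
 * instead of hard-coding comp_vector 0.  Freed later with ib_free_cq().
 */
static int ulp_setup_cq(struct ib_device *dev, void *ctx, struct ib_cq **cqp)
{
	struct ib_cq *cq;

	cq = ib_alloc_cq_any(dev, ctx, 128 /* nr_cqe */, IB_POLL_SOFTIRQ);
	if (IS_ERR(cq))
		return PTR_ERR(cq);

	*cqp = cq;
	return 0;
}

The round-robin choice is deliberately best effort, matching the kernel-doc above: it only needs to avoid concentrating every ULP CQ on the same interrupt vector.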
* @udata: User data or NULL for kernel object @@ -276,6 +324,7 @@ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata) WARN_ON_ONCE(1); } + trace_cq_free(cq); rdma_restrack_del(&cq->res); cq->device->ops.destroy_cq(cq, udata); if (cq->dim) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index ea8661a00651..f6c255202d7f 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -39,7 +39,6 @@ #include <linux/init.h> #include <linux/netdevice.h> #include <net/net_namespace.h> -#include <net/netns/generic.h> #include <linux/security.h> #include <linux/notifier.h> #include <linux/hashtable.h> @@ -111,17 +110,7 @@ static void ib_client_put(struct ib_client *client) */ #define CLIENT_DATA_REGISTERED XA_MARK_1 -/** - * struct rdma_dev_net - rdma net namespace metadata for a net - * @net: Pointer to owner net namespace - * @id: xarray id to identify the net namespace. - */ -struct rdma_dev_net { - possible_net_t net; - u32 id; -}; - -static unsigned int rdma_dev_net_id; +unsigned int rdma_dev_net_id; /* * A list of net namespaces is maintained in an xarray. This is necessary @@ -139,17 +128,14 @@ module_param_named(netns_mode, ib_devices_shared_netns, bool, 0444); MODULE_PARM_DESC(netns_mode, "Share device among net namespaces; default=1 (shared)"); /** - * rdma_dev_access_netns() - Return whether a rdma device can be accessed + * rdma_dev_access_netns() - Return whether an rdma device can be accessed * from a specified net namespace or not. - * @device: Pointer to rdma device which needs to be checked + * @dev: Pointer to rdma device which needs to be checked * @net: Pointer to net namesapce for which access to be checked * - * rdma_dev_access_netns() - Return whether a rdma device can be accessed - * from a specified net namespace or not. When - * rdma device is in shared mode, it ignores the - * net namespace. When rdma device is exclusive - * to a net namespace, rdma device net namespace is - * checked against the specified one. + * When the rdma device is in shared mode, it ignores the net namespace. + * When the rdma device is exclusive to a net namespace, rdma device net + * namespace is checked against the specified one. 
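A hypothetical restatement of the shared-versus-exclusive rule documented for rdma_dev_access_netns() above; this illustrates the rule only and is not the kernel's implementation:

#include <net/net_namespace.h>

/*
 * Illustration only: shared mode ignores the namespace, exclusive mode
 * requires the caller's namespace to match the device's.
 */
static bool example_dev_visible(bool shared_mode, const struct net *dev_net,
				const struct net *caller_net)
{
	return shared_mode || net_eq(dev_net, caller_net);
}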
*/ bool rdma_dev_access_netns(const struct ib_device *dev, const struct net *net) { @@ -514,6 +500,9 @@ static void ib_device_release(struct device *device) rcu_head); } + mutex_destroy(&dev->unregistration_lock); + mutex_destroy(&dev->compat_devs_mutex); + xa_destroy(&dev->compat_devs); xa_destroy(&dev->client_data); kfree_rcu(dev, rcu_head); @@ -598,7 +587,8 @@ struct ib_device *_ib_alloc_device(size_t size) rdma_init_coredev(&device->coredev, device, &init_net); INIT_LIST_HEAD(&device->event_handler_list); - spin_lock_init(&device->event_handler_lock); + spin_lock_init(&device->qp_open_list_lock); + init_rwsem(&device->event_handler_rwsem); mutex_init(&device->unregistration_lock); /* * client_data needs to be alloc because we don't want our mark to be @@ -1060,7 +1050,7 @@ int rdma_compatdev_set(u8 enable) static void rdma_dev_exit_net(struct net *net) { - struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id); + struct rdma_dev_net *rnet = rdma_net_to_dev_net(net); struct ib_device *dev; unsigned long index; int ret; @@ -1094,25 +1084,32 @@ static void rdma_dev_exit_net(struct net *net) } up_read(&devices_rwsem); + rdma_nl_net_exit(rnet); xa_erase(&rdma_nets, rnet->id); } static __net_init int rdma_dev_init_net(struct net *net) { - struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id); + struct rdma_dev_net *rnet = rdma_net_to_dev_net(net); unsigned long index; struct ib_device *dev; int ret; + write_pnet(&rnet->net, net); + + ret = rdma_nl_net_init(rnet); + if (ret) + return ret; + /* No need to create any compat devices in default init_net. */ if (net_eq(net, &init_net)) return 0; - write_pnet(&rnet->net, net); - ret = xa_alloc(&rdma_nets, &rnet->id, rnet, xa_limit_32b, GFP_KERNEL); - if (ret) + if (ret) { + rdma_nl_net_exit(rnet); return ret; + } down_read(&devices_rwsem); xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { @@ -1200,9 +1197,21 @@ static void setup_dma_device(struct ib_device *device) WARN_ON_ONCE(!parent); device->dma_device = parent; } - /* Setup default max segment size for all IB devices */ - dma_set_max_seg_size(device->dma_device, SZ_2G); + if (!device->dev.dma_parms) { + if (parent) { + /* + * The caller did not provide DMA parameters, so + * 'parent' probably represents a PCI device. The PCI + * core sets the maximum segment size to 64 + * KB. Increase this parameter to 2 GB. + */ + device->dev.dma_parms = parent->dma_parms; + dma_set_max_seg_size(device->dma_device, SZ_2G); + } else { + WARN_ON_ONCE(true); + } + } } /* @@ -1318,7 +1327,9 @@ out: /** * ib_register_device - Register an IB device with IB core - * @device:Device to register + * @device: Device to register + * @name: unique string device name. This may include a '%' which will + * cause a unique index to be added to the passed device name. * * Low-level drivers use ib_register_device() to register their * devices with the IB core. All registered clients will receive a @@ -1445,7 +1456,7 @@ out: /** * ib_unregister_device - Unregister an IB device - * @device: The device to unregister + * @ib_dev: The device to unregister * * Unregister an IB device. All clients will receive a remove callback. * @@ -1467,7 +1478,7 @@ EXPORT_SYMBOL(ib_unregister_device); /** * ib_unregister_device_and_put - Unregister a device while holding a 'get' - * device: The device to unregister + * @ib_dev: The device to unregister * * This is the same as ib_unregister_device(), except it includes an internal * ib_device_put() that should match a 'get' obtained by the caller. 
@@ -1537,7 +1548,7 @@ static void ib_unregister_work(struct work_struct *work) /** * ib_unregister_device_queued - Unregister a device using a work queue - * device: The device to unregister + * @ib_dev: The device to unregister * * This schedules an asynchronous unregistration using a WQ for the device. A * driver should use this to avoid holding locks while doing unregistration, @@ -1921,17 +1932,15 @@ EXPORT_SYMBOL(ib_set_client_data); * * ib_register_event_handler() registers an event handler that will be * called back when asynchronous IB events occur (as defined in - * chapter 11 of the InfiniBand Architecture Specification). This - * callback may occur in interrupt context. + * chapter 11 of the InfiniBand Architecture Specification). This + * callback occurs in workqueue context. */ void ib_register_event_handler(struct ib_event_handler *event_handler) { - unsigned long flags; - - spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); + down_write(&event_handler->device->event_handler_rwsem); list_add_tail(&event_handler->list, &event_handler->device->event_handler_list); - spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); + up_write(&event_handler->device->event_handler_rwsem); } EXPORT_SYMBOL(ib_register_event_handler); @@ -1944,61 +1953,78 @@ EXPORT_SYMBOL(ib_register_event_handler); */ void ib_unregister_event_handler(struct ib_event_handler *event_handler) { - unsigned long flags; - - spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); + down_write(&event_handler->device->event_handler_rwsem); list_del(&event_handler->list); - spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); + up_write(&event_handler->device->event_handler_rwsem); } EXPORT_SYMBOL(ib_unregister_event_handler); -/** - * ib_dispatch_event - Dispatch an asynchronous event - * @event:Event to dispatch - * - * Low-level drivers must call ib_dispatch_event() to dispatch the - * event to all registered event handlers when an asynchronous event - * occurs. - */ -void ib_dispatch_event(struct ib_event *event) +void ib_dispatch_event_clients(struct ib_event *event) { - unsigned long flags; struct ib_event_handler *handler; - spin_lock_irqsave(&event->device->event_handler_lock, flags); + down_read(&event->device->event_handler_rwsem); list_for_each_entry(handler, &event->device->event_handler_list, list) handler->handler(handler, event); - spin_unlock_irqrestore(&event->device->event_handler_lock, flags); + up_read(&event->device->event_handler_rwsem); } -EXPORT_SYMBOL(ib_dispatch_event); -/** - * ib_query_port - Query IB port attributes - * @device:Device to query - * @port_num:Port number to query - * @port_attr:Port attributes - * - * ib_query_port() returns the attributes of a port through the - * @port_attr pointer. 
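Per the updated ib_register_event_handler() documentation above, handlers are now invoked from workqueue context (via ib_dispatch_event_clients()) under a read-held rwsem, so they may sleep. A hedged sketch of a client registering one, assuming the INIT_IB_EVENT_HANDLER() helper from ib_verbs.h; the client names are invented:

#include <rdma/ib_verbs.h>

/* Hypothetical client: log asynchronous events for one device. */
static void my_async_event(struct ib_event_handler *handler,
			   struct ib_event *event)
{
	/* Runs in workqueue context after this patch, so sleeping is fine. */
	dev_info(&event->device->dev, "async event %d\n", event->event);
}

static struct ib_event_handler my_handler;

static void my_client_add_device(struct ib_device *device)
{
	INIT_IB_EVENT_HANDLER(&my_handler, device, my_async_event);
	ib_register_event_handler(&my_handler);
}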
- */ -int ib_query_port(struct ib_device *device, - u8 port_num, - struct ib_port_attr *port_attr) +static int iw_query_port(struct ib_device *device, + u8 port_num, + struct ib_port_attr *port_attr) { - union ib_gid gid; - int err; + struct in_device *inetdev; + struct net_device *netdev; - if (!rdma_is_port_valid(device, port_num)) - return -EINVAL; + memset(port_attr, 0, sizeof(*port_attr)); + + netdev = ib_device_get_netdev(device, port_num); + if (!netdev) + return -ENODEV; + + port_attr->max_mtu = IB_MTU_4096; + port_attr->active_mtu = ib_mtu_int_to_enum(netdev->mtu); + + if (!netif_carrier_ok(netdev)) { + port_attr->state = IB_PORT_DOWN; + port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; + } else { + rcu_read_lock(); + inetdev = __in_dev_get_rcu(netdev); + + if (inetdev && inetdev->ifa_list) { + port_attr->state = IB_PORT_ACTIVE; + port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; + } else { + port_attr->state = IB_PORT_INIT; + port_attr->phys_state = + IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING; + } + + rcu_read_unlock(); + } + + dev_put(netdev); + return device->ops.query_port(device, port_num, port_attr); +} + +static int __ib_query_port(struct ib_device *device, + u8 port_num, + struct ib_port_attr *port_attr) +{ + union ib_gid gid = {}; + int err; memset(port_attr, 0, sizeof(*port_attr)); + err = device->ops.query_port(device, port_num, port_attr); if (err || port_attr->subnet_prefix) return err; - if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND) + if (rdma_port_get_link_layer(device, port_num) != + IB_LINK_LAYER_INFINIBAND) return 0; err = device->ops.query_gid(device, port_num, 0, &gid); @@ -2008,6 +2034,28 @@ int ib_query_port(struct ib_device *device, port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix); return 0; } + +/** + * ib_query_port - Query IB port attributes + * @device:Device to query + * @port_num:Port number to query + * @port_attr:Port attributes + * + * ib_query_port() returns the attributes of a port through the + * @port_attr pointer. + */ +int ib_query_port(struct ib_device *device, + u8 port_num, + struct ib_port_attr *port_attr) +{ + if (!rdma_is_port_valid(device, port_num)) + return -EINVAL; + + if (rdma_protocol_iwarp(device, port_num)) + return iw_query_port(device, port_num, port_attr); + else + return __ib_query_port(device, port_num, port_attr); +} EXPORT_SYMBOL(ib_query_port); static void add_ndev_hash(struct ib_port_data *pdata) @@ -2311,7 +2359,7 @@ int ib_modify_device(struct ib_device *device, struct ib_device_modify *device_modify) { if (!device->ops.modify_device) - return -ENOSYS; + return -EOPNOTSUPP; return device->ops.modify_device(device, device_modify_mask, device_modify); @@ -2342,8 +2390,12 @@ int ib_modify_port(struct ib_device *device, rc = device->ops.modify_port(device, port_num, port_modify_mask, port_modify); + else if (rdma_protocol_roce(device, port_num) && + ((port_modify->set_port_cap_mask & ~IB_PORT_CM_SUP) == 0 || + (port_modify->clr_port_cap_mask & ~IB_PORT_CM_SUP) == 0)) + rc = 0; else - rc = rdma_protocol_roce(device, port_num) ? 
0 : -ENOSYS; + rc = -EOPNOTSUPP; return rc; } EXPORT_SYMBOL(ib_modify_port); @@ -2552,6 +2604,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, drain_sq); SET_DEVICE_OP(dev_ops, enable_driver); SET_DEVICE_OP(dev_ops, fill_res_entry); + SET_DEVICE_OP(dev_ops, fill_stat_entry); SET_DEVICE_OP(dev_ops, get_dev_fw_str); SET_DEVICE_OP(dev_ops, get_dma_mr); SET_DEVICE_OP(dev_ops, get_hw_stats); @@ -2560,6 +2613,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, get_port_immutable); SET_DEVICE_OP(dev_ops, get_vector_affinity); SET_DEVICE_OP(dev_ops, get_vf_config); + SET_DEVICE_OP(dev_ops, get_vf_guid); SET_DEVICE_OP(dev_ops, get_vf_stats); SET_DEVICE_OP(dev_ops, init_port); SET_DEVICE_OP(dev_ops, iw_accept); @@ -2574,6 +2628,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, map_mr_sg_pi); SET_DEVICE_OP(dev_ops, map_phys_fmr); SET_DEVICE_OP(dev_ops, mmap); + SET_DEVICE_OP(dev_ops, mmap_free); SET_DEVICE_OP(dev_ops, modify_ah); SET_DEVICE_OP(dev_ops, modify_cq); SET_DEVICE_OP(dev_ops, modify_device); @@ -2660,11 +2715,7 @@ static int __init ib_core_init(void) goto err_comp_unbound; } - ret = rdma_nl_init(); - if (ret) { - pr_warn("Couldn't init IB netlink interface: err %d\n", ret); - goto err_sysfs; - } + rdma_nl_init(); ret = addr_init(); if (ret) { @@ -2711,8 +2762,6 @@ err_mad: err_addr: addr_cleanup(); err_ibnl: - rdma_nl_exit(); -err_sysfs: class_unregister(&ib_class); err_comp_unbound: destroy_workqueue(ib_comp_unbound_wq); diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 7d841b689a1e..e08aec427027 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -148,13 +148,6 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool) hlist_del_init(&fmr->cache_node); fmr->remap_count = 0; list_add_tail(&fmr->fmr->list, &fmr_list); - -#ifdef DEBUG - if (fmr->ref_count !=0) { - pr_warn(PFX "Unmapping FMR 0x%08x with ref count %d\n", - fmr, fmr->ref_count); - } -#endif } list_splice_init(&pool->dirty_list, &unmap_list); @@ -496,12 +489,6 @@ void ib_fmr_pool_unmap(struct ib_pool_fmr *fmr) } } -#ifdef DEBUG - if (fmr->ref_count < 0) - pr_warn(PFX "FMR %p has ref count %d < 0\n", - fmr, fmr->ref_count); -#endif - spin_unlock_irqrestore(&pool->pool_lock, flags); } EXPORT_SYMBOL(ib_fmr_pool_unmap); diff --git a/drivers/infiniband/core/ib_core_uverbs.c b/drivers/infiniband/core/ib_core_uverbs.c new file mode 100644 index 000000000000..b51bd7087a88 --- /dev/null +++ b/drivers/infiniband/core/ib_core_uverbs.c @@ -0,0 +1,367 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2019 Marvell. All rights reserved. + */ +#include <linux/xarray.h> +#include "uverbs.h" +#include "core_priv.h" + +/** + * rdma_umap_priv_init() - Initialize the private data of a vma + * + * @priv: The already allocated private data + * @vma: The vm area struct that needs private data + * @entry: entry into the mmap_xa that needs to be linked with + * this vma + * + * Each time we map IO memory into user space this keeps track of the + * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space + * to point to the zero page and allow the hot unplug to proceed. 
+ * + * This is necessary for cases like PCI physical hot unplug as the actual BAR + * memory may vanish after this and access to it from userspace could MCE. + * + * RDMA drivers supporting disassociation must have their user space designed + * to cope in some way with their IO pages going to the zero page. + * + */ +void rdma_umap_priv_init(struct rdma_umap_priv *priv, + struct vm_area_struct *vma, + struct rdma_user_mmap_entry *entry) +{ + struct ib_uverbs_file *ufile = vma->vm_file->private_data; + + priv->vma = vma; + if (entry) { + kref_get(&entry->ref); + priv->entry = entry; + } + vma->vm_private_data = priv; + /* vm_ops is setup in ib_uverbs_mmap() to avoid module dependencies */ + + mutex_lock(&ufile->umap_lock); + list_add(&priv->list, &ufile->umaps); + mutex_unlock(&ufile->umap_lock); +} +EXPORT_SYMBOL(rdma_umap_priv_init); + +/** + * rdma_user_mmap_io() - Map IO memory into a process + * + * @ucontext: associated user context + * @vma: the vma related to the current mmap call + * @pfn: pfn to map + * @size: size to map + * @prot: pgprot to use in remap call + * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL + * if mmap_entry is not used by the driver + * + * This is to be called by drivers as part of their mmap() functions if they + * wish to send something like PCI-E BAR memory to userspace. + * + * Return -EINVAL on wrong flags or size, -EAGAIN on failure to map. 0 on + * success. + */ +int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, + unsigned long pfn, unsigned long size, pgprot_t prot, + struct rdma_user_mmap_entry *entry) +{ + struct ib_uverbs_file *ufile = ucontext->ufile; + struct rdma_umap_priv *priv; + + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + if (vma->vm_end - vma->vm_start != size) + return -EINVAL; + + /* Driver is using this wrong, must be called by ib_uverbs_mmap */ + if (WARN_ON(!vma->vm_file || + vma->vm_file->private_data != ufile)) + return -EINVAL; + lockdep_assert_held(&ufile->device->disassociate_srcu); + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + vma->vm_page_prot = prot; + if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) { + kfree(priv); + return -EAGAIN; + } + + rdma_umap_priv_init(priv, vma, entry); + return 0; +} +EXPORT_SYMBOL(rdma_user_mmap_io); + +/** + * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa + * + * @ucontext: associated user context + * @pgoff: The mmap offset >> PAGE_SHIFT + * + * This function is called when a user tries to mmap with an offset (returned + * by rdma_user_mmap_get_offset()) it initially received from the driver. The + * rdma_user_mmap_entry was created by the function + * rdma_user_mmap_entry_insert(). This function increases the refcnt of the + * entry so that it won't be deleted from the xarray in the meantime. + * + * Return an reference to an entry if exists or NULL if there is no + * match. rdma_user_mmap_entry_put() must be called to put the reference. + */ +struct rdma_user_mmap_entry * +rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext, + unsigned long pgoff) +{ + struct rdma_user_mmap_entry *entry; + + if (pgoff > U32_MAX) + return NULL; + + xa_lock(&ucontext->mmap_xa); + + entry = xa_load(&ucontext->mmap_xa, pgoff); + + /* + * If refcount is zero, entry is already being deleted, driver_removed + * indicates that the no further mmaps are possible and we waiting for + * the active VMAs to be closed. 
+ */ + if (!entry || entry->start_pgoff != pgoff || entry->driver_removed || + !kref_get_unless_zero(&entry->ref)) + goto err; + + xa_unlock(&ucontext->mmap_xa); + + ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] returned\n", + pgoff, entry->npages); + + return entry; + +err: + xa_unlock(&ucontext->mmap_xa); + return NULL; +} +EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff); + +/** + * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa + * + * @ucontext: associated user context + * @vma: the vma being mmap'd into + * + * This function is like rdma_user_mmap_entry_get_pgoff() except that it also + * checks that the VMA is correct. + */ +struct rdma_user_mmap_entry * +rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, + struct vm_area_struct *vma) +{ + struct rdma_user_mmap_entry *entry; + + if (!(vma->vm_flags & VM_SHARED)) + return NULL; + entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff); + if (!entry) + return NULL; + if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) { + rdma_user_mmap_entry_put(entry); + return NULL; + } + return entry; +} +EXPORT_SYMBOL(rdma_user_mmap_entry_get); + +static void rdma_user_mmap_entry_free(struct kref *kref) +{ + struct rdma_user_mmap_entry *entry = + container_of(kref, struct rdma_user_mmap_entry, ref); + struct ib_ucontext *ucontext = entry->ucontext; + unsigned long i; + + /* + * Erase all entries occupied by this single entry, this is deferred + * until all VMA are closed so that the mmap offsets remain unique. + */ + xa_lock(&ucontext->mmap_xa); + for (i = 0; i < entry->npages; i++) + __xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i); + xa_unlock(&ucontext->mmap_xa); + + ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] removed\n", + entry->start_pgoff, entry->npages); + + if (ucontext->device->ops.mmap_free) + ucontext->device->ops.mmap_free(entry); +} + +/** + * rdma_user_mmap_entry_put() - Drop reference to the mmap entry + * + * @entry: an entry in the mmap_xa + * + * This function is called when the mapping is closed if it was + * an io mapping or when the driver is done with the entry for + * some other reason. + * Should be called after rdma_user_mmap_entry_get was called + * and entry is no longer needed. This function will erase the + * entry and free it if its refcnt reaches zero. + */ +void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry) +{ + kref_put(&entry->ref, rdma_user_mmap_entry_free); +} +EXPORT_SYMBOL(rdma_user_mmap_entry_put); + +/** + * rdma_user_mmap_entry_remove() - Drop reference to entry and + * mark it as unmmapable + * + * @entry: the entry to insert into the mmap_xa + * + * Drivers can call this to prevent userspace from creating more mappings for + * entry, however existing mmaps continue to exist and ops->mmap_free() will + * not be called until all user mmaps are destroyed. + */ +void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry) +{ + if (!entry) + return; + + xa_lock(&entry->ucontext->mmap_xa); + entry->driver_removed = true; + xa_unlock(&entry->ucontext->mmap_xa); + kref_put(&entry->ref, rdma_user_mmap_entry_free); +} +EXPORT_SYMBOL(rdma_user_mmap_entry_remove); + +/** + * rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa + * in a given range. + * + * @ucontext: associated user context. 
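Taken together, the helpers above give drivers a common mmap bookkeeping path. A hedged sketch of a driver-side mmap handler built on them; the my_mmap_entry wrapper and its bar_pfn field are invented for illustration (real drivers embed rdma_user_mmap_entry in their own entry type):

#include <rdma/ib_verbs.h>

struct my_mmap_entry {
	struct rdma_user_mmap_entry rdma_entry;
	unsigned long bar_pfn;	/* pfn this offset should map */
};

static int my_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma)
{
	struct rdma_user_mmap_entry *rdma_entry;
	struct my_mmap_entry *entry;
	int ret;

	/* Look up, and take a reference on, the entry for vma->vm_pgoff. */
	rdma_entry = rdma_user_mmap_entry_get(ucontext, vma);
	if (!rdma_entry)
		return -EINVAL;
	entry = container_of(rdma_entry, struct my_mmap_entry, rdma_entry);

	ret = rdma_user_mmap_io(ucontext, vma, entry->bar_pfn,
				rdma_entry->npages * PAGE_SIZE,
				pgprot_noncached(vma->vm_page_prot),
				rdma_entry);

	rdma_user_mmap_entry_put(rdma_entry);
	return ret;
}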
+ * @entry: the entry to insert into the mmap_xa + * @length: length of the address that will be mmapped + * @min_pgoff: minimum pgoff to be returned + * @max_pgoff: maximum pgoff to be returned + * + * This function should be called by drivers that use the rdma_user_mmap + * interface for implementing their mmap syscall A database of mmap offsets is + * handled in the core and helper functions are provided to insert entries + * into the database and extract entries when the user calls mmap with the + * given offset. The function allocates a unique page offset in a given range + * that should be provided to user, the user will use the offset to retrieve + * information such as address to be mapped and how. + * + * Return: 0 on success and -ENOMEM on failure + */ +int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext, + struct rdma_user_mmap_entry *entry, + size_t length, u32 min_pgoff, + u32 max_pgoff) +{ + struct ib_uverbs_file *ufile = ucontext->ufile; + XA_STATE(xas, &ucontext->mmap_xa, min_pgoff); + u32 xa_first, xa_last, npages; + int err; + u32 i; + + if (!entry) + return -EINVAL; + + kref_init(&entry->ref); + entry->ucontext = ucontext; + + /* + * We want the whole allocation to be done without interruption from a + * different thread. The allocation requires finding a free range and + * storing. During the xa_insert the lock could be released, possibly + * allowing another thread to choose the same range. + */ + mutex_lock(&ufile->umap_lock); + + xa_lock(&ucontext->mmap_xa); + + /* We want to find an empty range */ + npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE); + entry->npages = npages; + while (true) { + /* First find an empty index */ + xas_find_marked(&xas, max_pgoff, XA_FREE_MARK); + if (xas.xa_node == XAS_RESTART) + goto err_unlock; + + xa_first = xas.xa_index; + + /* Is there enough room to have the range? */ + if (check_add_overflow(xa_first, npages, &xa_last)) + goto err_unlock; + + /* + * Now look for the next present entry. If an entry doesn't + * exist, we found an empty range and can proceed. + */ + xas_next_entry(&xas, xa_last - 1); + if (xas.xa_node == XAS_BOUNDS || xas.xa_index >= xa_last) + break; + } + + for (i = xa_first; i < xa_last; i++) { + err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL); + if (err) + goto err_undo; + } + + /* + * Internally the kernel uses a page offset, in libc this is a byte + * offset. Drivers should not return pgoff to userspace. + */ + entry->start_pgoff = xa_first; + xa_unlock(&ucontext->mmap_xa); + mutex_unlock(&ufile->umap_lock); + + ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#x] inserted\n", + entry->start_pgoff, npages); + + return 0; + +err_undo: + for (; i > xa_first; i--) + __xa_erase(&ucontext->mmap_xa, i - 1); + +err_unlock: + xa_unlock(&ucontext->mmap_xa); + mutex_unlock(&ufile->umap_lock); + return -ENOMEM; +} +EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range); + +/** + * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa. + * + * @ucontext: associated user context. + * @entry: the entry to insert into the mmap_xa + * @length: length of the address that will be mmapped + * + * This function should be called by drivers that use the rdma_user_mmap + * interface for handling user mmapped addresses. The database is handled in + * the core and helper functions are provided to insert entries into the + * database and extract entries when the user calls mmap with the given offset. 
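The setup side described above would then look roughly like this, reusing the my_mmap_entry wrapper from the previous sketch; rdma_user_mmap_get_offset() is the ib_verbs.h helper the text refers to, and resp_offset stands in for a field of the driver's response to userspace:

#include <linux/slab.h>
#include <rdma/ib_verbs.h>

static int my_reserve_page(struct ib_ucontext *ucontext,
			   unsigned long bar_pfn, u64 *resp_offset)
{
	struct my_mmap_entry *entry;
	int ret;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;
	entry->bar_pfn = bar_pfn;

	ret = rdma_user_mmap_entry_insert(ucontext, &entry->rdma_entry,
					  PAGE_SIZE);
	if (ret) {
		kfree(entry);
		return ret;
	}

	/* Userspace passes this value back as its mmap() offset. */
	*resp_offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
	return 0;
}

On teardown the driver would call rdma_user_mmap_entry_remove(); the wrapper itself is then freed from the driver's mmap_free() callback once the final reference drops, as the release path above shows.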
+ * The function allocates a unique page offset that should be provided to user, + * the user will use the offset to retrieve information such as address to + * be mapped and how. + * + * Return: 0 on success and -ENOMEM on failure + */ +int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, + struct rdma_user_mmap_entry *entry, + size_t length) +{ + return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0, + U32_MAX); +} +EXPORT_SYMBOL(rdma_user_mmap_entry_insert); diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 72141c5b7c95..ade71823370f 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -372,6 +372,7 @@ EXPORT_SYMBOL(iw_cm_disconnect); static void destroy_cm_id(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; + struct ib_qp *qp; unsigned long flags; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); @@ -389,6 +390,9 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags); spin_lock_irqsave(&cm_id_priv->lock, flags); + qp = cm_id_priv->qp; + cm_id_priv->qp = NULL; + switch (cm_id_priv->state) { case IW_CM_STATE_LISTEN: cm_id_priv->state = IW_CM_STATE_DESTROYING; @@ -401,7 +405,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) cm_id_priv->state = IW_CM_STATE_DESTROYING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); /* Abrupt close of the connection */ - (void)iwcm_modify_qp_err(cm_id_priv->qp); + (void)iwcm_modify_qp_err(qp); spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_IDLE: @@ -426,11 +430,9 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) BUG(); break; } - if (cm_id_priv->qp) { - cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp); - cm_id_priv->qp = NULL; - } spin_unlock_irqrestore(&cm_id_priv->lock, flags); + if (qp) + cm_id_priv->id.device->ops.iw_rem_ref(qp); if (cm_id->mapped) { iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr); @@ -671,11 +673,11 @@ int iw_cm_accept(struct iw_cm_id *cm_id, BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); cm_id_priv->state = IW_CM_STATE_IDLE; spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id_priv->qp) { - cm_id->device->ops.iw_rem_ref(qp); - cm_id_priv->qp = NULL; - } + qp = cm_id_priv->qp; + cm_id_priv->qp = NULL; spin_unlock_irqrestore(&cm_id_priv->lock, flags); + if (qp) + cm_id->device->ops.iw_rem_ref(qp); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); } @@ -696,7 +698,7 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) struct iwcm_id_private *cm_id_priv; int ret; unsigned long flags; - struct ib_qp *qp; + struct ib_qp *qp = NULL; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); @@ -730,13 +732,13 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) return 0; /* success */ spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id_priv->qp) { - cm_id->device->ops.iw_rem_ref(qp); - cm_id_priv->qp = NULL; - } + qp = cm_id_priv->qp; + cm_id_priv->qp = NULL; cm_id_priv->state = IW_CM_STATE_IDLE; err: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + if (qp) + cm_id->device->ops.iw_rem_ref(qp); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); return ret; @@ -878,6 +880,7 @@ static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv, static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { + struct ib_qp *qp = NULL; unsigned long 
flags; int ret; @@ -896,11 +899,13 @@ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, cm_id_priv->state = IW_CM_STATE_ESTABLISHED; } else { /* REJECTED or RESET */ - cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp); + qp = cm_id_priv->qp; cm_id_priv->qp = NULL; cm_id_priv->state = IW_CM_STATE_IDLE; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); + if (qp) + cm_id_priv->id.device->ops.iw_rem_ref(qp); ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); if (iw_event->private_data_len) @@ -942,21 +947,18 @@ static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv, static int cm_close_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { + struct ib_qp *qp; unsigned long flags; - int ret = 0; + int ret = 0, notify_event = 0; spin_lock_irqsave(&cm_id_priv->lock, flags); + qp = cm_id_priv->qp; + cm_id_priv->qp = NULL; - if (cm_id_priv->qp) { - cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp); - cm_id_priv->qp = NULL; - } switch (cm_id_priv->state) { case IW_CM_STATE_ESTABLISHED: case IW_CM_STATE_CLOSING: cm_id_priv->state = IW_CM_STATE_IDLE; - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); - spin_lock_irqsave(&cm_id_priv->lock, flags); + notify_event = 1; break; case IW_CM_STATE_DESTROYING: break; @@ -965,6 +967,10 @@ static int cm_close_handler(struct iwcm_id_private *cm_id_priv, } spin_unlock_irqrestore(&cm_id_priv->lock, flags); + if (qp) + cm_id_priv->id.device->ops.iw_rem_ref(qp); + if (notify_event) + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); return ret; } diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index 2452b0ddcf0d..46686990a827 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -112,7 +112,7 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client) pr_debug("%s: Multicasting a nlmsg (dev = %s ifname = %s iwpm = %s)\n", __func__, pm_msg->dev_name, pm_msg->if_name, iwpm_ulib_name); - ret = rdma_nl_multicast(skb, RDMA_NL_GROUP_IWPM, GFP_KERNEL); + ret = rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_IWPM, GFP_KERNEL); if (ret) { skb = NULL; /* skb is freed in the netlink send-op handling */ iwpm_user_pid = IWPM_PID_UNAVAILABLE; @@ -124,8 +124,7 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client) return ret; pid_query_error: pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client); - if (skb) - dev_kfree_skb(skb); + dev_kfree_skb(skb); if (nlmsg_request) iwpm_free_nlmsg_request(&nlmsg_request->kref); return ret; @@ -202,7 +201,7 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) nlmsg_end(skb, nlh); nlmsg_request->req_buffer = pm_msg; - ret = rdma_nl_unicast_wait(skb, iwpm_user_pid); + ret = rdma_nl_unicast_wait(&init_net, skb, iwpm_user_pid); if (ret) { skb = NULL; /* skb is freed in the netlink send-op handling */ iwpm_user_pid = IWPM_PID_UNDEFINED; @@ -214,8 +213,7 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) add_mapping_error: pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client); add_mapping_error_nowarn: - if (skb) - dev_kfree_skb(skb); + dev_kfree_skb(skb); if (nlmsg_request) iwpm_free_nlmsg_request(&nlmsg_request->kref); return ret; @@ -297,7 +295,7 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) nlmsg_end(skb, nlh); nlmsg_request->req_buffer = pm_msg; - ret = rdma_nl_unicast_wait(skb, iwpm_user_pid); + ret = 
rdma_nl_unicast_wait(&init_net, skb, iwpm_user_pid); if (ret) { skb = NULL; /* skb is freed in the netlink send-op handling */ err_str = "Unable to send a nlmsg"; @@ -308,8 +306,7 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) query_mapping_error: pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client); query_mapping_error_nowarn: - if (skb) - dev_kfree_skb(skb); + dev_kfree_skb(skb); if (nlmsg_request) iwpm_free_nlmsg_request(&nlmsg_request->kref); return ret; @@ -364,7 +361,7 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client) nlmsg_end(skb, nlh); - ret = rdma_nl_unicast_wait(skb, iwpm_user_pid); + ret = rdma_nl_unicast_wait(&init_net, skb, iwpm_user_pid); if (ret) { skb = NULL; /* skb is freed in the netlink send-op handling */ iwpm_user_pid = IWPM_PID_UNDEFINED; diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index 41929bb83739..13495b43dbc1 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -645,7 +645,7 @@ static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid) nlmsg_end(skb, nlh); - ret = rdma_nl_unicast(skb, iwpm_pid); + ret = rdma_nl_unicast(&init_net, skb, iwpm_pid); if (ret) { skb = NULL; err_str = "Unable to send a nlmsg"; @@ -655,8 +655,7 @@ static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid) return 0; mapinfo_num_error: pr_info("%s: %s\n", __func__, err_str); - if (skb) - dev_kfree_skb(skb); + dev_kfree_skb(skb); return ret; } @@ -674,7 +673,7 @@ static int send_nlmsg_done(struct sk_buff *skb, u8 nl_client, int iwpm_pid) return -ENOMEM; } nlh->nlmsg_type = NLMSG_DONE; - ret = rdma_nl_unicast(skb, iwpm_pid); + ret = rdma_nl_unicast(&init_net, skb, iwpm_pid); if (ret) pr_warn("%s Unable to send a nlmsg\n", __func__); return ret; @@ -778,8 +777,7 @@ send_mapping_info_unlock: send_mapping_info_exit: if (ret) { pr_warn("%s: %s (ret = %d)\n", __func__, err_str, ret); - if (skb) - dev_kfree_skb(skb); + dev_kfree_skb(skb); return ret; } send_nlmsg_done(skb, nl_client, iwpm_pid); @@ -824,7 +822,7 @@ int iwpm_send_hello(u8 nl_client, int iwpm_pid, u16 abi_version) goto hello_num_error; nlmsg_end(skb, nlh); - ret = rdma_nl_unicast(skb, iwpm_pid); + ret = rdma_nl_unicast(&init_net, skb, iwpm_pid); if (ret) { skb = NULL; err_str = "Unable to send a nlmsg"; @@ -834,7 +832,6 @@ int iwpm_send_hello(u8 nl_client, int iwpm_pid, u16 abi_version) return 0; hello_num_error: pr_info("%s: %s\n", __func__, err_str); - if (skb) - dev_kfree_skb(skb); + dev_kfree_skb(skb); return ret; } diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h index 7e2bcc72f66c..1bf87d9fd0bd 100644 --- a/drivers/infiniband/core/iwpm_util.h +++ b/drivers/infiniband/core/iwpm_util.h @@ -210,8 +210,10 @@ int iwpm_mapinfo_available(void); /** * iwpm_compare_sockaddr - Compare two sockaddr storage structs + * @a_sockaddr: first sockaddr to compare + * @b_sockaddr: second sockaddr to compare * - * Returns 0 if they are holding the same ip/tcp address info, + * Return: 0 if they are holding the same ip/tcp address info, * otherwise returns 1 */ int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr, @@ -272,6 +274,7 @@ void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg); * iwpm_send_hello - Send hello response to iwpmd * * @nl_client: The index of the netlink client + * @iwpm_pid: The pid of the user space port mapper * @abi_version: The kernel's abi_version * * Returns 0 on success or a 
negative error code diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 9947d16edef2..c54db13fa9b0 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -913,9 +913,9 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, /* No GRH for DR SMP */ ret = device->ops.process_mad(device, 0, port_num, &mad_wc, NULL, - (const struct ib_mad_hdr *)smp, mad_size, - (struct ib_mad_hdr *)mad_priv->mad, - &mad_size, &out_mad_pkey_index); + (const struct ib_mad *)smp, + (struct ib_mad *)mad_priv->mad, &mad_size, + &out_mad_pkey_index); switch (ret) { case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY: @@ -1397,25 +1397,6 @@ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc) } EXPORT_SYMBOL(ib_free_recv_mad); -struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp, - u8 rmpp_version, - ib_mad_send_handler send_handler, - ib_mad_recv_handler recv_handler, - void *context) -{ - return ERR_PTR(-EINVAL); /* XXX: for now */ -} -EXPORT_SYMBOL(ib_redirect_mad_qp); - -int ib_process_mad_wc(struct ib_mad_agent *mad_agent, - struct ib_wc *wc) -{ - dev_err(&mad_agent->device->dev, - "ib_process_mad_wc() not implemented yet\n"); - return 0; -} -EXPORT_SYMBOL(ib_process_mad_wc); - static int method_in_use(struct ib_mad_mgmt_method_table **method, struct ib_mad_reg_req *mad_reg_req) { @@ -2340,9 +2321,9 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc) if (port_priv->device->ops.process_mad) { ret = port_priv->device->ops.process_mad( port_priv->device, 0, port_priv->port_num, wc, - &recv->grh, (const struct ib_mad_hdr *)recv->mad, - recv->mad_size, (struct ib_mad_hdr *)response->mad, - &mad_size, &resp_mad_pkey_index); + &recv->grh, (const struct ib_mad *)recv->mad, + (struct ib_mad *)response->mad, &mad_size, + &resp_mad_pkey_index); if (opa) wc->pkey_index = resp_mad_pkey_index; diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index eecfc0b377c9..8cd31ef25eff 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -36,20 +36,25 @@ #include <linux/export.h> #include <net/netlink.h> #include <net/net_namespace.h> +#include <net/netns/generic.h> #include <net/sock.h> #include <rdma/rdma_netlink.h> #include <linux/module.h> #include "core_priv.h" -static DEFINE_MUTEX(rdma_nl_mutex); -static struct sock *nls; static struct { - const struct rdma_nl_cbs *cb_table; + const struct rdma_nl_cbs *cb_table; + /* Synchronizes between ongoing netlink commands and netlink client + * unregistration. + */ + struct rw_semaphore sem; } rdma_nl_types[RDMA_NL_NUM_CLIENTS]; bool rdma_nl_chk_listeners(unsigned int group) { - return netlink_has_listeners(nls, group); + struct rdma_dev_net *rnet = rdma_net_to_dev_net(&init_net); + + return netlink_has_listeners(rnet->nl_sock, group); } EXPORT_SYMBOL(rdma_nl_chk_listeners); @@ -73,62 +78,53 @@ static bool is_nl_msg_valid(unsigned int type, unsigned int op) return (op < max_num_ops[type]) ? true : false; } -static bool is_nl_valid(unsigned int type, unsigned int op) +static const struct rdma_nl_cbs * +get_cb_table(const struct sk_buff *skb, unsigned int type, unsigned int op) { const struct rdma_nl_cbs *cb_table; - if (!is_nl_msg_valid(type, op)) - return false; + /* + * Currently only NLDEV client is supporting netlink commands in + * non init_net net namespace. 
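Editorial aside: the netlink.c hunks above replace the single rdma_nl_mutex with a per-client rw_semaphore plus READ_ONCE()/smp_store_release() publication of the callback table, so message handling only ever takes the shared side while client unregistration takes it exclusively, and a missing table triggers one request_module() retry. Below is a reduced, illustrative sketch of that publish/lookup pattern; it is not part of the patch and the demo_* names are made up.

/* Sketch only: each sem is assumed to be init_rwsem()'d at module init,
 * the same way rdma_nl_init() above initialises rdma_nl_types[].sem.
 */
struct demo_client {
	const struct rdma_nl_cbs *cb_table;
	struct rw_semaphore sem;	/* readers: message handling, writer: unregister */
};

static struct demo_client demo_clients[RDMA_NL_NUM_CLIENTS];

static void demo_register(unsigned int index, const struct rdma_nl_cbs *cbs)
{
	/* publish the table; readers pair with the READ_ONCE() below */
	smp_store_release(&demo_clients[index].cb_table, cbs);
}

static void demo_unregister(unsigned int index)
{
	/* the write lock waits for in-flight handlers before unpublishing */
	down_write(&demo_clients[index].sem);
	demo_clients[index].cb_table = NULL;
	up_write(&demo_clients[index].sem);
}

/* Caller already holds down_read(&demo_clients[index].sem). */
static const struct rdma_nl_cbs *demo_lookup(unsigned int index)
{
	const struct rdma_nl_cbs *cbs;

	cbs = READ_ONCE(demo_clients[index].cb_table);
	if (!cbs) {
		/* drop the shared lock around the module load, then retry once */
		up_read(&demo_clients[index].sem);
		request_module("rdma-netlink-subsys-%d", index);
		down_read(&demo_clients[index].sem);
		cbs = READ_ONCE(demo_clients[index].cb_table);
	}
	return cbs;
}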
+ */ + if (sock_net(skb->sk) != &init_net && type != RDMA_NL_NLDEV) + return NULL; - if (!rdma_nl_types[type].cb_table) { - mutex_unlock(&rdma_nl_mutex); - request_module("rdma-netlink-subsys-%d", type); - mutex_lock(&rdma_nl_mutex); - } + cb_table = READ_ONCE(rdma_nl_types[type].cb_table); + if (!cb_table) { + /* + * Didn't get valid reference of the table, attempt module + * load once. + */ + up_read(&rdma_nl_types[type].sem); - cb_table = rdma_nl_types[type].cb_table; + request_module("rdma-netlink-subsys-%d", type); + down_read(&rdma_nl_types[type].sem); + cb_table = READ_ONCE(rdma_nl_types[type].cb_table); + } if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit)) - return false; - return true; + return NULL; + return cb_table; } void rdma_nl_register(unsigned int index, const struct rdma_nl_cbs cb_table[]) { - mutex_lock(&rdma_nl_mutex); - if (!is_nl_msg_valid(index, 0)) { - /* - * All clients are not interesting in success/failure of - * this call. They want to see the print to error log and - * continue their initialization. Print warning for them, - * because it is programmer's error to be here. - */ - mutex_unlock(&rdma_nl_mutex); - WARN(true, - "The not-valid %u index was supplied to RDMA netlink\n", - index); + if (WARN_ON(!is_nl_msg_valid(index, 0)) || + WARN_ON(READ_ONCE(rdma_nl_types[index].cb_table))) return; - } - if (rdma_nl_types[index].cb_table) { - mutex_unlock(&rdma_nl_mutex); - WARN(true, - "The %u index is already registered in RDMA netlink\n", - index); - return; - } - - rdma_nl_types[index].cb_table = cb_table; - mutex_unlock(&rdma_nl_mutex); + /* Pairs with the READ_ONCE in is_nl_valid() */ + smp_store_release(&rdma_nl_types[index].cb_table, cb_table); } EXPORT_SYMBOL(rdma_nl_register); void rdma_nl_unregister(unsigned int index) { - mutex_lock(&rdma_nl_mutex); + down_write(&rdma_nl_types[index].sem); rdma_nl_types[index].cb_table = NULL; - mutex_unlock(&rdma_nl_mutex); + up_write(&rdma_nl_types[index].sem); } EXPORT_SYMBOL(rdma_nl_unregister); @@ -160,15 +156,21 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, unsigned int index = RDMA_NL_GET_CLIENT(type); unsigned int op = RDMA_NL_GET_OP(type); const struct rdma_nl_cbs *cb_table; + int err = -EINVAL; - if (!is_nl_valid(index, op)) + if (!is_nl_msg_valid(index, op)) return -EINVAL; - cb_table = rdma_nl_types[index].cb_table; + down_read(&rdma_nl_types[index].sem); + cb_table = get_cb_table(skb, index, op); + if (!cb_table) + goto done; if ((cb_table[op].flags & RDMA_NL_ADMIN_PERM) && - !netlink_capable(skb, CAP_NET_ADMIN)) - return -EPERM; + !netlink_capable(skb, CAP_NET_ADMIN)) { + err = -EPERM; + goto done; + } /* * LS responses overload the 0x100 (NLM_F_ROOT) flag. 
Don't @@ -176,8 +178,8 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, */ if (index == RDMA_NL_LS) { if (cb_table[op].doit) - return cb_table[op].doit(skb, nlh, extack); - return -EINVAL; + err = cb_table[op].doit(skb, nlh, extack); + goto done; } /* FIXME: Convert IWCM to properly handle doit callbacks */ if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_IWCM) { @@ -185,14 +187,15 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, .dump = cb_table[op].dump, }; if (c.dump) - return netlink_dump_start(nls, skb, nlh, &c); - return -EINVAL; + err = netlink_dump_start(skb->sk, skb, nlh, &c); + goto done; } if (cb_table[op].doit) - return cb_table[op].doit(skb, nlh, extack); - - return 0; + err = cb_table[op].doit(skb, nlh, extack); +done: + up_read(&rdma_nl_types[index].sem); + return err; } /* @@ -253,57 +256,76 @@ skip: static void rdma_nl_rcv(struct sk_buff *skb) { - mutex_lock(&rdma_nl_mutex); rdma_nl_rcv_skb(skb, &rdma_nl_rcv_msg); - mutex_unlock(&rdma_nl_mutex); } -int rdma_nl_unicast(struct sk_buff *skb, u32 pid) +int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid) { + struct rdma_dev_net *rnet = rdma_net_to_dev_net(net); int err; - err = netlink_unicast(nls, skb, pid, MSG_DONTWAIT); + err = netlink_unicast(rnet->nl_sock, skb, pid, MSG_DONTWAIT); return (err < 0) ? err : 0; } EXPORT_SYMBOL(rdma_nl_unicast); -int rdma_nl_unicast_wait(struct sk_buff *skb, __u32 pid) +int rdma_nl_unicast_wait(struct net *net, struct sk_buff *skb, __u32 pid) { + struct rdma_dev_net *rnet = rdma_net_to_dev_net(net); int err; - err = netlink_unicast(nls, skb, pid, 0); + err = netlink_unicast(rnet->nl_sock, skb, pid, 0); return (err < 0) ? err : 0; } EXPORT_SYMBOL(rdma_nl_unicast_wait); -int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags) +int rdma_nl_multicast(struct net *net, struct sk_buff *skb, + unsigned int group, gfp_t flags) { - return nlmsg_multicast(nls, skb, 0, group, flags); + struct rdma_dev_net *rnet = rdma_net_to_dev_net(net); + + return nlmsg_multicast(rnet->nl_sock, skb, 0, group, flags); } EXPORT_SYMBOL(rdma_nl_multicast); -int __init rdma_nl_init(void) +void rdma_nl_init(void) +{ + int idx; + + for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++) + init_rwsem(&rdma_nl_types[idx].sem); +} + +void rdma_nl_exit(void) +{ + int idx; + + for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++) + WARN(rdma_nl_types[idx].cb_table, + "Netlink client %d wasn't released prior to unloading %s\n", + idx, KBUILD_MODNAME); +} + +int rdma_nl_net_init(struct rdma_dev_net *rnet) { + struct net *net = read_pnet(&rnet->net); struct netlink_kernel_cfg cfg = { .input = rdma_nl_rcv, }; + struct sock *nls; - nls = netlink_kernel_create(&init_net, NETLINK_RDMA, &cfg); + nls = netlink_kernel_create(net, NETLINK_RDMA, &cfg); if (!nls) return -ENOMEM; nls->sk_sndtimeo = 10 * HZ; + rnet->nl_sock = nls; return 0; } -void rdma_nl_exit(void) +void rdma_nl_net_exit(struct rdma_dev_net *rnet) { - int idx; - - for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++) - rdma_nl_unregister(idx); - - netlink_kernel_release(nls); + netlink_kernel_release(rnet->nl_sock); } MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_RDMA); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 87d40d1ecdde..37b433aa7306 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -41,6 +41,10 @@ #include "core_priv.h" #include "cma_priv.h" #include "restrack.h" +#include "uverbs.h" + +typedef int (*res_fill_func_t)(struct 
sk_buff*, bool, + struct rdma_restrack_entry*, uint32_t); /* * Sort array elements by the netlink attribute name @@ -180,6 +184,19 @@ static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, return 0; } +int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name, + const char *str) +{ + if (put_driver_name_print_type(msg, name, + RDMA_NLDEV_PRINT_TYPE_UNSPEC)) + return -EMSGSIZE; + if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str)) + return -EMSGSIZE; + + return 0; +} +EXPORT_SYMBOL(rdma_nl_put_driver_string); + int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value) { return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC, @@ -382,8 +399,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device) for (i = 0; i < RDMA_RESTRACK_MAX; i++) { if (!names[i]) continue; - curr = rdma_restrack_count(device, i, - task_active_pid_ns(current)); + curr = rdma_restrack_count(device, i); ret = fill_res_info_entry(msg, names[i], curr); if (ret) goto err; @@ -400,20 +416,34 @@ err: static int fill_res_name_pid(struct sk_buff *msg, struct rdma_restrack_entry *res) { + int err = 0; + /* * For user resources, user is should read /proc/PID/comm to get the * name of the task file. */ if (rdma_is_kernel_res(res)) { - if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, - res->kern_name)) - return -EMSGSIZE; + err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, + res->kern_name); } else { - if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, - task_pid_vnr(res->task))) - return -EMSGSIZE; + pid_t pid; + + pid = task_pid_vnr(res->task); + /* + * Task is dead and in zombie state. + * There is no need to print PID anymore. + */ + if (pid) + /* + * This part is racy, task can be killed and PID will + * be zero right here but it is ok, next query won't + * return PID. We don't promise real-time reflection + * of SW objects. + */ + err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid); } - return 0; + + return err ? 
-EMSGSIZE : 0; } static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg, @@ -424,6 +454,14 @@ static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg, return dev->ops.fill_res_entry(msg, res); } +static bool fill_stat_entry(struct ib_device *dev, struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + if (!dev->ops.fill_stat_entry) + return false; + return dev->ops.fill_stat_entry(msg, res); +} + static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { @@ -562,7 +600,7 @@ static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin, goto err; if (!rdma_is_kernel_res(res) && nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, - cq->uobject->context->res.id)) + cq->uobject->uevent.uobject.context->res.id)) goto err; if (fill_res_name_pid(msg, res)) @@ -699,9 +737,6 @@ static int fill_stat_counter_qps(struct sk_buff *msg, rt = &counter->device->res[RDMA_RESTRACK_QP]; xa_lock(&rt->xa); xa_for_each(&rt->xa, id, res) { - if (!rdma_is_visible_in_pid_ns(res)) - continue; - qp = container_of(res, struct ib_qp, res); if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) continue; @@ -724,8 +759,8 @@ err: return ret; } -static int fill_stat_hwcounter_entry(struct sk_buff *msg, - const char *name, u64 value) +int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name, + u64 value) { struct nlattr *entry_attr; @@ -747,6 +782,25 @@ err: nla_nest_cancel(msg, entry_attr); return -EMSGSIZE; } +EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry); + +static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, + struct rdma_restrack_entry *res, uint32_t port) +{ + struct ib_mr *mr = container_of(res, struct ib_mr, res); + struct ib_device *dev = mr->pd->device; + + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id)) + goto err; + + if (fill_stat_entry(dev, msg, res)) + goto err; + + return 0; + +err: + return -EMSGSIZE; +} static int fill_stat_counter_hwcounters(struct sk_buff *msg, struct rdma_counter *counter) @@ -760,7 +814,7 @@ static int fill_stat_counter_hwcounters(struct sk_buff *msg, return -EMSGSIZE; for (i = 0; i < st->num_counters; i++) - if (fill_stat_hwcounter_entry(msg, st->names[i], st->value[i])) + if (rdma_nl_stat_hwcounter_entry(msg, st->names[i], st->value[i])) goto err; nla_nest_end(msg, table_attr); @@ -779,7 +833,7 @@ static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin, container_of(res, struct rdma_counter, res); if (port && port != counter->port) - return 0; + return -EAGAIN; /* Dump it even query failed */ rdma_counter_query_stats(counter); @@ -832,7 +886,7 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nlmsg_end(msg, nlh); ib_device_put(device); - return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_free: nlmsg_free(msg); @@ -972,7 +1026,7 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nlmsg_end(msg, nlh); ib_device_put(device); - return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_free: nlmsg_free(msg); @@ -1074,7 +1128,7 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nlmsg_end(msg, nlh); ib_device_put(device); - return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_free: nlmsg_free(msg); @@ -1118,8 +1172,6 @@ 
static int nldev_res_get_dumpit(struct sk_buff *skb, } struct nldev_fill_res_entry { - int (*fill_res_func)(struct sk_buff *msg, bool has_cap_net_admin, - struct rdma_restrack_entry *res, u32 port); enum rdma_nldev_attr nldev_attr; enum rdma_nldev_command nldev_cmd; u8 flags; @@ -1133,21 +1185,18 @@ enum nldev_res_flags { static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { [RDMA_RESTRACK_QP] = { - .fill_res_func = fill_res_qp_entry, .nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_QP, .entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY, .id = RDMA_NLDEV_ATTR_RES_LQPN, }, [RDMA_RESTRACK_CM_ID] = { - .fill_res_func = fill_res_cm_id_entry, .nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID, .entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY, .id = RDMA_NLDEV_ATTR_RES_CM_IDN, }, [RDMA_RESTRACK_CQ] = { - .fill_res_func = fill_res_cq_entry, .nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_CQ, .flags = NLDEV_PER_DEV, @@ -1155,7 +1204,6 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { .id = RDMA_NLDEV_ATTR_RES_CQN, }, [RDMA_RESTRACK_MR] = { - .fill_res_func = fill_res_mr_entry, .nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_MR, .flags = NLDEV_PER_DEV, @@ -1163,7 +1211,6 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { .id = RDMA_NLDEV_ATTR_RES_MRN, }, [RDMA_RESTRACK_PD] = { - .fill_res_func = fill_res_pd_entry, .nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_PD, .flags = NLDEV_PER_DEV, @@ -1171,7 +1218,6 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { .id = RDMA_NLDEV_ATTR_RES_PDN, }, [RDMA_RESTRACK_COUNTER] = { - .fill_res_func = fill_res_counter_entry, .nldev_cmd = RDMA_NLDEV_CMD_STAT_GET, .nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER, .entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY, @@ -1181,7 +1227,8 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack, - enum rdma_restrack_type res_type) + enum rdma_restrack_type res_type, + res_fill_func_t fill_func) { const struct nldev_fill_res_entry *fe = &fill_entries[res_type]; struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; @@ -1223,15 +1270,10 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, goto err; } - if (!rdma_is_visible_in_pid_ns(res)) { - ret = -ENOENT; - goto err_get; - } - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; - goto err; + goto err_get; } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, @@ -1244,14 +1286,16 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, } has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN); - ret = fe->fill_res_func(msg, has_cap_net_admin, res, port); + + ret = fill_func(msg, has_cap_net_admin, res, port); + rdma_restrack_put(res); if (ret) goto err_free; nlmsg_end(msg, nlh); ib_device_put(device); - return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_free: nlmsg_free(msg); @@ -1264,7 +1308,8 @@ err: static int res_get_common_dumpit(struct sk_buff *skb, struct netlink_callback *cb, - enum rdma_restrack_type res_type) + enum rdma_restrack_type res_type, + res_fill_func_t fill_func) { const struct nldev_fill_res_entry *fe = &fill_entries[res_type]; struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; @@ -1335,9 
+1380,6 @@ static int res_get_common_dumpit(struct sk_buff *skb, * objects. */ xa_for_each(&rt->xa, id, res) { - if (!rdma_is_visible_in_pid_ns(res)) - continue; - if (idx < start || !rdma_restrack_get(res)) goto next; @@ -1352,7 +1394,8 @@ static int res_get_common_dumpit(struct sk_buff *skb, goto msg_full; } - ret = fe->fill_res_func(skb, has_cap_net_admin, res, port); + ret = fill_func(skb, has_cap_net_admin, res, port); + rdma_restrack_put(res); if (ret) { @@ -1395,17 +1438,19 @@ err_index: return ret; } -#define RES_GET_FUNCS(name, type) \ - static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \ +#define RES_GET_FUNCS(name, type) \ + static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \ struct netlink_callback *cb) \ - { \ - return res_get_common_dumpit(skb, cb, type); \ - } \ - static int nldev_res_get_##name##_doit(struct sk_buff *skb, \ - struct nlmsghdr *nlh, \ + { \ + return res_get_common_dumpit(skb, cb, type, \ + fill_res_##name##_entry); \ + } \ + static int nldev_res_get_##name##_doit(struct sk_buff *skb, \ + struct nlmsghdr *nlh, \ struct netlink_ext_ack *extack) \ - { \ - return res_get_common_doit(skb, nlh, extack, type); \ + { \ + return res_get_common_doit(skb, nlh, extack, type, \ + fill_res_##name##_entry); \ } RES_GET_FUNCS(qp, RDMA_RESTRACK_QP); @@ -1596,7 +1641,7 @@ static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh, put_device(data.cdev); if (ibdev) ib_device_put(ibdev); - return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); out_data: put_device(data.cdev); @@ -1636,7 +1681,7 @@ static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, return err; } nlmsg_end(msg, nlh); - return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); } static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1734,7 +1779,7 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nlmsg_end(msg, nlh); ib_device_put(device); - return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_fill: rdma_counter_unbind_qpn(device, port, qpn, cntn); @@ -1788,10 +1833,6 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh, cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); - ret = rdma_counter_unbind_qpn(device, port, qpn, cntn); - if (ret) - goto err_unbind; - if (fill_nldev_handle(msg, device) || nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) || nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || @@ -1800,13 +1841,15 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh, goto err_fill; } + ret = rdma_counter_unbind_qpn(device, port, qpn, cntn); + if (ret) + goto err_fill; + nlmsg_end(msg, nlh); ib_device_put(device); - return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_fill: - rdma_counter_bind_qpn(device, port, qpn, cntn); -err_unbind: nlmsg_free(msg); err: ib_device_put(device); @@ -1883,7 +1926,7 @@ static int stat_get_doit_default_counter(struct sk_buff *skb, for (i = 0; i < num_cnts; i++) { v = stats->value[i] + rdma_counter_get_hwstat_value(device, port, i); - if (fill_stat_hwcounter_entry(msg, stats->names[i], v)) { + if (rdma_nl_stat_hwcounter_entry(msg, stats->names[i], v)) { ret = -EMSGSIZE; 
goto err_table; } @@ -1893,7 +1936,7 @@ static int stat_get_doit_default_counter(struct sk_buff *skb, mutex_unlock(&stats->lock); nlmsg_end(msg, nlh); ib_device_put(device); - return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_table: nla_nest_cancel(msg, table_attr); @@ -1965,7 +2008,7 @@ static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh, nlmsg_end(msg, nlh); ib_device_put(device); - return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_msg: nlmsg_free(msg); @@ -1992,7 +2035,10 @@ static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, case RDMA_NLDEV_ATTR_RES_QP: ret = stat_get_doit_qp(skb, nlh, extack, tb); break; - + case RDMA_NLDEV_ATTR_RES_MR: + ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR, + fill_stat_mr_entry); + break; default: ret = -EINVAL; break; @@ -2016,7 +2062,10 @@ static int nldev_stat_get_dumpit(struct sk_buff *skb, case RDMA_NLDEV_ATTR_RES_QP: ret = nldev_res_get_counter_dumpit(skb, cb); break; - + case RDMA_NLDEV_ATTR_RES_MR: + ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR, + fill_stat_mr_entry); + break; default: ret = -EINVAL; break; diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index ccf4d069c25c..5128cb16bb48 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -42,26 +42,21 @@ #include "core_priv.h" #include "rdma_core.h" -void uverbs_uobject_get(struct ib_uobject *uobject) -{ - kref_get(&uobject->ref); -} - static void uverbs_uobject_free(struct kref *ref) { - struct ib_uobject *uobj = - container_of(ref, struct ib_uobject, ref); - - if (uobj->uapi_object->type_class->needs_kfree_rcu) - kfree_rcu(uobj, rcu); - else - kfree(uobj); + kfree_rcu(container_of(ref, struct ib_uobject, ref), rcu); } +/* + * In order to indicate we no longer needs this uobject, uverbs_uobject_put + * is called. When the reference count is decreased, the uobject is freed. + * For example, this is used when attaching a completion channel to a CQ. 
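Editorial aside: the rdma_core.c hunk above reduces uverbs_uobject_free() to a single kfree_rcu() call, so every uobject is now freed only after an RCU grace period. For readers less familiar with the kref/RCU combination, here is a generic sketch of the idiom (illustrative only, not part of the patch; demo_obj is a stand-in type):

struct demo_obj {
	struct kref ref;		/* initialised with kref_init() at allocation */
	struct rcu_head rcu;
	/* payload ... */
};

static void demo_obj_free(struct kref *ref)
{
	/* defer the kfree() until after the current RCU grace period */
	kfree_rcu(container_of(ref, struct demo_obj, ref), rcu);
}

static void demo_obj_get(struct demo_obj *obj)
{
	kref_get(&obj->ref);
}

static void demo_obj_put(struct demo_obj *obj)
{
	/* frees the object once the last reference is dropped */
	kref_put(&obj->ref, demo_obj_free);
}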
+ */ void uverbs_uobject_put(struct ib_uobject *uobject) { kref_put(&uobject->ref, uverbs_uobject_free); } +EXPORT_SYMBOL(uverbs_uobject_put); static int uverbs_try_lock_object(struct ib_uobject *uobj, enum rdma_lookup_mode mode) @@ -135,7 +130,11 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, lockdep_assert_held(&ufile->hw_destroy_rwsem); assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE); - if (uobj->object) { + if (reason == RDMA_REMOVE_ABORT) { + WARN_ON(!list_empty(&uobj->list)); + WARN_ON(!uobj->context); + uobj->uapi_object->type_class->alloc_abort(uobj); + } else if (uobj->object) { ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason, attrs); if (ret) { @@ -151,12 +150,6 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, uobj->object = NULL; } - if (reason == RDMA_REMOVE_ABORT) { - WARN_ON(!list_empty(&uobj->list)); - WARN_ON(!uobj->context); - uobj->uapi_object->type_class->alloc_abort(uobj); - } - uobj->context = NULL; /* @@ -263,15 +256,20 @@ int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id, } /* alloc_uobj must be undone by uverbs_destroy_uobject() */ -static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile, +static struct ib_uobject *alloc_uobj(struct uverbs_attr_bundle *attrs, const struct uverbs_api_object *obj) { + struct ib_uverbs_file *ufile = attrs->ufile; struct ib_uobject *uobj; - struct ib_ucontext *ucontext; - ucontext = ib_uverbs_get_ucontext_file(ufile); - if (IS_ERR(ucontext)) - return ERR_CAST(ucontext); + if (!attrs->context) { + struct ib_ucontext *ucontext = + ib_uverbs_get_ucontext_file(ufile); + + if (IS_ERR(ucontext)) + return ERR_CAST(ucontext); + attrs->context = ucontext; + } uobj = kzalloc(obj->type_attrs->obj_size, GFP_KERNEL); if (!uobj) @@ -281,7 +279,7 @@ static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile, * The object is added to the list in the commit stage. */ uobj->ufile = ufile; - uobj->context = ucontext; + uobj->context = attrs->context; INIT_LIST_HEAD(&uobj->list); uobj->uapi_object = obj; /* @@ -358,9 +356,9 @@ lookup_get_fd_uobject(const struct uverbs_api_object *obj, uobject = f->private_data; /* - * fget(id) ensures we are not currently running uverbs_close_fd, - * and the caller is expected to ensure that uverbs_close_fd is never - * done while a call top lookup is possible. + * fget(id) ensures we are not currently running + * uverbs_uobject_fd_release(), and the caller is expected to ensure + * that release is never done while a call to lookup is possible. 
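Editorial aside: the f->f_op != fd_type->fops comparison referenced above is the usual way to prove that an arbitrary user-supplied fd really refers to this subsystem's anon-inode file before trusting filp->private_data, and holding the fget() reference guarantees the ->release handler cannot run concurrently. A condensed sketch of that lookup idiom follows (illustrative; demo_fops is a stand-in):

static const struct file_operations demo_fops;	/* fops our anon-inode files are created with */

static struct file *demo_lookup_fd(unsigned int fd)
{
	struct file *filp;

	filp = fget(fd);	/* takes a reference, so ->release cannot run concurrently */
	if (!filp)
		return ERR_PTR(-EBADF);

	/* an arbitrary fd is only trusted if it carries our file_operations */
	if (filp->f_op != &demo_fops) {
		fput(filp);
		return ERR_PTR(-EBADF);
	}

	/* filp->private_data is now known to point at one of our objects;
	 * the caller drops the reference with fput() when done.
	 */
	return filp;
}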
*/ if (f->f_op != fd_type->fops) { fput(f); @@ -424,12 +422,12 @@ free: static struct ib_uobject * alloc_begin_idr_uobject(const struct uverbs_api_object *obj, - struct ib_uverbs_file *ufile) + struct uverbs_attr_bundle *attrs) { int ret; struct ib_uobject *uobj; - uobj = alloc_uobj(ufile, obj); + uobj = alloc_uobj(attrs, obj); if (IS_ERR(uobj)) return uobj; @@ -445,7 +443,7 @@ alloc_begin_idr_uobject(const struct uverbs_api_object *obj, return uobj; remove: - xa_erase(&ufile->idr, uobj->id); + xa_erase(&attrs->ufile->idr, uobj->id); uobj_put: uverbs_uobject_put(uobj); return ERR_PTR(ret); @@ -453,31 +451,48 @@ uobj_put: static struct ib_uobject * alloc_begin_fd_uobject(const struct uverbs_api_object *obj, - struct ib_uverbs_file *ufile) + struct uverbs_attr_bundle *attrs) { + const struct uverbs_obj_fd_type *fd_type = + container_of(obj->type_attrs, struct uverbs_obj_fd_type, type); int new_fd; struct ib_uobject *uobj; + struct file *filp; + + if (WARN_ON(fd_type->fops->release != &uverbs_uobject_fd_release)) + return ERR_PTR(-EINVAL); new_fd = get_unused_fd_flags(O_CLOEXEC); if (new_fd < 0) return ERR_PTR(new_fd); - uobj = alloc_uobj(ufile, obj); - if (IS_ERR(uobj)) { - put_unused_fd(new_fd); - return uobj; + uobj = alloc_uobj(attrs, obj); + if (IS_ERR(uobj)) + goto err_fd; + + /* Note that uverbs_uobject_fd_release() is called during abort */ + filp = anon_inode_getfile(fd_type->name, fd_type->fops, NULL, + fd_type->flags); + if (IS_ERR(filp)) { + uobj = ERR_CAST(filp); + goto err_uobj; } + uobj->object = filp; uobj->id = new_fd; - uobj->ufile = ufile; + return uobj; +err_uobj: + uverbs_uobject_put(uobj); +err_fd: + put_unused_fd(new_fd); return uobj; } struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, - struct ib_uverbs_file *ufile, struct uverbs_attr_bundle *attrs) { + struct ib_uverbs_file *ufile = attrs->ufile; struct ib_uobject *ret; if (IS_ERR(obj)) @@ -491,13 +506,11 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, if (!down_read_trylock(&ufile->hw_destroy_rwsem)) return ERR_PTR(-EIO); - ret = obj->type_class->alloc_begin(obj, ufile); + ret = obj->type_class->alloc_begin(obj, attrs); if (IS_ERR(ret)) { up_read(&ufile->hw_destroy_rwsem); return ret; } - if (attrs) - attrs->context = ret->context; return ret; } @@ -544,6 +557,9 @@ static void remove_handle_idr_uobject(struct ib_uobject *uobj) static void alloc_abort_fd_uobject(struct ib_uobject *uobj) { + struct file *filp = uobj->object; + + fput(filp); put_unused_fd(uobj->id); } @@ -553,7 +569,7 @@ static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj, { const struct uverbs_obj_fd_type *fd_type = container_of( uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type); - int ret = fd_type->context_closed(uobj, why); + int ret = fd_type->destroy_object(uobj, why); if (ib_is_destroy_retryable(ret, why, uobj)) return ret; @@ -565,7 +581,7 @@ static void remove_handle_fd_uobject(struct ib_uobject *uobj) { } -static int alloc_commit_idr_uobject(struct ib_uobject *uobj) +static void alloc_commit_idr_uobject(struct ib_uobject *uobj) { struct ib_uverbs_file *ufile = uobj->ufile; void *old; @@ -579,33 +595,14 @@ static int alloc_commit_idr_uobject(struct ib_uobject *uobj) */ old = xa_store(&ufile->idr, uobj->id, uobj, GFP_KERNEL); WARN_ON(old != NULL); - - return 0; } -static int alloc_commit_fd_uobject(struct ib_uobject *uobj) +static void alloc_commit_fd_uobject(struct ib_uobject *uobj) { - const struct uverbs_obj_fd_type *fd_type = container_of( - 
uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type); int fd = uobj->id; - struct file *filp; - - /* - * The kref for uobj is moved into filp->private data and put in - * uverbs_close_fd(). Once alloc_commit() succeeds uverbs_close_fd() - * must be guaranteed to be called from the provided fops release - * callback. - */ - filp = anon_inode_getfile(fd_type->name, - fd_type->fops, - uobj, - fd_type->flags); - if (IS_ERR(filp)) - return PTR_ERR(filp); - - uobj->object = filp; + struct file *filp = uobj->object; - /* Matching put will be done in uverbs_close_fd() */ + /* Matching put will be done in uverbs_uobject_fd_release() */ kref_get(&uobj->ufile->ref); /* This shouldn't be used anymore. Use the file object instead */ @@ -613,11 +610,10 @@ static int alloc_commit_fd_uobject(struct ib_uobject *uobj) /* * NOTE: Once we install the file we loose ownership of our kref on - * uobj. It will be put by uverbs_close_fd() + * uobj. It will be put by uverbs_uobject_fd_release() */ + filp->private_data = uobj; fd_install(fd, filp); - - return 0; } /* @@ -625,19 +621,13 @@ static int alloc_commit_fd_uobject(struct ib_uobject *uobj) * caller can no longer assume uobj is valid. If this function fails it * destroys the uboject, including the attached HW object. */ -int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj, - struct uverbs_attr_bundle *attrs) +void rdma_alloc_commit_uobject(struct ib_uobject *uobj, + struct uverbs_attr_bundle *attrs) { struct ib_uverbs_file *ufile = attrs->ufile; - int ret; /* alloc_commit consumes the uobj kref */ - ret = uobj->uapi_object->type_class->alloc_commit(uobj); - if (ret) { - uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs); - up_read(&ufile->hw_destroy_rwsem); - return ret; - } + uobj->uapi_object->type_class->alloc_commit(uobj); /* kref is held so long as the uobj is on the uobj list. */ uverbs_uobject_get(uobj); @@ -650,8 +640,6 @@ int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj, /* Matches the down_read in rdma_alloc_begin_uobject */ up_read(&ufile->hw_destroy_rwsem); - - return 0; } /* @@ -663,7 +651,6 @@ void rdma_alloc_abort_uobject(struct ib_uobject *uobj, { struct ib_uverbs_file *ufile = uobj->ufile; - uobj->object = NULL; uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs); /* Matches the down_read in rdma_alloc_begin_uobject */ @@ -681,7 +668,10 @@ static void lookup_put_fd_uobject(struct ib_uobject *uobj, struct file *filp = uobj->object; WARN_ON(mode != UVERBS_LOOKUP_READ); - /* This indirectly calls uverbs_close_fd and free the object */ + /* + * This indirectly calls uverbs_uobject_fd_release() and free the + * object + */ fput(filp); } @@ -744,33 +734,32 @@ const struct uverbs_obj_type_class uverbs_idr_class = { .lookup_put = lookup_put_idr_uobject, .destroy_hw = destroy_hw_idr_uobject, .remove_handle = remove_handle_idr_uobject, - /* - * When we destroy an object, we first just lock it for WRITE and - * actually DESTROY it in the finalize stage. So, the problematic - * scenario is when we just started the finalize stage of the - * destruction (nothing was executed yet). Now, the other thread - * fetched the object for READ access, but it didn't lock it yet. - * The DESTROY thread continues and starts destroying the object. - * When the other thread continue - without the RCU, it would - * access freed memory. However, the rcu_read_lock delays the free - * until the rcu_read_lock of the READ operation quits. 
Since the - * exclusive lock of the object is still taken by the DESTROY flow, the - * READ operation will get -EBUSY and it'll just bail out. - */ - .needs_kfree_rcu = true, }; EXPORT_SYMBOL(uverbs_idr_class); -void uverbs_close_fd(struct file *f) +/* + * Users of UVERBS_TYPE_ALLOC_FD should set this function as the struct + * file_operations release method. + */ +int uverbs_uobject_fd_release(struct inode *inode, struct file *filp) { - struct ib_uobject *uobj = f->private_data; - struct ib_uverbs_file *ufile = uobj->ufile; - struct uverbs_attr_bundle attrs = { - .context = uobj->context, - .ufile = ufile, - }; + struct ib_uverbs_file *ufile; + struct ib_uobject *uobj; + + /* + * This can only happen if the fput came from alloc_abort_fd_uobject() + */ + if (!filp->private_data) + return 0; + uobj = filp->private_data; + ufile = uobj->ufile; if (down_read_trylock(&ufile->hw_destroy_rwsem)) { + struct uverbs_attr_bundle attrs = { + .context = uobj->context, + .ufile = ufile, + }; + /* * lookup_get_fd_uobject holds the kref on the struct file any * time a FD uobj is locked, which prevents this release @@ -782,13 +771,14 @@ void uverbs_close_fd(struct file *f) up_read(&ufile->hw_destroy_rwsem); } - /* Matches the get in alloc_begin_fd_uobject */ + /* Matches the get in alloc_commit_fd_uobject() */ kref_put(&ufile->ref, ib_uverbs_release_file); /* Pairs with filp->private_data in alloc_begin_fd_uobject */ uverbs_uobject_put(uobj); + return 0; } -EXPORT_SYMBOL(uverbs_close_fd); +EXPORT_SYMBOL(uverbs_uobject_fd_release); /* * Drop the ucontext off the ufile and completely disconnect it from the @@ -817,6 +807,7 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile, rdma_restrack_del(&ucontext->res); ib_dev->ops.dealloc_ucontext(ucontext); + WARN_ON(!xa_empty(&ucontext->mmap_xa)); kfree(ucontext); ufile->ucontext = NULL; @@ -854,9 +845,7 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, } /* - * Destroy the uncontext and every uobject associated with it. If called with - * reason != RDMA_REMOVE_CLOSE this will not return until the destruction has - * been completed and ufile->ucontext is NULL. + * Destroy the uncontext and every uobject associated with it. * * This is internally locked and can be called in parallel from multiple * contexts. @@ -864,22 +853,6 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, enum rdma_remove_reason reason) { - if (reason == RDMA_REMOVE_CLOSE) { - /* - * During destruction we might trigger something that - * synchronously calls release on any file descriptor. For - * this reason all paths that come from file_operations - * release must use try_lock. They can progress knowing that - * there is an ongoing uverbs_destroy_ufile_hw that will clean - * up the driver resources. 
- */ - if (!mutex_trylock(&ufile->ucontext_lock)) - return; - - } else { - mutex_lock(&ufile->ucontext_lock); - } - down_write(&ufile->hw_destroy_rwsem); /* @@ -908,7 +881,6 @@ void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, done: up_write(&ufile->hw_destroy_rwsem); - mutex_unlock(&ufile->ucontext_lock); } const struct uverbs_obj_type_class uverbs_fd_class = { @@ -919,7 +891,6 @@ const struct uverbs_obj_type_class uverbs_fd_class = { .lookup_put = lookup_put_fd_uobject, .destroy_hw = destroy_hw_fd_uobject, .remove_handle = remove_handle_fd_uobject, - .needs_kfree_rcu = false, }; EXPORT_SYMBOL(uverbs_fd_class); @@ -942,19 +913,17 @@ uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access, return rdma_lookup_get_uobject(obj, attrs->ufile, id, UVERBS_LOOKUP_WRITE, attrs); case UVERBS_ACCESS_NEW: - return rdma_alloc_begin_uobject(obj, attrs->ufile, attrs); + return rdma_alloc_begin_uobject(obj, attrs); default: WARN_ON(true); return ERR_PTR(-EOPNOTSUPP); } } -int uverbs_finalize_object(struct ib_uobject *uobj, - enum uverbs_obj_access access, bool commit, - struct uverbs_attr_bundle *attrs) +void uverbs_finalize_object(struct ib_uobject *uobj, + enum uverbs_obj_access access, bool commit, + struct uverbs_attr_bundle *attrs) { - int ret = 0; - /* * refcounts should be handled at the object level and not at the * uobject level. Refcounts of the objects themselves are done in @@ -974,14 +943,11 @@ int uverbs_finalize_object(struct ib_uobject *uobj, break; case UVERBS_ACCESS_NEW: if (commit) - ret = rdma_alloc_commit_uobject(uobj, attrs); + rdma_alloc_commit_uobject(uobj, attrs); else rdma_alloc_abort_uobject(uobj, attrs); break; default: WARN_ON(true); - ret = -EOPNOTSUPP; } - - return ret; } diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index e63fbda25e1d..33978e0f1262 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -51,29 +51,6 @@ void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs); /* - * uverbs_uobject_get is called in order to increase the reference count on - * an uobject. This is useful when a handler wants to keep the uobject's memory - * alive, regardless if this uobject is still alive in the context's objects - * repository. Objects are put via uverbs_uobject_put. - */ -void uverbs_uobject_get(struct ib_uobject *uobject); - -/* - * In order to indicate we no longer needs this uobject, uverbs_uobject_put - * is called. When the reference count is decreased, the uobject is freed. - * For example, this is used when attaching a completion channel to a CQ. - */ -void uverbs_uobject_put(struct ib_uobject *uobject); - -/* Indicate this fd is no longer used by this consumer, but its memory isn't - * necessarily released yet. When the last reference is put, we release the - * memory. After this call is executed, calling uverbs_uobject_get isn't - * allowed. - * This must be called from the release file_operations of the file! - */ -void uverbs_close_fd(struct file *f); - -/* * Get an ib_uobject that corresponds to the given id from ufile, assuming * the object is from the given type. Lock it to the required access when * applicable. 
@@ -86,24 +63,9 @@ struct ib_uobject * uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access, s64 id, struct uverbs_attr_bundle *attrs); -/* - * Note that certain finalize stages could return a status: - * (a) alloc_commit could return a failure if the object is committed at the - * same time when the context is destroyed. - * (b) remove_commit could fail if the object wasn't destroyed successfully. - * Since multiple objects could be finalized in one transaction, it is very NOT - * recommended to have several finalize actions which have side effects. - * For example, it's NOT recommended to have a certain action which has both - * a commit action and a destroy action or two destroy objects in the same - * action. The rule of thumb is to have one destroy or commit action with - * multiple lookups. - * The first non zero return value of finalize_object is returned from this - * function. For example, this could happen when we couldn't destroy an - * object. - */ -int uverbs_finalize_object(struct ib_uobject *uobj, - enum uverbs_obj_access access, bool commit, - struct uverbs_attr_bundle *attrs); +void uverbs_finalize_object(struct ib_uobject *uobj, + enum uverbs_obj_access access, bool commit, + struct uverbs_attr_bundle *attrs); int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx); @@ -189,6 +151,7 @@ void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm, unsigned int num_attrs); void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile); +extern const struct uapi_definition uverbs_def_obj_async_fd[]; extern const struct uapi_definition uverbs_def_obj_counters[]; extern const struct uapi_definition uverbs_def_obj_cq[]; extern const struct uapi_definition uverbs_def_obj_device[]; diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index bddff426ee0f..62fbb0ae9cb4 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -107,10 +107,8 @@ void rdma_restrack_clean(struct ib_device *dev) * rdma_restrack_count() - the current usage of specific object * @dev: IB device * @type: actual type of object to operate - * @ns: PID namespace */ -int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type, - struct pid_namespace *ns) +int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type) { struct rdma_restrack_root *rt = &dev->res[type]; struct rdma_restrack_entry *e; @@ -118,12 +116,8 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type, u32 cnt = 0; xa_lock(&rt->xa); - xas_for_each(&xas, e, U32_MAX) { - if (ns == &init_pid_ns || - (!rdma_is_kernel_res(e) && - ns == task_active_pid_ns(e->task))) - cnt++; - } + xas_for_each(&xas, e, U32_MAX) + cnt++; xa_unlock(&rt->xa); return cnt; } @@ -349,16 +343,3 @@ out: } } EXPORT_SYMBOL(rdma_restrack_del); - -bool rdma_is_visible_in_pid_ns(struct rdma_restrack_entry *res) -{ - /* - * 1. Kern resources should be visible in init - * namespace only - * 2. 
Present only resources visible in the current - * namespace - */ - if (rdma_is_kernel_res(res)) - return task_active_pid_ns(current) == &init_pid_ns; - return task_active_pid_ns(current) == task_active_pid_ns(res->task); -} diff --git a/drivers/infiniband/core/restrack.h b/drivers/infiniband/core/restrack.h index 7bd177cc0a61..d084e5f89849 100644 --- a/drivers/infiniband/core/restrack.h +++ b/drivers/infiniband/core/restrack.h @@ -27,5 +27,4 @@ int rdma_restrack_init(struct ib_device *dev); void rdma_restrack_clean(struct ib_device *dev); void rdma_restrack_attach_task(struct rdma_restrack_entry *res, struct task_struct *task); -bool rdma_is_visible_in_pid_ns(struct rdma_restrack_entry *res); #endif /* _RDMA_CORE_RESTRACK_H_ */ diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index dce06108c8c3..4fad732f9b3c 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -20,14 +20,17 @@ module_param_named(force_mr, rdma_rw_force_mr, bool, 0); MODULE_PARM_DESC(force_mr, "Force usage of MRs for RDMA READ/WRITE operations"); /* - * Check if the device might use memory registration. This is currently only - * true for iWarp devices. In the future we can hopefully fine tune this based - * on HCA driver input. + * Report whether memory registration should be used. Memory registration must + * be used for iWarp devices because of iWARP-specific limitations. Memory + * registration is also enabled if registering memory might yield better + * performance than using multiple SGE entries, see rdma_rw_io_needs_mr() */ static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num) { if (rdma_protocol_iwarp(dev, port_num)) return true; + if (dev->attrs.max_sgl_rd) + return true; if (unlikely(rdma_rw_force_mr)) return true; return false; @@ -35,17 +38,19 @@ static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num) /* * Check if the device will use memory registration for this RW operation. - * We currently always use memory registrations for iWarp RDMA READs, and - * have a debug option to force usage of MRs. - * - * XXX: In the future we can hopefully fine tune this based on HCA driver - * input. + * For RDMA READs we must use MRs on iWarp and can optionally use them as an + * optimization otherwise. Additionally we have a debug option to force usage + * of MRs to help testing this code path. 
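Editorial aside: the rw.c comments above boil the MR-vs-SGE policy down to: iWarp always registers, and otherwise a READ is registered only when its scatterlist exceeds the device's advertised max_sgl_rd. A worked example, as a sketch rather than the patch's code: a device advertising attrs.max_sgl_rd = 8 services a READ that scatters into 4 DMA segments with plain SGEs, but a READ scattering into 16 segments goes through an MR.

static bool demo_read_needs_mr(struct ib_device *dev, u8 port_num, int dma_nents)
{
	if (rdma_protocol_iwarp(dev, port_num))
		return true;	/* iWARP READs always need an MR */
	/* otherwise register only when the SGE list would be too long */
	return dev->attrs.max_sgl_rd && dma_nents > dev->attrs.max_sgl_rd;
}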
*/ static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num, enum dma_data_direction dir, int dma_nents) { - if (rdma_protocol_iwarp(dev, port_num) && dir == DMA_FROM_DEVICE) - return true; + if (dir == DMA_FROM_DEVICE) { + if (rdma_protocol_iwarp(dev, port_num)) + return true; + if (dev->attrs.max_sgl_rd && dma_nents > dev->attrs.max_sgl_rd) + return true; + } if (unlikely(rdma_rw_force_mr)) return true; return false; @@ -583,8 +588,10 @@ void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, break; } - /* P2PDMA contexts do not need to be unmapped */ - if (!is_pci_p2pdma_page(sg_page(sg))) + if (is_pci_p2pdma_page(sg_page(sg))) + pci_p2pdma_unmap_sg(qp->pd->device->dma_device, sg, + sg_cnt, dir); + else ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir); } EXPORT_SYMBOL(rdma_rw_ctx_destroy); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 7d8071c7e564..30d4c126a2db 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -860,7 +860,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask) /* Repair the nlmsg header length */ nlmsg_end(skb, nlh); - return rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_mask); + return rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, gfp_mask); } static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask) @@ -1068,7 +1068,7 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb, } settimeout_out: - return skb->len; + return 0; } static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh) @@ -1139,7 +1139,7 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb, } resp_out: - return skb->len; + return 0; } static void free_sm_ah(struct kref *kref) @@ -1246,7 +1246,7 @@ static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num, * @port_num: Port on the specified device. * @rec: path record entry to use for ah attributes initialization. * @ah_attr: address handle attributes to initialization from path record. - * @sgid_attr: SGID attribute to consider during initialization. + * @gid_attr: SGID attribute to consider during initialization. * * When ib_init_ah_attr_from_path() returns success, * (a) for IB link layer it optionally contains a reference to SGID attribute diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 1ab423b19f77..2b4d80393bd0 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -339,22 +339,16 @@ static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp, if (!new_pps) return NULL; - if (qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) { - if (!qp_pps) { - new_pps->main.port_num = qp_attr->port_num; - new_pps->main.pkey_index = qp_attr->pkey_index; - } else { - new_pps->main.port_num = (qp_attr_mask & IB_QP_PORT) ? - qp_attr->port_num : - qp_pps->main.port_num; - - new_pps->main.pkey_index = - (qp_attr_mask & IB_QP_PKEY_INDEX) ? - qp_attr->pkey_index : - qp_pps->main.pkey_index; - } + if (qp_attr_mask & IB_QP_PORT) + new_pps->main.port_num = + (qp_pps) ? qp_pps->main.port_num : qp_attr->port_num; + if (qp_attr_mask & IB_QP_PKEY_INDEX) + new_pps->main.pkey_index = (qp_pps) ? 
qp_pps->main.pkey_index : + qp_attr->pkey_index; + if ((qp_attr_mask & IB_QP_PKEY_INDEX) && (qp_attr_mask & IB_QP_PORT)) new_pps->main.state = IB_PORT_PKEY_VALID; - } else if (qp_pps) { + + if (!(qp_attr_mask & (IB_QP_PKEY_INDEX || IB_QP_PORT)) && qp_pps) { new_pps->main.port_num = qp_pps->main.port_num; new_pps->main.pkey_index = qp_pps->main.pkey_index; if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID) @@ -426,7 +420,7 @@ int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev) int ret; rdma_for_each_port (dev, i) { - is_ib = rdma_protocol_ib(dev, i++); + is_ib = rdma_protocol_ib(dev, i); if (is_ib) break; } diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index b477295a96c2..087682e6969e 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -289,6 +289,24 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, ib_width_enum_to_int(attr.active_width), speed); } +static const char *phys_state_to_str(enum ib_port_phys_state phys_state) +{ + static const char * phys_state_str[] = { + "<unknown>", + "Sleep", + "Polling", + "Disabled", + "PortConfigurationTraining", + "LinkUp", + "LinkErrorRecovery", + "Phy Test", + }; + + if (phys_state < ARRAY_SIZE(phys_state_str)) + return phys_state_str[phys_state]; + return "<unknown>"; +} + static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused, char *buf) { @@ -300,16 +318,8 @@ static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; - switch (attr.phys_state) { - case 1: return sprintf(buf, "1: Sleep\n"); - case 2: return sprintf(buf, "2: Polling\n"); - case 3: return sprintf(buf, "3: Disabled\n"); - case 4: return sprintf(buf, "4: PortConfigurationTraining\n"); - case 5: return sprintf(buf, "5: LinkUp\n"); - case 6: return sprintf(buf, "6: LinkErrorRecovery\n"); - case 7: return sprintf(buf, "7: Phy Test\n"); - default: return sprintf(buf, "%d: <unknown>\n", attr.phys_state); - } + return sprintf(buf, "%d: %s\n", attr.phys_state, + phys_state_to_str(attr.phys_state)); } static ssize_t link_layer_show(struct ib_port *p, struct port_attribute *unused, @@ -471,8 +481,8 @@ static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr, if (!dev->ops.process_mad) return -ENOSYS; - in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); - out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kzalloc(sizeof(*out_mad), GFP_KERNEL); if (!in_mad || !out_mad) { ret = -ENOMEM; goto out; @@ -487,10 +497,8 @@ static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr, if (attr != IB_PMA_CLASS_PORT_INFO) in_mad->data[41] = port_num; /* PortSelect field */ - if ((dev->ops.process_mad(dev, IB_MAD_IGNORE_MKEY, - port_num, NULL, NULL, - (const struct ib_mad_hdr *)in_mad, mad_size, - (struct ib_mad_hdr *)out_mad, &mad_size, + if ((dev->ops.process_mad(dev, IB_MAD_IGNORE_MKEY, port_num, NULL, NULL, + in_mad, out_mad, &mad_size, &out_mad_pkey_index) & (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) != (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) { @@ -1258,7 +1266,7 @@ static ssize_t node_desc_store(struct device *device, int ret; if (!dev->ops.modify_device) - return -EIO; + return -EOPNOTSUPP; memcpy(desc.node_desc, buf, min_t(int, count, IB_DEVICE_NODE_DESC_MAX)); ret = ib_modify_device(dev, IB_DEVICE_MODIFY_NODE_DESC, &desc); diff --git a/drivers/infiniband/core/trace.c b/drivers/infiniband/core/trace.c new file mode 100644 index 
000000000000..6c3514beac4d --- /dev/null +++ b/drivers/infiniband/core/trace.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Trace points for core RDMA functions. + * + * Author: Chuck Lever <chuck.lever@oracle.com> + * + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + */ + +#define CREATE_TRACE_POINTS + +#include <rdma/ib_verbs.h> + +#include <trace/events/rdma_core.h> diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 08da840ed7ee..06b6125b5ae1 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -54,10 +54,7 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) { page = sg_page_iter_page(&sg_iter); - if (umem->writable && dirty) - put_user_pages_dirty_lock(&page, 1); - else - put_user_page(page); + unpin_user_pages_dirty_lock(&page, 1, umem->writable && dirty); } sg_free_table(&umem->sg_head); @@ -169,10 +166,13 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, * for any address. */ mask |= (sg_dma_address(sg) + pgoff) ^ va; - if (i && i != (umem->nmap - 1)) - /* restrict by length as well for interior SGEs */ - mask |= sg_dma_len(sg); va += sg_dma_len(sg) - pgoff; + /* Except for the last entry, the ending iova alignment sets + * the maximum possible page size as the low bits of the iova + * must be zero when starting the next chunk. + */ + if (i != (umem->nmap - 1)) + mask |= va; pgoff = 0; } best_pg_bit = rdma_find_pg_bit(mask, pgsz_bitmap); @@ -184,19 +184,14 @@ EXPORT_SYMBOL(ib_umem_find_best_pgsz); /** * ib_umem_get - Pin and DMA map userspace memory. * - * If access flags indicate ODP memory, avoid pinning. Instead, stores - * the mm for future page fault handling in conjunction with MMU notifiers. - * - * @udata: userspace context to pin memory for + * @device: IB device to connect UMEM * @addr: userspace virtual address to start at * @size: length of region to pin * @access: IB_ACCESS_xxx flags for memory being pinned - * @dmasync: flush in-flight DMA when the memory region is written */ -struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, - size_t size, int access, int dmasync) +struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, + size_t size, int access) { - struct ib_ucontext *context; struct ib_umem *umem; struct page **page_list; unsigned long lock_limit; @@ -205,21 +200,9 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, struct mm_struct *mm; unsigned long npages; int ret; - unsigned long dma_attrs = 0; struct scatterlist *sg; unsigned int gup_flags = FOLL_WRITE; - if (!udata) - return ERR_PTR(-EIO); - - context = container_of(udata, struct uverbs_attr_bundle, driver_udata) - ->context; - if (!context) - return ERR_PTR(-EIO); - - if (dmasync) - dma_attrs |= DMA_ATTR_WRITE_BARRIER; - /* * If the combination of the addr and size requested for this memory * region causes an integer overflow, return error. 
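Editorial aside: the umem.c hunks convert the pinning path from get_user_pages()/put_user_page() to the pin_user_pages_fast()/unpin_user_pages_dirty_lock() pair with FOLL_LONGTERM. A minimal sketch of that pairing, with error handling trimmed and illustrative names (not part of the patch):

static int demo_pin_range(unsigned long start, int npages,
			  struct page **pages, bool writable)
{
	unsigned int gup_flags = FOLL_LONGTERM | (writable ? FOLL_WRITE : 0);

	/* pins up to npages and returns how many were actually pinned */
	return pin_user_pages_fast(start, npages, gup_flags, pages);
}

static void demo_unpin_range(struct page **pages, unsigned long npages,
			     bool dirty)
{
	/* the unpin_user_pages*() family is the required counterpart to pin_user_pages*() */
	unpin_user_pages_dirty_lock(pages, npages, dirty);
}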
@@ -231,36 +214,19 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, if (!can_do_mlock()) return ERR_PTR(-EPERM); - if (access & IB_ACCESS_ON_DEMAND) { - umem = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL); - if (!umem) - return ERR_PTR(-ENOMEM); - umem->is_odp = 1; - } else { - umem = kzalloc(sizeof(*umem), GFP_KERNEL); - if (!umem) - return ERR_PTR(-ENOMEM); - } + if (access & IB_ACCESS_ON_DEMAND) + return ERR_PTR(-EOPNOTSUPP); - umem->context = context; + umem = kzalloc(sizeof(*umem), GFP_KERNEL); + if (!umem) + return ERR_PTR(-ENOMEM); + umem->ibdev = device; umem->length = size; umem->address = addr; umem->writable = ib_access_writable(access); umem->owning_mm = mm = current->mm; mmgrab(mm); - if (access & IB_ACCESS_ON_DEMAND) { - if (WARN_ON_ONCE(!context->invalidate_range)) { - ret = -EINVAL; - goto umem_kfree; - } - - ret = ib_umem_odp_get(to_ib_umem_odp(umem), access); - if (ret) - goto umem_kfree; - return umem; - } - page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) { ret = -ENOMEM; @@ -294,34 +260,28 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, sg = umem->sg_head.sgl; while (npages) { - down_read(&mm->mmap_sem); - ret = get_user_pages(cur_base, - min_t(unsigned long, npages, - PAGE_SIZE / sizeof (struct page *)), - gup_flags | FOLL_LONGTERM, - page_list, NULL); - if (ret < 0) { - up_read(&mm->mmap_sem); + ret = pin_user_pages_fast(cur_base, + min_t(unsigned long, npages, + PAGE_SIZE / + sizeof(struct page *)), + gup_flags | FOLL_LONGTERM, page_list); + if (ret < 0) goto umem_release; - } cur_base += ret * PAGE_SIZE; npages -= ret; sg = ib_umem_add_sg_table(sg, page_list, ret, - dma_get_max_seg_size(context->device->dma_device), + dma_get_max_seg_size(device->dma_device), &umem->sg_nents); - - up_read(&mm->mmap_sem); } sg_mark_end(sg); - umem->nmap = ib_dma_map_sg_attrs(context->device, - umem->sg_head.sgl, - umem->sg_nents, - DMA_BIDIRECTIONAL, - dma_attrs); + umem->nmap = ib_dma_map_sg(device, + umem->sg_head.sgl, + umem->sg_nents, + DMA_BIDIRECTIONAL); if (!umem->nmap) { ret = -ENOMEM; @@ -332,7 +292,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, goto out; umem_release: - __ib_umem_release(context->device, umem, 0); + __ib_umem_release(device, umem, 0); vma: atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm); out: @@ -346,15 +306,6 @@ umem_kfree: } EXPORT_SYMBOL(ib_umem_get); -static void __ib_umem_release_tail(struct ib_umem *umem) -{ - mmdrop(umem->owning_mm); - if (umem->is_odp) - kfree(to_ib_umem_odp(umem)); - else - kfree(umem); -} - /** * ib_umem_release - release memory pinned with ib_umem_get * @umem: umem struct to release @@ -363,30 +314,22 @@ void ib_umem_release(struct ib_umem *umem) { if (!umem) return; + if (umem->is_odp) + return ib_umem_odp_release(to_ib_umem_odp(umem)); - if (umem->is_odp) { - ib_umem_odp_release(to_ib_umem_odp(umem)); - __ib_umem_release_tail(umem); - return; - } - - __ib_umem_release(umem->context->device, umem, 1); + __ib_umem_release(umem->ibdev, umem, 1); atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm); - __ib_umem_release_tail(umem); + mmdrop(umem->owning_mm); + kfree(umem); } EXPORT_SYMBOL(ib_umem_release); int ib_umem_page_count(struct ib_umem *umem) { - int i; - int n; + int i, n = 0; struct scatterlist *sg; - if (umem->is_odp) - return ib_umem_num_pages(umem); - - n = 0; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) n += sg_dma_len(sg) >> PAGE_SHIFT; diff --git a/drivers/infiniband/core/umem_odp.c 
b/drivers/infiniband/core/umem_odp.c index c0e15db34680..b8c657b28380 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -39,417 +39,211 @@ #include <linux/export.h> #include <linux/vmalloc.h> #include <linux/hugetlb.h> -#include <linux/interval_tree_generic.h> +#include <linux/interval_tree.h> #include <linux/pagemap.h> #include <rdma/ib_verbs.h> #include <rdma/ib_umem.h> #include <rdma/ib_umem_odp.h> -/* - * The ib_umem list keeps track of memory regions for which the HW - * device request to receive notification when the related memory - * mapping is changed. - * - * ib_umem_lock protects the list. - */ +#include "uverbs.h" -static u64 node_start(struct umem_odp_node *n) +static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp, + const struct mmu_interval_notifier_ops *ops) { - struct ib_umem_odp *umem_odp = - container_of(n, struct ib_umem_odp, interval_tree); - - return ib_umem_start(umem_odp); -} - -/* Note that the representation of the intervals in the interval tree - * considers the ending point as contained in the interval, while the - * function ib_umem_end returns the first address which is not contained - * in the umem. - */ -static u64 node_last(struct umem_odp_node *n) -{ - struct ib_umem_odp *umem_odp = - container_of(n, struct ib_umem_odp, interval_tree); - - return ib_umem_end(umem_odp) - 1; -} - -INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last, - node_start, node_last, static, rbt_ib_umem) - -static void ib_umem_notifier_start_account(struct ib_umem_odp *umem_odp) -{ - mutex_lock(&umem_odp->umem_mutex); - if (umem_odp->notifiers_count++ == 0) - /* - * Initialize the completion object for waiting on - * notifiers. Since notifier_count is zero, no one should be - * waiting right now. - */ - reinit_completion(&umem_odp->notifier_completion); - mutex_unlock(&umem_odp->umem_mutex); -} - -static void ib_umem_notifier_end_account(struct ib_umem_odp *umem_odp) -{ - mutex_lock(&umem_odp->umem_mutex); - /* - * This sequence increase will notify the QP page fault that the page - * that is going to be mapped in the spte could have been freed. - */ - ++umem_odp->notifiers_seq; - if (--umem_odp->notifiers_count == 0) - complete_all(&umem_odp->notifier_completion); - mutex_unlock(&umem_odp->umem_mutex); -} - -static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp, - u64 start, u64 end, void *cookie) -{ - /* - * Increase the number of notifiers running, to - * prevent any further fault handling on this MR. 
- */ - ib_umem_notifier_start_account(umem_odp); - complete_all(&umem_odp->notifier_completion); - umem_odp->umem.context->invalidate_range( - umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp)); - return 0; -} + int ret; -static void ib_umem_notifier_release(struct mmu_notifier *mn, - struct mm_struct *mm) -{ - struct ib_ucontext_per_mm *per_mm = - container_of(mn, struct ib_ucontext_per_mm, mn); - - down_read(&per_mm->umem_rwsem); - if (per_mm->active) - rbt_ib_umem_for_each_in_range( - &per_mm->umem_tree, 0, ULLONG_MAX, - ib_umem_notifier_release_trampoline, true, NULL); - up_read(&per_mm->umem_rwsem); -} + umem_odp->umem.is_odp = 1; + mutex_init(&umem_odp->umem_mutex); -static int invalidate_range_start_trampoline(struct ib_umem_odp *item, - u64 start, u64 end, void *cookie) -{ - ib_umem_notifier_start_account(item); - item->umem.context->invalidate_range(item, start, end); - return 0; -} + if (!umem_odp->is_implicit_odp) { + size_t page_size = 1UL << umem_odp->page_shift; + unsigned long start; + unsigned long end; + size_t pages; + + start = ALIGN_DOWN(umem_odp->umem.address, page_size); + if (check_add_overflow(umem_odp->umem.address, + (unsigned long)umem_odp->umem.length, + &end)) + return -EOVERFLOW; + end = ALIGN(end, page_size); + if (unlikely(end < page_size)) + return -EOVERFLOW; + + pages = (end - start) >> umem_odp->page_shift; + if (!pages) + return -EINVAL; -static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) -{ - struct ib_ucontext_per_mm *per_mm = - container_of(mn, struct ib_ucontext_per_mm, mn); - int rc; + umem_odp->page_list = kvcalloc( + pages, sizeof(*umem_odp->page_list), GFP_KERNEL); + if (!umem_odp->page_list) + return -ENOMEM; - if (mmu_notifier_range_blockable(range)) - down_read(&per_mm->umem_rwsem); - else if (!down_read_trylock(&per_mm->umem_rwsem)) - return -EAGAIN; + umem_odp->dma_list = kvcalloc( + pages, sizeof(*umem_odp->dma_list), GFP_KERNEL); + if (!umem_odp->dma_list) { + ret = -ENOMEM; + goto out_page_list; + } - if (!per_mm->active) { - up_read(&per_mm->umem_rwsem); - /* - * At this point active is permanently set and visible to this - * CPU without a lock, that fact is relied on to skip the unlock - * in range_end. 
- */ - return 0; + ret = mmu_interval_notifier_insert(&umem_odp->notifier, + umem_odp->umem.owning_mm, + start, end - start, ops); + if (ret) + goto out_dma_list; } - rc = rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, range->start, - range->end, - invalidate_range_start_trampoline, - mmu_notifier_range_blockable(range), - NULL); - if (rc) - up_read(&per_mm->umem_rwsem); - return rc; -} - -static int invalidate_range_end_trampoline(struct ib_umem_odp *item, u64 start, - u64 end, void *cookie) -{ - ib_umem_notifier_end_account(item); return 0; -} - -static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) -{ - struct ib_ucontext_per_mm *per_mm = - container_of(mn, struct ib_ucontext_per_mm, mn); - - if (unlikely(!per_mm->active)) - return; - - rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, range->start, - range->end, - invalidate_range_end_trampoline, true, NULL); - up_read(&per_mm->umem_rwsem); -} -static const struct mmu_notifier_ops ib_umem_notifiers = { - .release = ib_umem_notifier_release, - .invalidate_range_start = ib_umem_notifier_invalidate_range_start, - .invalidate_range_end = ib_umem_notifier_invalidate_range_end, -}; - -static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp) -{ - struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm; - - down_write(&per_mm->umem_rwsem); - if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp))) - rbt_ib_umem_insert(&umem_odp->interval_tree, - &per_mm->umem_tree); - up_write(&per_mm->umem_rwsem); -} - -static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp) -{ - struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm; - - down_write(&per_mm->umem_rwsem); - if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp))) - rbt_ib_umem_remove(&umem_odp->interval_tree, - &per_mm->umem_tree); - complete_all(&umem_odp->notifier_completion); - - up_write(&per_mm->umem_rwsem); +out_dma_list: + kvfree(umem_odp->dma_list); +out_page_list: + kvfree(umem_odp->page_list); + return ret; } -static struct ib_ucontext_per_mm *alloc_per_mm(struct ib_ucontext *ctx, - struct mm_struct *mm) +/** + * ib_umem_odp_alloc_implicit - Allocate a parent implicit ODP umem + * + * Implicit ODP umems do not have a VA range and do not have any page lists. + * They exist only to hold the per_mm reference to help the driver create + * children umems. 
+ * + * @device: IB device to create UMEM + * @access: ib_reg_mr access flags + */ +struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_device *device, + int access) { - struct ib_ucontext_per_mm *per_mm; + struct ib_umem *umem; + struct ib_umem_odp *umem_odp; int ret; - per_mm = kzalloc(sizeof(*per_mm), GFP_KERNEL); - if (!per_mm) - return ERR_PTR(-ENOMEM); - - per_mm->context = ctx; - per_mm->mm = mm; - per_mm->umem_tree = RB_ROOT_CACHED; - init_rwsem(&per_mm->umem_rwsem); - per_mm->active = true; + if (access & IB_ACCESS_HUGETLB) + return ERR_PTR(-EINVAL); - rcu_read_lock(); - per_mm->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); - rcu_read_unlock(); - - WARN_ON(mm != current->mm); + umem_odp = kzalloc(sizeof(*umem_odp), GFP_KERNEL); + if (!umem_odp) + return ERR_PTR(-ENOMEM); + umem = &umem_odp->umem; + umem->ibdev = device; + umem->writable = ib_access_writable(access); + umem->owning_mm = current->mm; + umem_odp->is_implicit_odp = 1; + umem_odp->page_shift = PAGE_SHIFT; - per_mm->mn.ops = &ib_umem_notifiers; - ret = mmu_notifier_register(&per_mm->mn, per_mm->mm); + umem_odp->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); + ret = ib_init_umem_odp(umem_odp, NULL); if (ret) { - dev_err(&ctx->device->dev, - "Failed to register mmu_notifier %d\n", ret); - goto out_pid; - } - - list_add(&per_mm->ucontext_list, &ctx->per_mm_list); - return per_mm; - -out_pid: - put_pid(per_mm->tgid); - kfree(per_mm); - return ERR_PTR(ret); -} - -static int get_per_mm(struct ib_umem_odp *umem_odp) -{ - struct ib_ucontext *ctx = umem_odp->umem.context; - struct ib_ucontext_per_mm *per_mm; - - /* - * Generally speaking we expect only one or two per_mm in this list, - * so no reason to optimize this search today. - */ - mutex_lock(&ctx->per_mm_list_lock); - list_for_each_entry(per_mm, &ctx->per_mm_list, ucontext_list) { - if (per_mm->mm == umem_odp->umem.owning_mm) - goto found; - } - - per_mm = alloc_per_mm(ctx, umem_odp->umem.owning_mm); - if (IS_ERR(per_mm)) { - mutex_unlock(&ctx->per_mm_list_lock); - return PTR_ERR(per_mm); + put_pid(umem_odp->tgid); + kfree(umem_odp); + return ERR_PTR(ret); } - -found: - umem_odp->per_mm = per_mm; - per_mm->odp_mrs_count++; - mutex_unlock(&ctx->per_mm_list_lock); - - return 0; + return umem_odp; } +EXPORT_SYMBOL(ib_umem_odp_alloc_implicit); -static void free_per_mm(struct rcu_head *rcu) -{ - kfree(container_of(rcu, struct ib_ucontext_per_mm, rcu)); -} - -static void put_per_mm(struct ib_umem_odp *umem_odp) +/** + * ib_umem_odp_alloc_child - Allocate a child ODP umem under an implicit + * parent ODP umem + * + * @root: The parent umem enclosing the child. This must be allocated using + * ib_alloc_implicit_odp_umem() + * @addr: The starting userspace VA + * @size: The length of the userspace VA + */ +struct ib_umem_odp * +ib_umem_odp_alloc_child(struct ib_umem_odp *root, unsigned long addr, + size_t size, + const struct mmu_interval_notifier_ops *ops) { - struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm; - struct ib_ucontext *ctx = umem_odp->umem.context; - bool need_free; - - mutex_lock(&ctx->per_mm_list_lock); - umem_odp->per_mm = NULL; - per_mm->odp_mrs_count--; - need_free = per_mm->odp_mrs_count == 0; - if (need_free) - list_del(&per_mm->ucontext_list); - mutex_unlock(&ctx->per_mm_list_lock); - - if (!need_free) - return; - /* - * NOTE! mmu_notifier_unregister() can happen between a start/end - * callback, resulting in an start/end, and thus an unbalanced - * lock. 
This doesn't really matter to us since we are about to kfree - * the memory that holds the lock, however LOCKDEP doesn't like this. + * Caller must ensure that root cannot be freed during the call to + * ib_alloc_odp_umem. */ - down_write(&per_mm->umem_rwsem); - per_mm->active = false; - up_write(&per_mm->umem_rwsem); - - WARN_ON(!RB_EMPTY_ROOT(&per_mm->umem_tree.rb_root)); - mmu_notifier_unregister_no_release(&per_mm->mn, per_mm->mm); - put_pid(per_mm->tgid); - mmu_notifier_call_srcu(&per_mm->rcu, free_per_mm); -} - -struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root, - unsigned long addr, size_t size) -{ - struct ib_ucontext_per_mm *per_mm = root->per_mm; - struct ib_ucontext *ctx = per_mm->context; struct ib_umem_odp *odp_data; struct ib_umem *umem; - int pages = size >> PAGE_SHIFT; int ret; + if (WARN_ON(!root->is_implicit_odp)) + return ERR_PTR(-EINVAL); + odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL); if (!odp_data) return ERR_PTR(-ENOMEM); umem = &odp_data->umem; - umem->context = ctx; + umem->ibdev = root->umem.ibdev; umem->length = size; umem->address = addr; - odp_data->page_shift = PAGE_SHIFT; umem->writable = root->umem.writable; - umem->is_odp = 1; - odp_data->per_mm = per_mm; - umem->owning_mm = per_mm->mm; - mmgrab(umem->owning_mm); - - mutex_init(&odp_data->umem_mutex); - init_completion(&odp_data->notifier_completion); - - odp_data->page_list = - vzalloc(array_size(pages, sizeof(*odp_data->page_list))); - if (!odp_data->page_list) { - ret = -ENOMEM; - goto out_odp_data; - } + umem->owning_mm = root->umem.owning_mm; + odp_data->page_shift = PAGE_SHIFT; + odp_data->notifier.ops = ops; - odp_data->dma_list = - vzalloc(array_size(pages, sizeof(*odp_data->dma_list))); - if (!odp_data->dma_list) { - ret = -ENOMEM; - goto out_page_list; + odp_data->tgid = get_pid(root->tgid); + ret = ib_init_umem_odp(odp_data, ops); + if (ret) { + put_pid(odp_data->tgid); + kfree(odp_data); + return ERR_PTR(ret); } - - /* - * Caller must ensure that the umem_odp that the per_mm came from - * cannot be freed during the call to ib_alloc_odp_umem. - */ - mutex_lock(&ctx->per_mm_list_lock); - per_mm->odp_mrs_count++; - mutex_unlock(&ctx->per_mm_list_lock); - add_umem_to_per_mm(odp_data); - return odp_data; - -out_page_list: - vfree(odp_data->page_list); -out_odp_data: - mmdrop(umem->owning_mm); - kfree(odp_data); - return ERR_PTR(ret); } -EXPORT_SYMBOL(ib_alloc_odp_umem); +EXPORT_SYMBOL(ib_umem_odp_alloc_child); -int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access) +/** + * ib_umem_odp_get - Create a umem_odp for a userspace va + * + * @device: IB device struct to get UMEM + * @addr: userspace virtual address to start at + * @size: length of region to pin + * @access: IB_ACCESS_xxx flags for memory being pinned + * + * The driver should use when the access flags indicate ODP memory. It avoids + * pinning, instead, stores the mm for future page fault handling in + * conjunction with MMU notifiers. 
+ */ +struct ib_umem_odp *ib_umem_odp_get(struct ib_device *device, + unsigned long addr, size_t size, int access, + const struct mmu_interval_notifier_ops *ops) { - struct ib_umem *umem = &umem_odp->umem; - /* - * NOTE: This must called in a process context where umem->owning_mm - * == current->mm - */ - struct mm_struct *mm = umem->owning_mm; - int ret_val; - - umem_odp->page_shift = PAGE_SHIFT; - if (access & IB_ACCESS_HUGETLB) { - struct vm_area_struct *vma; - struct hstate *h; - - down_read(&mm->mmap_sem); - vma = find_vma(mm, ib_umem_start(umem_odp)); - if (!vma || !is_vm_hugetlb_page(vma)) { - up_read(&mm->mmap_sem); - return -EINVAL; - } - h = hstate_vma(vma); - umem_odp->page_shift = huge_page_shift(h); - up_read(&mm->mmap_sem); - } - - mutex_init(&umem_odp->umem_mutex); - - init_completion(&umem_odp->notifier_completion); - - if (ib_umem_odp_num_pages(umem_odp)) { - umem_odp->page_list = - vzalloc(array_size(sizeof(*umem_odp->page_list), - ib_umem_odp_num_pages(umem_odp))); - if (!umem_odp->page_list) - return -ENOMEM; + struct ib_umem_odp *umem_odp; + struct mm_struct *mm; + int ret; - umem_odp->dma_list = - vzalloc(array_size(sizeof(*umem_odp->dma_list), - ib_umem_odp_num_pages(umem_odp))); - if (!umem_odp->dma_list) { - ret_val = -ENOMEM; - goto out_page_list; - } - } + if (WARN_ON_ONCE(!(access & IB_ACCESS_ON_DEMAND))) + return ERR_PTR(-EINVAL); - ret_val = get_per_mm(umem_odp); - if (ret_val) - goto out_dma_list; - add_umem_to_per_mm(umem_odp); + umem_odp = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL); + if (!umem_odp) + return ERR_PTR(-ENOMEM); - return 0; + umem_odp->umem.ibdev = device; + umem_odp->umem.length = size; + umem_odp->umem.address = addr; + umem_odp->umem.writable = ib_access_writable(access); + umem_odp->umem.owning_mm = mm = current->mm; + umem_odp->notifier.ops = ops; -out_dma_list: - vfree(umem_odp->dma_list); -out_page_list: - vfree(umem_odp->page_list); - return ret_val; + umem_odp->page_shift = PAGE_SHIFT; +#ifdef CONFIG_HUGETLB_PAGE + if (access & IB_ACCESS_HUGETLB) + umem_odp->page_shift = HPAGE_SHIFT; +#endif + + umem_odp->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); + ret = ib_init_umem_odp(umem_odp, ops); + if (ret) + goto err_put_pid; + return umem_odp; + +err_put_pid: + put_pid(umem_odp->tgid); + kfree(umem_odp); + return ERR_PTR(ret); } +EXPORT_SYMBOL(ib_umem_odp_get); void ib_umem_odp_release(struct ib_umem_odp *umem_odp) { @@ -459,14 +253,19 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp) * It is the driver's responsibility to ensure, before calling us, * that the hardware will not attempt to access the MR any more. */ - ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), - ib_umem_end(umem_odp)); - - remove_umem_from_per_mm(umem_odp); - put_per_mm(umem_odp); - vfree(umem_odp->dma_list); - vfree(umem_odp->page_list); + if (!umem_odp->is_implicit_odp) { + mutex_lock(&umem_odp->umem_mutex); + ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), + ib_umem_end(umem_odp)); + mutex_unlock(&umem_odp->umem_mutex); + mmu_interval_notifier_remove(&umem_odp->notifier); + kvfree(umem_odp->dma_list); + kvfree(umem_odp->page_list); + put_pid(umem_odp->tgid); + } + kfree(umem_odp); } +EXPORT_SYMBOL(ib_umem_odp_release); /* * Map for DMA and insert a single page into the on-demand paging page tables. @@ -482,29 +281,21 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp) * The function returns -EFAULT if the DMA mapping operation fails. 
It returns * -EAGAIN if a concurrent invalidation prevents us from updating the page. * - * The page is released via put_user_page even if the operation failed. For - * on-demand pinning, the page is released whenever it isn't stored in the - * umem. + * The page is released via put_page even if the operation failed. For on-demand + * pinning, the page is released whenever it isn't stored in the umem. */ static int ib_umem_odp_map_dma_single_page( struct ib_umem_odp *umem_odp, - int page_index, + unsigned int page_index, struct page *page, u64 access_mask, unsigned long current_seq) { - struct ib_ucontext *context = umem_odp->umem.context; - struct ib_device *dev = context->device; + struct ib_device *dev = umem_odp->umem.ibdev; dma_addr_t dma_addr; - int remove_existing_mapping = 0; int ret = 0; - /* - * Note: we avoid writing if seq is different from the initial seq, to - * handle case of a racing notifier. This check also allows us to bail - * early if we have a notifier running in parallel with us. - */ - if (ib_umem_mmu_notifier_retry(umem_odp, current_seq)) { + if (mmu_interval_check_retry(&umem_odp->notifier, current_seq)) { ret = -EAGAIN; goto out; } @@ -522,28 +313,29 @@ static int ib_umem_odp_map_dma_single_page( } else if (umem_odp->page_list[page_index] == page) { umem_odp->dma_list[page_index] |= access_mask; } else { - pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n", - umem_odp->page_list[page_index], page); - /* Better remove the mapping now, to prevent any further - * damage. */ - remove_existing_mapping = 1; - } - -out: - put_user_page(page); - - if (remove_existing_mapping) { - ib_umem_notifier_start_account(umem_odp); - context->invalidate_range( - umem_odp, - ib_umem_start(umem_odp) + - (page_index << umem_odp->page_shift), - ib_umem_start(umem_odp) + - ((page_index + 1) << umem_odp->page_shift)); - ib_umem_notifier_end_account(umem_odp); + /* + * This is a race here where we could have done: + * + * CPU0 CPU1 + * get_user_pages() + * invalidate() + * page_fault() + * mutex_lock(umem_mutex) + * page from GUP != page in ODP + * + * It should be prevented by the retry test above as reading + * the seq number should be reliable under the + * umem_mutex. Thus something is really not working right if + * things get here. + */ + WARN(true, + "Got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n", + umem_odp->page_list[page_index], page); ret = -EAGAIN; } +out: + put_page(page); return ret; } @@ -606,7 +398,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, * existing beyond the lifetime of the originating process.. Presumably * mmget_not_zero will fail in this case. 
*/ - owning_process = get_pid_task(umem_odp->per_mm->tgid, PIDTYPE_PID); + owning_process = get_pid_task(umem_odp->tgid, PIDTYPE_PID); if (!owning_process || !mmget_not_zero(owning_mm)) { ret = -EINVAL; goto out_put_task; @@ -620,7 +412,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, while (bcnt > 0) { const size_t gup_num_pages = min_t(size_t, - (bcnt + BIT(page_shift) - 1) >> page_shift, + ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE, PAGE_SIZE / sizeof(struct page *)); down_read(&owning_mm->mmap_sem); @@ -653,7 +445,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, ret = -EFAULT; break; } - put_user_page(local_page_list[j]); + put_page(local_page_list[j]); continue; } @@ -680,8 +472,8 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, * ib_umem_odp_map_dma_single_page(). */ if (npages - (j + 1) > 0) - put_user_pages(&local_page_list[j+1], - npages - (j + 1)); + release_pages(&local_page_list[j+1], + npages - (j + 1)); break; } } @@ -707,7 +499,9 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, { int idx; u64 addr; - struct ib_device *dev = umem_odp->umem.context->device; + struct ib_device *dev = umem_odp->umem.ibdev; + + lockdep_assert_held(&umem_odp->umem_mutex); virt = max_t(u64, virt, ib_umem_start(umem_odp)); bound = min_t(u64, bound, ib_umem_end(umem_odp)); @@ -716,7 +510,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, * faults from completion. We might be racing with other * invalidations, so we must make sure we free each page only * once. */ - mutex_lock(&umem_odp->umem_mutex); for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) { idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift; if (umem_odp->page_list[idx]) { @@ -747,49 +540,5 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, umem_odp->npages--; } } - mutex_unlock(&umem_odp->umem_mutex); } EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages); - -/* @last is not a part of the interval. See comment for function - * node_last. 
- */ -int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, - u64 start, u64 last, - umem_call_back cb, - bool blockable, - void *cookie) -{ - int ret_val = 0; - struct umem_odp_node *node, *next; - struct ib_umem_odp *umem; - - if (unlikely(start == last)) - return ret_val; - - for (node = rbt_ib_umem_iter_first(root, start, last - 1); - node; node = next) { - /* TODO move the blockable decision up to the callback */ - if (!blockable) - return -EAGAIN; - next = rbt_ib_umem_iter_next(node, start, last - 1); - umem = container_of(node, struct ib_umem_odp, interval_tree); - ret_val = cb(umem, start, last, cookie) || ret_val; - } - - return ret_val; -} -EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range); - -struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root, - u64 addr, u64 length) -{ - struct umem_odp_node *node; - - node = rbt_ib_umem_iter_first(root, addr, addr + length - 1); - if (node) - return container_of(node, struct ib_umem_odp, interval_tree); - return NULL; - -} -EXPORT_SYMBOL(rbt_ib_umem_lookup); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index ffdeaf6e0b68..1235ffb2389b 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1042,7 +1042,7 @@ static int ib_umad_close(struct inode *inode, struct file *filp) ib_unregister_mad_agent(file->agent[i]); mutex_unlock(&file->port->file_mutex); - + mutex_destroy(&file->mutex); kfree(file); return 0; } @@ -1312,6 +1312,9 @@ static void ib_umad_kill_port(struct ib_umad_port *port) struct ib_umad_file *file; int id; + cdev_device_del(&port->sm_cdev, &port->sm_dev); + cdev_device_del(&port->cdev, &port->dev); + mutex_lock(&port->file_mutex); /* Mark ib_dev NULL and block ioctl or other file ops to progress @@ -1331,8 +1334,6 @@ static void ib_umad_kill_port(struct ib_umad_port *port) mutex_unlock(&port->file_mutex); - cdev_device_del(&port->sm_cdev, &port->sm_dev); - cdev_device_del(&port->cdev, &port->dev); ida_free(&umad_ida, port->dev_num); /* balances device_initialize() */ diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 1e5aeb39f774..7df71983212d 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -98,7 +98,7 @@ ib_uverbs_init_udata_buf_or_null(struct ib_udata *udata, struct ib_uverbs_device { atomic_t refcount; - int num_comp_vectors; + u32 num_comp_vectors; struct completion comp; struct device dev; /* First group for device attributes, NULL terminated array */ @@ -111,7 +111,6 @@ struct ib_uverbs_device { struct srcu_struct disassociate_srcu; struct mutex lists_mutex; /* protect lists */ struct list_head uverbs_file_list; - struct list_head uverbs_events_file_list; struct uverbs_api *uapi; }; @@ -124,10 +123,9 @@ struct ib_uverbs_event_queue { }; struct ib_uverbs_async_event_file { + struct ib_uobject uobj; struct ib_uverbs_event_queue ev_queue; - struct ib_uverbs_file *uverbs_file; - struct kref ref; - struct list_head list; + struct ib_event_handler event_handler; }; struct ib_uverbs_completion_event_file { @@ -144,8 +142,7 @@ struct ib_uverbs_file { * ucontext_lock held */ struct ib_ucontext *ucontext; - struct ib_event_handler event_handler; - struct ib_uverbs_async_event_file *async_file; + struct ib_uverbs_async_event_file *async_file; struct list_head list; /* @@ -183,6 +180,7 @@ struct ib_uverbs_mcast_entry { struct ib_uevent_object { struct ib_uobject uobject; + /* List member for ib_uverbs_async_event_file list */ struct list_head event_list; u32 
events_reported; }; @@ -210,25 +208,24 @@ struct ib_uwq_object { }; struct ib_ucq_object { - struct ib_uobject uobject; + struct ib_uevent_object uevent; struct list_head comp_list; - struct list_head async_list; u32 comp_events_reported; - u32 async_events_reported; }; extern const struct file_operations uverbs_event_fops; +extern const struct file_operations uverbs_async_event_fops; void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue); -struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file, - struct ib_device *ib_dev); -void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file); +void ib_uverbs_init_async_event_file(struct ib_uverbs_async_event_file *ev_file); +void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue); void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res); -void ib_uverbs_release_ucq(struct ib_uverbs_file *file, - struct ib_uverbs_completion_event_file *ev_file, +int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs); +int ib_init_ucontext(struct uverbs_attr_bundle *attrs); + +void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file, struct ib_ucq_object *uobj); -void ib_uverbs_release_uevent(struct ib_uverbs_file *file, - struct ib_uevent_object *uobj); +void ib_uverbs_release_uevent(struct ib_uevent_object *uobj); void ib_uverbs_release_file(struct kref *ref); void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); @@ -236,8 +233,6 @@ void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); -void ib_uverbs_event_handler(struct ib_event_handler *handler, - struct ib_event *event); int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, enum rdma_remove_reason why, struct uverbs_attr_bundle *attrs); @@ -276,23 +271,6 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type, size_t kern_filter_sz, union ib_flow_spec *ib_spec); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DEVICE); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_PD); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_MR); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_CQ); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_QP); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_AH); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_MW); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_SRQ); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_WQ); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_XRCD); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DM); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS); - /* * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the * PortInfo CapabilityMask, but was extended with unique bits. 
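Illustrative sketch (not part of the patch): the uverbs.h hunk above drops the private uobject and the async-event counter from struct ib_ucq_object and embeds the shared struct ib_uevent_object instead, so CQ async events reuse the common event_list/events_reported bookkeeping and the generic uobject is reached through the uevent member. The following self-contained C sketch, with the structures reduced to the fields relevant here and a userspace stand-in for container_of(), shows the two-level member path that the uverbs_cmd.c hunks below switch to:

#include <stddef.h>
#include <stdio.h>

/* Reduced stand-ins for the kernel structures touched by the uverbs.h hunk;
 * field types and most members are simplified or omitted for illustration. */
struct ib_uobject {
	unsigned int id;
};

struct ib_uevent_object {
	struct ib_uobject uobject;	/* the generic uobject now lives here */
	unsigned int events_reported;	/* replaces async_events_reported */
};

struct ib_ucq_object {
	struct ib_uevent_object uevent;	/* replaces the old ->uobject member */
	unsigned int comp_events_reported;
};

/* Userspace stand-in for the kernel's container_of(). */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* The two-level lookup used below, e.g.
 * container_of(uobj, struct ib_ucq_object, uevent.uobject). */
static struct ib_ucq_object *to_ucq(struct ib_uobject *uobj)
{
	return container_of(uobj, struct ib_ucq_object, uevent.uobject);
}

int main(void)
{
	struct ib_ucq_object cq_obj = { .uevent.uobject.id = 7 };
	struct ib_uobject *uobj = &cq_obj.uevent.uobject;

	/* Recover the CQ object from its embedded generic uobject. */
	printf("id=%u, same object: %d\n",
	       to_ucq(uobj)->uevent.uobject.id, to_ucq(uobj) == &cq_obj);
	return 0;
}

The same embedding is why the later hunks replace uobj_put_obj_read(cq) with rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, UVERBS_LOOKUP_READ): cq->uobject now points at the ib_ucq_object, and the generic uobject sits one level down in its uevent member.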
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 7ddd0e5bc6b3..025933752e1d 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -203,85 +203,55 @@ _ib_uverbs_lookup_comp_file(s32 fd, struct uverbs_attr_bundle *attrs) #define ib_uverbs_lookup_comp_file(_fd, _ufile) \ _ib_uverbs_lookup_comp_file((_fd)*typecheck(s32, _fd), _ufile) -static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs) +int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_file *file = attrs->ufile; - struct ib_uverbs_get_context cmd; - struct ib_uverbs_get_context_resp resp; - struct ib_ucontext *ucontext; - struct file *filp; - struct ib_rdmacg_object cg_obj; + struct ib_uverbs_file *ufile = attrs->ufile; + struct ib_ucontext *ucontext; struct ib_device *ib_dev; - int ret; - - ret = uverbs_request(attrs, &cmd, sizeof(cmd)); - if (ret) - return ret; - mutex_lock(&file->ucontext_lock); - ib_dev = srcu_dereference(file->device->ib_dev, - &file->device->disassociate_srcu); - if (!ib_dev) { - ret = -EIO; - goto err; - } - - if (file->ucontext) { - ret = -EINVAL; - goto err; - } - - ret = ib_rdmacg_try_charge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE); - if (ret) - goto err; + ib_dev = srcu_dereference(ufile->device->ib_dev, + &ufile->device->disassociate_srcu); + if (!ib_dev) + return -EIO; ucontext = rdma_zalloc_drv_obj(ib_dev, ib_ucontext); - if (!ucontext) { - ret = -ENOMEM; - goto err_alloc; - } - - attrs->context = ucontext; + if (!ucontext) + return -ENOMEM; ucontext->res.type = RDMA_RESTRACK_CTX; ucontext->device = ib_dev; - ucontext->cg_obj = cg_obj; - /* ufile is required when some objects are released */ - ucontext->ufile = file; - - ucontext->closing = false; - ucontext->cleanup_retryable = false; - - mutex_init(&ucontext->per_mm_list_lock); - INIT_LIST_HEAD(&ucontext->per_mm_list); + ucontext->ufile = ufile; + xa_init_flags(&ucontext->mmap_xa, XA_FLAGS_ALLOC); + attrs->context = ucontext; + return 0; +} - ret = get_unused_fd_flags(O_CLOEXEC); - if (ret < 0) - goto err_free; - resp.async_fd = ret; +int ib_init_ucontext(struct uverbs_attr_bundle *attrs) +{ + struct ib_ucontext *ucontext = attrs->context; + struct ib_uverbs_file *file = attrs->ufile; + int ret; - filp = ib_uverbs_alloc_async_event_file(file, ib_dev); - if (IS_ERR(filp)) { - ret = PTR_ERR(filp); - goto err_fd; + if (!down_read_trylock(&file->hw_destroy_rwsem)) + return -EIO; + mutex_lock(&file->ucontext_lock); + if (file->ucontext) { + ret = -EINVAL; + goto err; } - resp.num_comp_vectors = file->device->num_comp_vectors; - - ret = uverbs_response(attrs, &resp, sizeof(resp)); + ret = ib_rdmacg_try_charge(&ucontext->cg_obj, ucontext->device, + RDMACG_RESOURCE_HCA_HANDLE); if (ret) - goto err_file; + goto err; - ret = ib_dev->ops.alloc_ucontext(ucontext, &attrs->driver_udata); + ret = ucontext->device->ops.alloc_ucontext(ucontext, + &attrs->driver_udata); if (ret) - goto err_file; - if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) - ucontext->invalidate_range = NULL; + goto err_uncharge; rdma_restrack_uadd(&ucontext->res); - fd_install(resp.async_fd, filp); - /* * Make sure that ib_uverbs_get_ucontext() sees the pointer update * only after all writes to setup the ucontext have completed @@ -289,24 +259,62 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs) smp_store_release(&file->ucontext, ucontext); mutex_unlock(&file->ucontext_lock); - + up_read(&file->hw_destroy_rwsem); return 0; -err_file: - 
ib_uverbs_free_async_event_file(file); - fput(filp); +err_uncharge: + ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device, + RDMACG_RESOURCE_HCA_HANDLE); +err: + mutex_unlock(&file->ucontext_lock); + up_read(&file->hw_destroy_rwsem); + return ret; +} -err_fd: - put_unused_fd(resp.async_fd); +static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs) +{ + struct ib_uverbs_get_context_resp resp; + struct ib_uverbs_get_context cmd; + struct ib_device *ib_dev; + struct ib_uobject *uobj; + int ret; -err_free: - kfree(ucontext); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; -err_alloc: - ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE); + ret = ib_alloc_ucontext(attrs); + if (ret) + return ret; -err: - mutex_unlock(&file->ucontext_lock); + uobj = uobj_alloc(UVERBS_OBJECT_ASYNC_EVENT, attrs, &ib_dev); + if (IS_ERR(uobj)) { + ret = PTR_ERR(uobj); + goto err_ucontext; + } + + resp = (struct ib_uverbs_get_context_resp){ + .num_comp_vectors = attrs->ufile->device->num_comp_vectors, + .async_fd = uobj->id, + }; + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) + goto err_uobj; + + ret = ib_init_ucontext(attrs); + if (ret) + goto err_uobj; + + ib_uverbs_init_async_event_file( + container_of(uobj, struct ib_uverbs_async_event_file, uobj)); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; + +err_uobj: + rdma_alloc_abort_uobject(uobj, attrs); +err_ucontext: + kfree(attrs->context); + attrs->context = NULL; return ret; } @@ -449,7 +457,8 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) if (ret) goto err_copy; - return uobj_alloc_commit(uobj, attrs); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; err_copy: ib_dealloc_pd_user(pd, uverbs_get_cleared_udata(attrs)); @@ -645,7 +654,8 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs) mutex_unlock(&ibudev->xrcd_tree_mutex); - return uobj_alloc_commit(&obj->uobject, attrs); + rdma_alloc_commit_uobject(&obj->uobject, attrs); + return 0; err_copy: if (inode) { @@ -777,7 +787,8 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) uobj_put_obj_read(pd); - return uobj_alloc_commit(uobj, attrs); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; err_copy: ib_dereg_mr_user(mr, uverbs_get_cleared_udata(attrs)); @@ -931,7 +942,8 @@ static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs) goto err_copy; uobj_put_obj_read(pd); - return uobj_alloc_commit(uobj, attrs); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; err_copy: uverbs_dealloc_mw(mw); @@ -983,7 +995,8 @@ static int ib_uverbs_create_comp_channel(struct uverbs_attr_bundle *attrs) return ret; } - return uobj_alloc_commit(uobj, attrs); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; } static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, @@ -1013,11 +1026,9 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, } } - obj->uobject.user_handle = cmd->user_handle; - obj->comp_events_reported = 0; - obj->async_events_reported = 0; + obj->uevent.uobject.user_handle = cmd->user_handle; INIT_LIST_HEAD(&obj->comp_list); - INIT_LIST_HEAD(&obj->async_list); + INIT_LIST_HEAD(&obj->uevent.event_list); attr.cqe = cmd->cqe; attr.comp_vector = cmd->comp_vector; @@ -1029,7 +1040,7 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, goto err_file; } cq->device = ib_dev; - cq->uobject = &obj->uobject; + cq->uobject = obj; cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; 
cq->cq_context = ev_file ? &ev_file->ev_queue : NULL; @@ -1039,9 +1050,9 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, if (ret) goto err_free; - obj->uobject.object = cq; + obj->uevent.uobject.object = cq; memset(&resp, 0, sizeof resp); - resp.base.cq_handle = obj->uobject.id; + resp.base.cq_handle = obj->uevent.uobject.id; resp.base.cqe = cq->cqe; resp.response_length = uverbs_response_length(attrs, sizeof(resp)); @@ -1052,9 +1063,7 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, if (ret) goto err_cb; - ret = uobj_alloc_commit(&obj->uobject, attrs); - if (ret) - return ERR_PTR(ret); + rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); return obj; err_cb: @@ -1064,10 +1073,10 @@ err_free: kfree(cq); err_file: if (ev_file) - ib_uverbs_release_ucq(attrs->ufile, ev_file, obj); + ib_uverbs_release_ucq(ev_file, obj); err: - uobj_alloc_abort(&obj->uobject, attrs); + uobj_alloc_abort(&obj->uevent.uobject, attrs); return ERR_PTR(ret); } @@ -1136,7 +1145,8 @@ static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs) ret = uverbs_response(attrs, &resp, sizeof(resp)); out: - uobj_put_obj_read(cq); + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return ret; } @@ -1219,7 +1229,8 @@ static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs) ret = uverbs_output_written(attrs, UVERBS_ATTR_CORE_OUT); out_put: - uobj_put_obj_read(cq); + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return ret; } @@ -1240,8 +1251,8 @@ static int ib_uverbs_req_notify_cq(struct uverbs_attr_bundle *attrs) ib_req_notify_cq(cq, cmd.solicited_only ? IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); - uobj_put_obj_read(cq); - + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return 0; } @@ -1261,10 +1272,10 @@ static int ib_uverbs_destroy_cq(struct uverbs_attr_bundle *attrs) if (IS_ERR(uobj)) return PTR_ERR(uobj); - obj = container_of(uobj, struct ib_ucq_object, uobject); + obj = container_of(uobj, struct ib_ucq_object, uevent.uobject); memset(&resp, 0, sizeof(resp)); resp.comp_events_reported = obj->comp_events_reported; - resp.async_events_reported = obj->async_events_reported; + resp.async_events_reported = obj->uevent.events_reported; uobj_put_destroy(uobj); @@ -1378,7 +1389,6 @@ static int create_qp(struct uverbs_attr_bundle *attrs, } attr.event_handler = ib_uverbs_qp_event_handler; - attr.qp_context = attrs->ufile; attr.send_cq = scq; attr.recv_cq = rcq; attr.srq = srq; @@ -1394,7 +1404,6 @@ static int create_qp(struct uverbs_attr_bundle *attrs, attr.cap.max_recv_sge = cmd->max_recv_sge; attr.cap.max_inline_data = cmd->max_inline_data; - obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); INIT_LIST_HEAD(&obj->mcast_list); @@ -1424,7 +1433,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs, qp = ib_create_qp(pd, &attr); else qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, - &obj->uevent.uobject); + obj); if (IS_ERR(qp)) { ret = PTR_ERR(qp); @@ -1442,7 +1451,6 @@ static int create_qp(struct uverbs_attr_bundle *attrs, qp->srq = attr.srq; qp->rwq_ind_tbl = ind_tbl; qp->event_handler = attr.event_handler; - qp->qp_context = attr.qp_context; qp->qp_type = attr.qp_type; atomic_set(&qp->usecnt, 0); atomic_inc(&pd->usecnt); @@ -1457,7 +1465,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs, atomic_inc(&ind_tbl->usecnt); } else { /* It is done in _ib_create_qp for other QP types */ - qp->uobject = &obj->uevent.uobject; + qp->uobject = 
obj; } obj->uevent.uobject.object = qp; @@ -1486,15 +1494,19 @@ static int create_qp(struct uverbs_attr_bundle *attrs, if (pd) uobj_put_obj_read(pd); if (scq) - uobj_put_obj_read(scq); + rdma_lookup_put_uobject(&scq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (rcq && rcq != scq) - uobj_put_obj_read(rcq); + rdma_lookup_put_uobject(&rcq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (srq) - uobj_put_obj_read(srq); + rdma_lookup_put_uobject(&srq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (ind_tbl) uobj_put_obj_read(ind_tbl); - return uobj_alloc_commit(&obj->uevent.uobject, attrs); + rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); + return 0; err_cb: ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs)); @@ -1504,11 +1516,14 @@ err_put: if (pd) uobj_put_obj_read(pd); if (scq) - uobj_put_obj_read(scq); + rdma_lookup_put_uobject(&scq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (rcq && rcq != scq) - uobj_put_obj_read(rcq); + rdma_lookup_put_uobject(&rcq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (srq) - uobj_put_obj_read(srq); + rdma_lookup_put_uobject(&srq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (ind_tbl) uobj_put_obj_read(ind_tbl); @@ -1570,7 +1585,7 @@ static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs) struct ib_xrcd *xrcd; struct ib_uobject *uninitialized_var(xrcd_uobj); struct ib_qp *qp; - struct ib_qp_open_attr attr; + struct ib_qp_open_attr attr = {}; int ret; struct ib_device *ib_dev; @@ -1596,11 +1611,9 @@ static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs) } attr.event_handler = ib_uverbs_qp_event_handler; - attr.qp_context = attrs->ufile; attr.qp_num = cmd.qpn; attr.qp_type = cmd.qp_type; - obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); INIT_LIST_HEAD(&obj->mcast_list); @@ -1623,10 +1636,11 @@ static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs) obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); - qp->uobject = &obj->uevent.uobject; + qp->uobject = obj; uobj_put_read(xrcd_uobj); - return uobj_alloc_commit(&obj->uevent.uobject, attrs); + rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); + return 0; err_destroy: ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs)); @@ -1687,7 +1701,8 @@ static int ib_uverbs_query_qp(struct uverbs_attr_bundle *attrs) ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr); - uobj_put_obj_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (ret) goto out; @@ -1924,7 +1939,8 @@ static int modify_qp(struct uverbs_attr_bundle *attrs, &attrs->driver_udata); release_qp: - uobj_put_obj_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); out: kfree(attr); @@ -2188,7 +2204,8 @@ static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs) ret = ret2; out_put: - uobj_put_obj_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); while (wr) { if (is_ud && ud_wr(wr)->ah) @@ -2330,7 +2347,8 @@ static int ib_uverbs_post_recv(struct uverbs_attr_bundle *attrs) resp.bad_wr = 0; ret = qp->device->ops.post_recv(qp->real_qp, wr, &bad_wr); - uobj_put_obj_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (ret) { for (next = wr; next; next = next->next) { ++resp.bad_wr; @@ -2380,7 +2398,8 @@ static int ib_uverbs_post_srq_recv(struct uverbs_attr_bundle *attrs) resp.bad_wr = 0; ret = srq->device->ops.post_srq_recv(srq, wr, &bad_wr); - 
uobj_put_obj_read(srq); + rdma_lookup_put_uobject(&srq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (ret) for (next = wr; next; next = next->next) { @@ -2468,7 +2487,8 @@ static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs) goto err_copy; uobj_put_obj_read(pd); - return uobj_alloc_commit(uobj, attrs); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; err_copy: rdma_destroy_ah_user(ah, RDMA_DESTROY_AH_SLEEPABLE, @@ -2510,7 +2530,7 @@ static int ib_uverbs_attach_mcast(struct uverbs_attr_bundle *attrs) if (!qp) return -EINVAL; - obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); + obj = qp->uobject; mutex_lock(&obj->mcast_lock); list_for_each_entry(mcast, &obj->mcast_list, list) @@ -2537,7 +2557,8 @@ static int ib_uverbs_attach_mcast(struct uverbs_attr_bundle *attrs) out_put: mutex_unlock(&obj->mcast_lock); - uobj_put_obj_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return ret; } @@ -2559,7 +2580,7 @@ static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs) if (!qp) return -EINVAL; - obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); + obj = qp->uobject; mutex_lock(&obj->mcast_lock); list_for_each_entry(mcast, &obj->mcast_list, list) @@ -2580,7 +2601,8 @@ static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs) out_put: mutex_unlock(&obj->mcast_lock); - uobj_put_obj_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return ret; } @@ -2723,12 +2745,6 @@ static int kern_spec_to_ib_spec_action(struct uverbs_attr_bundle *attrs, return 0; } -static size_t kern_spec_filter_sz(const struct ib_uverbs_flow_spec_hdr *spec) -{ - /* Returns user space filter size, includes padding */ - return (spec->size - sizeof(struct ib_uverbs_flow_spec_hdr)) / 2; -} - static ssize_t spec_filter_size(const void *kern_spec_filter, u16 kern_filter_size, u16 ib_real_filter_sz) { @@ -2872,11 +2888,16 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type, static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec, union ib_flow_spec *ib_spec) { - ssize_t kern_filter_sz; + size_t kern_filter_sz; void *kern_spec_mask; void *kern_spec_val; - kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr); + if (check_sub_overflow((size_t)kern_spec->hdr.size, + sizeof(struct ib_uverbs_flow_spec_hdr), + &kern_filter_sz)) + return -EINVAL; + + kern_filter_sz /= 2; kern_spec_val = (void *)kern_spec + sizeof(struct ib_uverbs_flow_spec_hdr); @@ -2946,7 +2967,6 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) wq_init_attr.wq_type = cmd.wq_type; wq_init_attr.event_handler = ib_uverbs_wq_event_handler; wq_init_attr.create_flags = cmd.create_flags; - obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); wq = pd->device->ops.create_wq(pd, &wq_init_attr, &attrs->driver_udata); @@ -2955,7 +2975,7 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) goto err_put_cq; } - wq->uobject = &obj->uevent.uobject; + wq->uobject = obj; obj->uevent.uobject.object = wq; wq->wq_type = wq_init_attr.wq_type; wq->cq = cq; @@ -2965,7 +2985,7 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) atomic_set(&wq->usecnt, 0); atomic_inc(&pd->usecnt); atomic_inc(&cq->usecnt); - wq->uobject = &obj->uevent.uobject; + wq->uobject = obj; obj->uevent.uobject.object = wq; memset(&resp, 0, sizeof(resp)); @@ -2979,13 +2999,16 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) goto 
err_copy; uobj_put_obj_read(pd); - uobj_put_obj_read(cq); - return uobj_alloc_commit(&obj->uevent.uobject, attrs); + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); + rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); + return 0; err_copy: ib_destroy_wq(wq, uverbs_get_cleared_udata(attrs)); err_put_cq: - uobj_put_obj_read(cq); + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); err_put_pd: uobj_put_obj_read(pd); err_uobj: @@ -3051,7 +3074,8 @@ static int ib_uverbs_ex_modify_wq(struct uverbs_attr_bundle *attrs) } ret = wq->device->ops.modify_wq(wq, &wq_attr, cmd.attr_mask, &attrs->driver_udata); - uobj_put_obj_read(wq); + rdma_lookup_put_uobject(&wq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return ret; } @@ -3152,9 +3176,11 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs) kfree(wqs_handles); for (j = 0; j < num_read_wqs; j++) - uobj_put_obj_read(wqs[j]); + rdma_lookup_put_uobject(&wqs[j]->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); - return uobj_alloc_commit(uobj, attrs); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; err_copy: ib_destroy_rwq_ind_table(rwq_ind_tbl); @@ -3162,7 +3188,8 @@ err_uobj: uobj_alloc_abort(uobj, attrs); put_wqs: for (j = 0; j < num_read_wqs; j++) - uobj_put_obj_read(wqs[j]); + rdma_lookup_put_uobject(&wqs[j]->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); err_free: kfree(wqs_handles); kfree(wqs); @@ -3328,11 +3355,13 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs) if (err) goto err_copy; - uobj_put_obj_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); kfree(flow_attr); if (cmd.flow_attr.num_of_specs) kfree(kern_flow_attr); - return uobj_alloc_commit(uobj, attrs); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; err_copy: if (!qp->device->ops.destroy_flow(flow_id)) atomic_dec(&qp->usecnt); @@ -3341,7 +3370,8 @@ err_free: err_free_flow_attr: kfree(flow_attr); err_put: - uobj_put_obj_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); err_uobj: uobj_alloc_abort(uobj, attrs); err_free_attr: @@ -3426,7 +3456,6 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, attr.attr.max_sge = cmd->max_sge; attr.attr.srq_limit = cmd->srq_limit; - obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); srq = rdma_zalloc_drv_obj(ib_dev, ib_srq); @@ -3438,7 +3467,7 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, srq->device = pd->device; srq->pd = pd; srq->srq_type = cmd->srq_type; - srq->uobject = &obj->uevent.uobject; + srq->uobject = obj; srq->event_handler = attr.event_handler; srq->srq_context = attr.srq_context; @@ -3477,14 +3506,17 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, uobj_put_read(xrcd_uobj); if (ib_srq_has_cq(cmd->srq_type)) - uobj_put_obj_read(attr.ext.cq); + rdma_lookup_put_uobject(&attr.ext.cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); uobj_put_obj_read(pd); - return uobj_alloc_commit(&obj->uevent.uobject, attrs); + rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); + return 0; err_copy: ib_destroy_srq_user(srq, uverbs_get_cleared_udata(attrs)); - + /* It was released in ib_destroy_srq_user */ + srq = NULL; err_free: kfree(srq); err_put: @@ -3492,7 +3524,8 @@ err_put: err_put_cq: if (ib_srq_has_cq(cmd->srq_type)) - uobj_put_obj_read(attr.ext.cq); + rdma_lookup_put_uobject(&attr.ext.cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); err_put_xrcd: if 
(cmd->srq_type == IB_SRQT_XRC) { @@ -3560,7 +3593,8 @@ static int ib_uverbs_modify_srq(struct uverbs_attr_bundle *attrs) ret = srq->device->ops.modify_srq(srq, &attr, cmd.attr_mask, &attrs->driver_udata); - uobj_put_obj_read(srq); + rdma_lookup_put_uobject(&srq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return ret; } @@ -3583,7 +3617,8 @@ static int ib_uverbs_query_srq(struct uverbs_attr_bundle *attrs) ret = ib_query_srq(srq, &attr); - uobj_put_obj_read(srq); + rdma_lookup_put_uobject(&srq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (ret) return ret; @@ -3708,8 +3743,8 @@ static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs) ret = rdma_set_cq_moderation(cq, cmd.attr.cq_count, cmd.attr.cq_period); - uobj_put_obj_read(cq); - + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return ret; } diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 61758201d9b2..538affbc517e 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -220,24 +220,17 @@ static int uverbs_process_idrs_array(struct bundle_priv *pbundle, return ret; } -static int uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi, - struct uverbs_objs_arr_attr *attr, - bool commit, struct uverbs_attr_bundle *attrs) +static void uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi, + struct uverbs_objs_arr_attr *attr, + bool commit, + struct uverbs_attr_bundle *attrs) { const struct uverbs_attr_spec *spec = &attr_uapi->spec; - int current_ret; - int ret = 0; size_t i; - for (i = 0; i != attr->len; i++) { - current_ret = uverbs_finalize_object(attr->uobjects[i], - spec->u2.objs_arr.access, - commit, attrs); - if (!ret) - ret = current_ret; - } - - return ret; + for (i = 0; i != attr->len; i++) + uverbs_finalize_object(attr->uobjects[i], + spec->u2.objs_arr.access, commit, attrs); } static int uverbs_process_attr(struct bundle_priv *pbundle, @@ -495,26 +488,22 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle, return ret; } -static int bundle_destroy(struct bundle_priv *pbundle, bool commit) +static void bundle_destroy(struct bundle_priv *pbundle, bool commit) { unsigned int key_bitmap_len = pbundle->method_elm->key_bitmap_len; struct bundle_alloc_head *memblock; unsigned int i; - int ret = 0; /* fast path for simple uobjects */ i = -1; while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len, i + 1)) < key_bitmap_len) { struct uverbs_attr *attr = &pbundle->bundle.attrs[i]; - int current_ret; - current_ret = uverbs_finalize_object( + uverbs_finalize_object( attr->obj_attr.uobject, attr->obj_attr.attr_elm->spec.u.obj.access, commit, &pbundle->bundle); - if (!ret) - ret = current_ret; } i = -1; @@ -523,7 +512,6 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit) struct uverbs_attr *attr = &pbundle->bundle.attrs[i]; const struct uverbs_api_attr *attr_uapi; void __rcu **slot; - int current_ret; slot = uapi_get_attr_for_method( pbundle, @@ -534,11 +522,8 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit) attr_uapi = rcu_dereference_protected(*slot, true); if (attr_uapi->spec.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) { - current_ret = uverbs_free_idrs_array( - attr_uapi, &attr->objs_arr_attr, commit, - &pbundle->bundle); - if (!ret) - ret = current_ret; + uverbs_free_idrs_array(attr_uapi, &attr->objs_arr_attr, + commit, &pbundle->bundle); } } @@ -548,8 +533,6 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit) 
memblock = memblock->next; kvfree(tmp); } - - return ret; } static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile, @@ -562,7 +545,6 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile, struct bundle_priv *pbundle; struct bundle_priv onstack; void __rcu **slot; - int destroy_ret; int ret; if (unlikely(hdr->driver_id != uapi->driver_id)) @@ -610,10 +592,7 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile, memset(pbundle->spec_finalize, 0, sizeof(pbundle->spec_finalize)); ret = ib_uverbs_run_method(pbundle, hdr->num_attrs); - destroy_ret = bundle_destroy(pbundle, ret == 0); - if (unlikely(destroy_ret && !ret)) - return destroy_ret; - + bundle_destroy(pbundle, ret == 0); return ret; } @@ -795,6 +774,9 @@ int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle, { const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx); + if (IS_ERR(attr)) + return PTR_ERR(attr); + if (size < attr->ptr_attr.len) { if (clear_user(u64_to_user_ptr(attr->ptr_attr.data) + size, attr->ptr_attr.len - size)) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 11c13c1381cf..2d4083bf4a04 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -120,20 +120,13 @@ static void ib_uverbs_release_dev(struct device *device) uverbs_destroy_api(dev->uapi); cleanup_srcu_struct(&dev->disassociate_srcu); + mutex_destroy(&dev->lists_mutex); + mutex_destroy(&dev->xrcd_tree_mutex); kfree(dev); } -static void ib_uverbs_release_async_event_file(struct kref *ref) -{ - struct ib_uverbs_async_event_file *file = - container_of(ref, struct ib_uverbs_async_event_file, ref); - - kfree(file); -} - -void ib_uverbs_release_ucq(struct ib_uverbs_file *file, - struct ib_uverbs_completion_event_file *ev_file, - struct ib_ucq_object *uobj) +void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file, + struct ib_ucq_object *uobj) { struct ib_uverbs_event *evt, *tmp; @@ -148,25 +141,24 @@ void ib_uverbs_release_ucq(struct ib_uverbs_file *file, uverbs_uobject_put(&ev_file->uobj); } - spin_lock_irq(&file->async_file->ev_queue.lock); - list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { - list_del(&evt->list); - kfree(evt); - } - spin_unlock_irq(&file->async_file->ev_queue.lock); + ib_uverbs_release_uevent(&uobj->uevent); } -void ib_uverbs_release_uevent(struct ib_uverbs_file *file, - struct ib_uevent_object *uobj) +void ib_uverbs_release_uevent(struct ib_uevent_object *uobj) { + struct ib_uverbs_async_event_file *async_file = + READ_ONCE(uobj->uobject.ufile->async_file); struct ib_uverbs_event *evt, *tmp; - spin_lock_irq(&file->async_file->ev_queue.lock); + if (!async_file) + return; + + spin_lock_irq(&async_file->ev_queue.lock); list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { list_del(&evt->list); kfree(evt); } - spin_unlock_irq(&file->async_file->ev_queue.lock); + spin_unlock_irq(&async_file->ev_queue.lock); } void ib_uverbs_detach_umcast(struct ib_qp *qp, @@ -206,17 +198,17 @@ void ib_uverbs_release_file(struct kref *ref) ib_uverbs_comp_dev(file->device); if (file->async_file) - kref_put(&file->async_file->ref, - ib_uverbs_release_async_event_file); + uverbs_uobject_put(&file->async_file->uobj); put_device(&file->device->dev); if (file->disassociate_page) __free_pages(file->disassociate_page, 0); + mutex_destroy(&file->umap_lock); + mutex_destroy(&file->ucontext_lock); kfree(file); } static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue, - struct 
ib_uverbs_file *uverbs_file, struct file *filp, char __user *buf, size_t count, loff_t *pos, size_t eventsz) @@ -234,19 +226,16 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue, if (wait_event_interruptible(ev_queue->poll_wait, (!list_empty(&ev_queue->event_list) || - /* The barriers built into wait_event_interruptible() - * and wake_up() guarentee this will see the null set - * without using RCU - */ - !uverbs_file->device->ib_dev))) + ev_queue->is_closed))) return -ERESTARTSYS; + spin_lock_irq(&ev_queue->lock); + /* If device was disassociated and no event exists set an error */ - if (list_empty(&ev_queue->event_list) && - !uverbs_file->device->ib_dev) + if (list_empty(&ev_queue->event_list) && ev_queue->is_closed) { + spin_unlock_irq(&ev_queue->lock); return -EIO; - - spin_lock_irq(&ev_queue->lock); + } } event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list); @@ -281,8 +270,7 @@ static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf, { struct ib_uverbs_async_event_file *file = filp->private_data; - return ib_uverbs_event_read(&file->ev_queue, file->uverbs_file, filp, - buf, count, pos, + return ib_uverbs_event_read(&file->ev_queue, filp, buf, count, pos, sizeof(struct ib_uverbs_async_event_desc)); } @@ -292,9 +280,8 @@ static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf, struct ib_uverbs_completion_event_file *comp_ev_file = filp->private_data; - return ib_uverbs_event_read(&comp_ev_file->ev_queue, - comp_ev_file->uobj.ufile, filp, - buf, count, pos, + return ib_uverbs_event_read(&comp_ev_file->ev_queue, filp, buf, count, + pos, sizeof(struct ib_uverbs_comp_event_desc)); } @@ -317,7 +304,9 @@ static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue, static __poll_t ib_uverbs_async_event_poll(struct file *filp, struct poll_table_struct *wait) { - return ib_uverbs_event_poll(filp->private_data, filp, wait); + struct ib_uverbs_async_event_file *file = filp->private_data; + + return ib_uverbs_event_poll(&file->ev_queue, filp, wait); } static __poll_t ib_uverbs_comp_event_poll(struct file *filp, @@ -331,9 +320,9 @@ static __poll_t ib_uverbs_comp_event_poll(struct file *filp, static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on) { - struct ib_uverbs_event_queue *ev_queue = filp->private_data; + struct ib_uverbs_async_event_file *file = filp->private_data; - return fasync_helper(fd, filp, on, &ev_queue->async_queue); + return fasync_helper(fd, filp, on, &file->ev_queue.async_queue); } static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on) @@ -344,70 +333,20 @@ static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on) return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue); } -static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp) -{ - struct ib_uverbs_async_event_file *file = filp->private_data; - struct ib_uverbs_file *uverbs_file = file->uverbs_file; - struct ib_uverbs_event *entry, *tmp; - int closed_already = 0; - - mutex_lock(&uverbs_file->device->lists_mutex); - spin_lock_irq(&file->ev_queue.lock); - closed_already = file->ev_queue.is_closed; - file->ev_queue.is_closed = 1; - list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) { - if (entry->counter) - list_del(&entry->obj_list); - kfree(entry); - } - spin_unlock_irq(&file->ev_queue.lock); - if (!closed_already) { - list_del(&file->list); - ib_unregister_event_handler(&uverbs_file->event_handler); - } - 
mutex_unlock(&uverbs_file->device->lists_mutex); - - kref_put(&uverbs_file->ref, ib_uverbs_release_file); - kref_put(&file->ref, ib_uverbs_release_async_event_file); - - return 0; -} - -static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp) -{ - struct ib_uobject *uobj = filp->private_data; - struct ib_uverbs_completion_event_file *file = container_of( - uobj, struct ib_uverbs_completion_event_file, uobj); - struct ib_uverbs_event *entry, *tmp; - - spin_lock_irq(&file->ev_queue.lock); - list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) { - if (entry->counter) - list_del(&entry->obj_list); - kfree(entry); - } - file->ev_queue.is_closed = 1; - spin_unlock_irq(&file->ev_queue.lock); - - uverbs_close_fd(filp); - - return 0; -} - const struct file_operations uverbs_event_fops = { .owner = THIS_MODULE, .read = ib_uverbs_comp_event_read, .poll = ib_uverbs_comp_event_poll, - .release = ib_uverbs_comp_event_close, + .release = uverbs_uobject_fd_release, .fasync = ib_uverbs_comp_event_fasync, .llseek = no_llseek, }; -static const struct file_operations uverbs_async_event_fops = { +const struct file_operations uverbs_async_event_fops = { .owner = THIS_MODULE, .read = ib_uverbs_async_event_read, .poll = ib_uverbs_async_event_poll, - .release = ib_uverbs_async_event_close, + .release = uverbs_uobject_fd_release, .fasync = ib_uverbs_async_event_fasync, .llseek = no_llseek, }; @@ -434,9 +373,9 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) return; } - uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); + uobj = cq->uobject; - entry->desc.comp.cq_handle = cq->uobject->user_handle; + entry->desc.comp.cq_handle = cq->uobject->uevent.uobject.user_handle; entry->counter = &uobj->comp_events_reported; list_add_tail(&entry->list, &ev_queue->event_list); @@ -447,102 +386,82 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN); } -static void ib_uverbs_async_handler(struct ib_uverbs_file *file, - __u64 element, __u64 event, - struct list_head *obj_list, - u32 *counter) +static void +ib_uverbs_async_handler(struct ib_uverbs_async_event_file *async_file, + __u64 element, __u64 event, struct list_head *obj_list, + u32 *counter) { struct ib_uverbs_event *entry; unsigned long flags; - spin_lock_irqsave(&file->async_file->ev_queue.lock, flags); - if (file->async_file->ev_queue.is_closed) { - spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags); + if (!async_file) + return; + + spin_lock_irqsave(&async_file->ev_queue.lock, flags); + if (async_file->ev_queue.is_closed) { + spin_unlock_irqrestore(&async_file->ev_queue.lock, flags); return; } entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) { - spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags); + spin_unlock_irqrestore(&async_file->ev_queue.lock, flags); return; } - entry->desc.async.element = element; + entry->desc.async.element = element; entry->desc.async.event_type = event; - entry->desc.async.reserved = 0; - entry->counter = counter; + entry->desc.async.reserved = 0; + entry->counter = counter; - list_add_tail(&entry->list, &file->async_file->ev_queue.event_list); + list_add_tail(&entry->list, &async_file->ev_queue.event_list); if (obj_list) list_add_tail(&entry->obj_list, obj_list); - spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags); + spin_unlock_irqrestore(&async_file->ev_queue.lock, flags); - wake_up_interruptible(&file->async_file->ev_queue.poll_wait); - 
kill_fasync(&file->async_file->ev_queue.async_queue, SIGIO, POLL_IN); + wake_up_interruptible(&async_file->ev_queue.poll_wait); + kill_fasync(&async_file->ev_queue.async_queue, SIGIO, POLL_IN); } -void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) +static void uverbs_uobj_event(struct ib_uevent_object *eobj, + struct ib_event *event) { - struct ib_ucq_object *uobj = container_of(event->element.cq->uobject, - struct ib_ucq_object, uobject); + ib_uverbs_async_handler(READ_ONCE(eobj->uobject.ufile->async_file), + eobj->uobject.user_handle, event->event, + &eobj->event_list, &eobj->events_reported); +} - ib_uverbs_async_handler(uobj->uobject.ufile, uobj->uobject.user_handle, - event->event, &uobj->async_list, - &uobj->async_events_reported); +void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) +{ + uverbs_uobj_event(&event->element.cq->uobject->uevent, event); } void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) { - struct ib_uevent_object *uobj; - /* for XRC target qp's, check that qp is live */ if (!event->element.qp->uobject) return; - uobj = container_of(event->element.qp->uobject, - struct ib_uevent_object, uobject); - - ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, - event->event, &uobj->event_list, - &uobj->events_reported); + uverbs_uobj_event(&event->element.qp->uobject->uevent, event); } void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr) { - struct ib_uevent_object *uobj = container_of(event->element.wq->uobject, - struct ib_uevent_object, uobject); - - ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, - event->event, &uobj->event_list, - &uobj->events_reported); + uverbs_uobj_event(&event->element.wq->uobject->uevent, event); } void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) { - struct ib_uevent_object *uobj; - - uobj = container_of(event->element.srq->uobject, - struct ib_uevent_object, uobject); - - ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, - event->event, &uobj->event_list, - &uobj->events_reported); -} - -void ib_uverbs_event_handler(struct ib_event_handler *handler, - struct ib_event *event) -{ - struct ib_uverbs_file *file = - container_of(handler, struct ib_uverbs_file, event_handler); - - ib_uverbs_async_handler(file, event->element.port_num, event->event, - NULL, NULL); + uverbs_uobj_event(&event->element.srq->uobject->uevent, event); } -void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file) +static void ib_uverbs_event_handler(struct ib_event_handler *handler, + struct ib_event *event) { - kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file); - file->async_file = NULL; + ib_uverbs_async_handler( + container_of(handler, struct ib_uverbs_async_event_file, + event_handler), + event->element.port_num, event->event, NULL, NULL); } void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue) @@ -554,45 +473,26 @@ void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue) ev_queue->async_queue = NULL; } -struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file, - struct ib_device *ib_dev) +void ib_uverbs_init_async_event_file( + struct ib_uverbs_async_event_file *async_file) { - struct ib_uverbs_async_event_file *ev_file; - struct file *filp; - - ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL); - if (!ev_file) - return ERR_PTR(-ENOMEM); - - ib_uverbs_init_event_queue(&ev_file->ev_queue); - ev_file->uverbs_file = uverbs_file; - 
kref_get(&ev_file->uverbs_file->ref); - kref_init(&ev_file->ref); - filp = anon_inode_getfile("[infinibandevent]", &uverbs_async_event_fops, - ev_file, O_RDONLY); - if (IS_ERR(filp)) - goto err_put_refs; - - mutex_lock(&uverbs_file->device->lists_mutex); - list_add_tail(&ev_file->list, - &uverbs_file->device->uverbs_events_file_list); - mutex_unlock(&uverbs_file->device->lists_mutex); - - WARN_ON(uverbs_file->async_file); - uverbs_file->async_file = ev_file; - kref_get(&uverbs_file->async_file->ref); - INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler, - ib_dev, - ib_uverbs_event_handler); - ib_register_event_handler(&uverbs_file->event_handler); - /* At that point async file stuff was fully set */ + struct ib_uverbs_file *uverbs_file = async_file->uobj.ufile; + struct ib_device *ib_dev = async_file->uobj.context->device; + + ib_uverbs_init_event_queue(&async_file->ev_queue); - return filp; + /* The first async_event_file becomes the default one for the file. */ + mutex_lock(&uverbs_file->ucontext_lock); + if (!uverbs_file->async_file) { + /* Pairs with the put in ib_uverbs_release_file */ + uverbs_uobject_get(&async_file->uobj); + smp_store_release(&uverbs_file->async_file, async_file); + } + mutex_unlock(&uverbs_file->ucontext_lock); -err_put_refs: - kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file); - kref_put(&ev_file->ref, ib_uverbs_release_async_event_file); - return filp; + INIT_IB_EVENT_HANDLER(&async_file->event_handler, ib_dev, + ib_uverbs_event_handler); + ib_register_event_handler(&async_file->event_handler); } static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr, @@ -768,6 +668,8 @@ out_unlock: return (ret) ? : count; } +static const struct vm_operations_struct rdma_umap_ops; + static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) { struct ib_uverbs_file *file = filp->private_data; @@ -781,7 +683,7 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) ret = PTR_ERR(ucontext); goto out; } - + vma->vm_ops = &rdma_umap_ops; ret = ucontext->device->ops.mmap(ucontext, vma); out: srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); @@ -789,38 +691,6 @@ out: } /* - * Each time we map IO memory into user space this keeps track of the mapping. - * When the device is hot-unplugged we 'zap' the mmaps in user space to point - * to the zero page and allow the hot unplug to proceed. - * - * This is necessary for cases like PCI physical hot unplug as the actual BAR - * memory may vanish after this and access to it from userspace could MCE. - * - * RDMA drivers supporting disassociation must have their user space designed - * to cope in some way with their IO pages going to the zero page. 
- */ -struct rdma_umap_priv { - struct vm_area_struct *vma; - struct list_head list; -}; - -static const struct vm_operations_struct rdma_umap_ops; - -static void rdma_umap_priv_init(struct rdma_umap_priv *priv, - struct vm_area_struct *vma) -{ - struct ib_uverbs_file *ufile = vma->vm_file->private_data; - - priv->vma = vma; - vma->vm_private_data = priv; - vma->vm_ops = &rdma_umap_ops; - - mutex_lock(&ufile->umap_lock); - list_add(&priv->list, &ufile->umaps); - mutex_unlock(&ufile->umap_lock); -} - -/* * The VMA has been dup'd, initialize the vm_private_data with a new tracking * struct */ @@ -845,7 +715,7 @@ static void rdma_umap_open(struct vm_area_struct *vma) priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) goto out_unlock; - rdma_umap_priv_init(priv, vma); + rdma_umap_priv_init(priv, vma, opriv->entry); up_read(&ufile->hw_destroy_rwsem); return; @@ -876,6 +746,9 @@ static void rdma_umap_close(struct vm_area_struct *vma) * this point. */ mutex_lock(&ufile->umap_lock); + if (priv->entry) + rdma_user_mmap_entry_put(priv->entry); + list_del(&priv->list); mutex_unlock(&ufile->umap_lock); kfree(priv); @@ -927,44 +800,6 @@ static const struct vm_operations_struct rdma_umap_ops = { .fault = rdma_umap_fault, }; -/* - * Map IO memory into a process. This is to be called by drivers as part of - * their mmap() functions if they wish to send something like PCI-E BAR memory - * to userspace. - */ -int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, - unsigned long pfn, unsigned long size, pgprot_t prot) -{ - struct ib_uverbs_file *ufile = ucontext->ufile; - struct rdma_umap_priv *priv; - - if (!(vma->vm_flags & VM_SHARED)) - return -EINVAL; - - if (vma->vm_end - vma->vm_start != size) - return -EINVAL; - - /* Driver is using this wrong, must be called by ib_uverbs_mmap */ - if (WARN_ON(!vma->vm_file || - vma->vm_file->private_data != ufile)) - return -EINVAL; - lockdep_assert_held(&ufile->device->disassociate_srcu); - - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - vma->vm_page_prot = prot; - if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) { - kfree(priv); - return -EAGAIN; - } - - rdma_umap_priv_init(priv, vma); - return 0; -} -EXPORT_SYMBOL(rdma_user_mmap_io); - void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) { struct rdma_umap_priv *priv, *next_priv; @@ -1014,6 +849,11 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); + + if (priv->entry) { + rdma_user_mmap_entry_put(priv->entry); + priv->entry = NULL; + } } mutex_unlock(&ufile->umap_lock); skip_mm: @@ -1135,7 +975,7 @@ static const struct file_operations uverbs_fops = { .release = ib_uverbs_close, .llseek = no_llseek, .unlocked_ioctl = ib_uverbs_ioctl, - .compat_ioctl = ib_uverbs_ioctl, + .compat_ioctl = compat_ptr_ioctl, }; static const struct file_operations uverbs_mmap_fops = { @@ -1146,7 +986,7 @@ static const struct file_operations uverbs_mmap_fops = { .release = ib_uverbs_close, .llseek = no_llseek, .unlocked_ioctl = ib_uverbs_ioctl, - .compat_ioctl = ib_uverbs_ioctl, + .compat_ioctl = compat_ptr_ioctl, }; static int ib_uverbs_get_nl_info(struct ib_device *ibdev, void *client_data, @@ -1281,7 +1121,6 @@ static void ib_uverbs_add_one(struct ib_device *device) mutex_init(&uverbs_dev->xrcd_tree_mutex); mutex_init(&uverbs_dev->lists_mutex); INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list); - INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list); 
rcu_assign_pointer(uverbs_dev->ib_dev, device); uverbs_dev->num_comp_vectors = device->num_comp_vectors; @@ -1326,14 +1165,9 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, struct ib_device *ib_dev) { struct ib_uverbs_file *file; - struct ib_uverbs_async_event_file *event_file; - struct ib_event event; /* Pending running commands to terminate */ uverbs_disassociate_api_pre(uverbs_dev); - event.event = IB_EVENT_DEVICE_FATAL; - event.element.port_num = 0; - event.device = ib_dev; mutex_lock(&uverbs_dev->lists_mutex); while (!list_empty(&uverbs_dev->uverbs_file_list)) { @@ -1349,31 +1183,14 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, */ mutex_unlock(&uverbs_dev->lists_mutex); - ib_uverbs_event_handler(&file->event_handler, &event); + ib_uverbs_async_handler(READ_ONCE(file->async_file), 0, + IB_EVENT_DEVICE_FATAL, NULL, NULL); + uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE); kref_put(&file->ref, ib_uverbs_release_file); mutex_lock(&uverbs_dev->lists_mutex); } - - while (!list_empty(&uverbs_dev->uverbs_events_file_list)) { - event_file = list_first_entry(&uverbs_dev-> - uverbs_events_file_list, - struct ib_uverbs_async_event_file, - list); - spin_lock_irq(&event_file->ev_queue.lock); - event_file->ev_queue.is_closed = 1; - spin_unlock_irq(&event_file->ev_queue.lock); - - list_del(&event_file->list); - ib_unregister_event_handler( - &event_file->uverbs_file->event_handler); - event_file->uverbs_file->event_handler.device = - NULL; - - wake_up_interruptible(&event_file->ev_queue.poll_wait); - kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN); - } mutex_unlock(&uverbs_dev->lists_mutex); uverbs_disassociate_api(uverbs_dev->uapi); @@ -1487,6 +1304,7 @@ static void __exit ib_uverbs_cleanup(void) IB_UVERBS_NUM_FIXED_MINOR); unregister_chrdev_region(dynamic_uverbs_dev, IB_UVERBS_NUM_DYNAMIC_MINOR); + mmu_notifier_synchronize(); } module_init(ib_uverbs_init); diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 35b2e2c640cc..3abfc63225cb 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -105,7 +105,7 @@ static int uverbs_free_qp(struct ib_uobject *uobject, if (uqp->uxrcd) atomic_dec(&uqp->uxrcd->refcnt); - ib_uverbs_release_uevent(attrs->ufile, &uqp->uevent); + ib_uverbs_release_uevent(&uqp->uevent); return ret; } @@ -138,7 +138,7 @@ static int uverbs_free_wq(struct ib_uobject *uobject, if (ib_is_destroy_retryable(ret, why, uobject)) return ret; - ib_uverbs_release_uevent(attrs->ufile, &uwq->uevent); + ib_uverbs_release_uevent(&uwq->uevent); return ret; } @@ -163,7 +163,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject, atomic_dec(&us->uxrcd->refcnt); } - ib_uverbs_release_uevent(attrs->ufile, uevent); + ib_uverbs_release_uevent(uevent); return ret; } @@ -202,24 +202,41 @@ static int uverbs_free_pd(struct ib_uobject *uobject, return 0; } -static int uverbs_hot_unplug_completion_event_file(struct ib_uobject *uobj, - enum rdma_remove_reason why) +void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue) { - struct ib_uverbs_completion_event_file *comp_event_file = - container_of(uobj, struct ib_uverbs_completion_event_file, - uobj); - struct ib_uverbs_event_queue *event_queue = &comp_event_file->ev_queue; + struct ib_uverbs_event *entry, *tmp; spin_lock_irq(&event_queue->lock); + /* + * The user must ensure that no new items are added to the event_list + * once is_closed is set. 
+ */ event_queue->is_closed = 1; spin_unlock_irq(&event_queue->lock); + wake_up_interruptible(&event_queue->poll_wait); + kill_fasync(&event_queue->async_queue, SIGIO, POLL_IN); - if (why == RDMA_REMOVE_DRIVER_REMOVE) { - wake_up_interruptible(&event_queue->poll_wait); - kill_fasync(&event_queue->async_queue, SIGIO, POLL_IN); + spin_lock_irq(&event_queue->lock); + list_for_each_entry_safe(entry, tmp, &event_queue->event_list, list) { + if (entry->counter) + list_del(&entry->obj_list); + list_del(&entry->list); + kfree(entry); } + spin_unlock_irq(&event_queue->lock); +} + +static int +uverbs_completion_event_file_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) +{ + struct ib_uverbs_completion_event_file *file = + container_of(uobj, struct ib_uverbs_completion_event_file, + uobj); + + ib_uverbs_free_event_queue(&file->ev_queue); return 0; -}; +} int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs) { @@ -230,7 +247,7 @@ EXPORT_SYMBOL(uverbs_destroy_def_handler); DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_COMP_CHANNEL, UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file), - uverbs_hot_unplug_completion_event_file, + uverbs_completion_event_file_destroy_uobj, &uverbs_event_fops, "[infinibandevent]", O_RDONLY)); diff --git a/drivers/infiniband/core/uverbs_std_types_async_fd.c b/drivers/infiniband/core/uverbs_std_types_async_fd.c new file mode 100644 index 000000000000..82ec0806b34b --- /dev/null +++ b/drivers/infiniband/core/uverbs_std_types_async_fd.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. + */ + +#include <rdma/uverbs_std_types.h> +#include <rdma/uverbs_ioctl.h> +#include "rdma_core.h" +#include "uverbs.h" + +static int UVERBS_HANDLER(UVERBS_METHOD_ASYNC_EVENT_ALLOC)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = + uverbs_attr_get_uobject(attrs, UVERBS_METHOD_ASYNC_EVENT_ALLOC); + + ib_uverbs_init_async_event_file( + container_of(uobj, struct ib_uverbs_async_event_file, uobj)); + return 0; +} + +static int uverbs_async_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) +{ + struct ib_uverbs_async_event_file *event_file = + container_of(uobj, struct ib_uverbs_async_event_file, uobj); + + ib_unregister_event_handler(&event_file->event_handler); + ib_uverbs_free_event_queue(&event_file->ev_queue); + return 0; +} + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_ASYNC_EVENT_ALLOC, + UVERBS_ATTR_FD(UVERBS_ATTR_ASYNC_EVENT_ALLOC_FD_HANDLE, + UVERBS_OBJECT_ASYNC_EVENT, + UVERBS_ACCESS_NEW, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_ASYNC_EVENT, + UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_async_event_file), + uverbs_async_event_destroy_uobj, + &uverbs_async_event_fops, + "[infinibandevent]", + O_RDONLY), + &UVERBS_METHOD(UVERBS_METHOD_ASYNC_EVENT_ALLOC)); + +const struct uapi_definition uverbs_def_obj_async_fd[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_ASYNC_EVENT), + {} +}; diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index e39fe6a8aac4..da4110a0eea2 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -41,7 +41,7 @@ static int uverbs_free_cq(struct ib_uobject *uobject, struct ib_cq *cq = uobject->object; struct ib_uverbs_event_queue *ev_queue = cq->cq_context; struct ib_ucq_object *ucq = - container_of(uobject, struct ib_ucq_object, uobject); + 
container_of(uobject, struct ib_ucq_object, uevent.uobject); int ret; ret = ib_destroy_cq_user(cq, &attrs->driver_udata); @@ -49,7 +49,6 @@ static int uverbs_free_cq(struct ib_uobject *uobject, return ret; ib_uverbs_release_ucq( - attrs->ufile, ev_queue ? container_of(ev_queue, struct ib_uverbs_completion_event_file, ev_queue) : @@ -63,7 +62,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( { struct ib_ucq_object *obj = container_of( uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE), - typeof(*obj), uobject); + typeof(*obj), uevent.uobject); struct ib_device *ib_dev = attrs->context->device; int ret; u64 user_handle; @@ -106,10 +105,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( goto err_event_file; } - obj->comp_events_reported = 0; - obj->async_events_reported = 0; INIT_LIST_HEAD(&obj->comp_list); - INIT_LIST_HEAD(&obj->async_list); + INIT_LIST_HEAD(&obj->uevent.event_list); cq = rdma_zalloc_drv_obj(ib_dev, ib_cq); if (!cq) { @@ -118,7 +115,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( } cq->device = ib_dev; - cq->uobject = &obj->uobject; + cq->uobject = obj; cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; cq->cq_context = ev_file ? &ev_file->ev_queue : NULL; @@ -129,8 +126,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( if (ret) goto err_free; - obj->uobject.object = cq; - obj->uobject.user_handle = user_handle; + obj->uevent.uobject.object = cq; + obj->uevent.uobject.user_handle = user_handle; rdma_restrack_uadd(&cq->res); ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe, @@ -182,10 +179,10 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)( struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE); struct ib_ucq_object *obj = - container_of(uobj, struct ib_ucq_object, uobject); + container_of(uobj, struct ib_ucq_object, uevent.uobject); struct ib_uverbs_destroy_cq_resp resp = { .comp_events_reported = obj->comp_events_reported, - .async_events_reported = obj->async_events_reported + .async_events_reported = obj->uevent.events_reported }; return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_CQ_RESP, &resp, diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c index 2a3f2f01028d..ae4a59d6f9b1 100644 --- a/drivers/infiniband/core/uverbs_std_types_device.c +++ b/drivers/infiniband/core/uverbs_std_types_device.c @@ -200,6 +200,43 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_PORT)( &resp, sizeof(resp)); } +static int UVERBS_HANDLER(UVERBS_METHOD_GET_CONTEXT)( + struct uverbs_attr_bundle *attrs) +{ + u32 num_comp = attrs->ufile->device->num_comp_vectors; + u64 core_support = IB_UVERBS_CORE_SUPPORT_OPTIONAL_MR_ACCESS; + int ret; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS, + &num_comp, sizeof(num_comp)); + if (IS_UVERBS_COPY_ERR(ret)) + return ret; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT, + &core_support, sizeof(core_support)); + if (IS_UVERBS_COPY_ERR(ret)) + return ret; + + ret = ib_alloc_ucontext(attrs); + if (ret) + return ret; + ret = ib_init_ucontext(attrs); + if (ret) { + kfree(attrs->context); + attrs->context = NULL; + return ret; + } + return 0; +} + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_GET_CONTEXT, + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS, + UVERBS_ATTR_TYPE(u32), UA_OPTIONAL), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT, + UVERBS_ATTR_TYPE(u64), UA_OPTIONAL), + 
UVERBS_ATTR_UHW()); + DECLARE_UVERBS_NAMED_METHOD( UVERBS_METHOD_INFO_HANDLES, /* Also includes any device specific object ids */ @@ -220,6 +257,7 @@ DECLARE_UVERBS_NAMED_METHOD( UA_MANDATORY)); DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE, + &UVERBS_METHOD(UVERBS_METHOD_GET_CONTEXT), &UVERBS_METHOD(UVERBS_METHOD_INVOKE_WRITE), &UVERBS_METHOD(UVERBS_METHOD_INFO_HANDLES), &UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT)); diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c index 00c547887132..3f121ac31e0a 100644 --- a/drivers/infiniband/core/uverbs_uapi.c +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -195,9 +195,9 @@ static int uapi_merge_obj_tree(struct uverbs_api *uapi, * disassociation, and the FD types require the driver to use * struct file_operations.owner to prevent the driver module * code from unloading while the file is open. This provides - * enough safety that uverbs_close_fd() will continue to work. - * Drivers using FD are responsible to handle disassociation of - * the device on their own. + * enough safety that uverbs_uobject_fd_release() will + * continue to work. Drivers using FD are responsible to + * handle disassociation of the device on their own. */ if (WARN_ON(is_driver && obj->type_attrs->type_class != &uverbs_idr_class && @@ -626,6 +626,7 @@ void uverbs_destroy_api(struct uverbs_api *uapi) } static const struct uapi_definition uverbs_core_api[] = { + UAPI_DEF_CHAIN(uverbs_def_obj_async_fd), UAPI_DEF_CHAIN(uverbs_def_obj_counters), UAPI_DEF_CHAIN(uverbs_def_obj_cq), UAPI_DEF_CHAIN(uverbs_def_obj_device), diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 92349bf37589..3ebae3b65c28 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -52,6 +52,9 @@ #include <rdma/rw.h> #include "core_priv.h" +#include <trace/events/rdma_core.h> + +#include <trace/events/rdma_core.h> static int ib_resolve_eth_dmac(struct ib_device *device, struct rdma_ah_attr *ah_attr); @@ -244,6 +247,8 @@ EXPORT_SYMBOL(rdma_port_get_link_layer); /** * ib_alloc_pd - Allocates an unused protection domain. * @device: The device on which to allocate the protection domain. + * @flags: protection domain flags + * @caller: caller's build-time module name * * A protection domain object provides an association between QPs, shared * receive queues, address handles, memory regions, and memory windows. 
@@ -662,16 +667,17 @@ static bool find_gid_index(const union ib_gid *gid, void *context) { struct find_gid_index_context *ctx = context; + u16 vlan_id = 0xffff; + int ret; if (ctx->gid_type != gid_attr->gid_type) return false; - if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) || - (is_vlan_dev(gid_attr->ndev) && - vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id)) + ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL); + if (ret) return false; - return true; + return ctx->vlan_id == vlan_id; } static const struct ib_gid_attr * @@ -1050,11 +1056,11 @@ static void __ib_shared_qp_event_handler(struct ib_event *event, void *context) struct ib_qp *qp = context; unsigned long flags; - spin_lock_irqsave(&qp->device->event_handler_lock, flags); + spin_lock_irqsave(&qp->device->qp_open_list_lock, flags); list_for_each_entry(event->element.qp, &qp->open_list, open_list) if (event->element.qp->event_handler) event->element.qp->event_handler(event, event->element.qp->qp_context); - spin_unlock_irqrestore(&qp->device->event_handler_lock, flags); + spin_unlock_irqrestore(&qp->device->qp_open_list_lock, flags); } static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp) @@ -1091,9 +1097,9 @@ static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp, qp->qp_num = real_qp->qp_num; qp->qp_type = real_qp->qp_type; - spin_lock_irqsave(&real_qp->device->event_handler_lock, flags); + spin_lock_irqsave(&real_qp->device->qp_open_list_lock, flags); list_add(&qp->open_list, &real_qp->open_list); - spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); + spin_unlock_irqrestore(&real_qp->device->qp_open_list_lock, flags); return qp; } @@ -1821,9 +1827,9 @@ int ib_close_qp(struct ib_qp *qp) if (real_qp == qp) return -EINVAL; - spin_lock_irqsave(&real_qp->device->event_handler_lock, flags); + spin_lock_irqsave(&real_qp->device->qp_open_list_lock, flags); list_del(&qp->open_list); - spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); + spin_unlock_irqrestore(&real_qp->device->qp_open_list_lock, flags); atomic_dec(&real_qp->usecnt); if (qp->qp_sec) @@ -1987,6 +1993,47 @@ EXPORT_SYMBOL(ib_resize_cq); /* Memory regions */ +struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int access_flags) +{ + struct ib_mr *mr; + + if (access_flags & IB_ACCESS_ON_DEMAND) { + if (!(pd->device->attrs.device_cap_flags & + IB_DEVICE_ON_DEMAND_PAGING)) { + pr_debug("ODP support not available\n"); + return ERR_PTR(-EINVAL); + } + } + + mr = pd->device->ops.reg_user_mr(pd, start, length, virt_addr, + access_flags, NULL); + + if (IS_ERR(mr)) + return mr; + + mr->device = pd->device; + mr->pd = pd; + mr->dm = NULL; + atomic_inc(&pd->usecnt); + mr->res.type = RDMA_RESTRACK_MR; + rdma_restrack_kadd(&mr->res); + + return mr; +} +EXPORT_SYMBOL(ib_reg_user_mr); + +int ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, + u32 flags, struct ib_sge *sg_list, u32 num_sge) +{ + if (!pd->device->ops.advise_mr) + return -EOPNOTSUPP; + + return pd->device->ops.advise_mr(pd, advice, flags, sg_list, num_sge, + NULL); +} +EXPORT_SYMBOL(ib_advise_mr); + int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata) { struct ib_pd *pd = mr->pd; @@ -1994,6 +2041,7 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata) struct ib_sig_attrs *sig_attrs = mr->sig_attrs; int ret; + trace_mr_dereg(mr); rdma_restrack_del(&mr->res); ret = mr->device->ops.dereg_mr(mr, udata); if (!ret) { @@ -2025,11 +2073,16 @@ struct ib_mr *ib_alloc_mr_user(struct 
ib_pd *pd, enum ib_mr_type mr_type, { struct ib_mr *mr; - if (!pd->device->ops.alloc_mr) - return ERR_PTR(-EOPNOTSUPP); + if (!pd->device->ops.alloc_mr) { + mr = ERR_PTR(-EOPNOTSUPP); + goto out; + } - if (WARN_ON_ONCE(mr_type == IB_MR_TYPE_INTEGRITY)) - return ERR_PTR(-EINVAL); + if (mr_type == IB_MR_TYPE_INTEGRITY) { + WARN_ON_ONCE(1); + mr = ERR_PTR(-EINVAL); + goto out; + } mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg, udata); if (!IS_ERR(mr)) { @@ -2045,6 +2098,8 @@ struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, mr->sig_attrs = NULL; } +out: + trace_mr_alloc(pd, mr_type, max_num_sg, mr); return mr; } EXPORT_SYMBOL(ib_alloc_mr_user); @@ -2069,21 +2124,27 @@ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, struct ib_sig_attrs *sig_attrs; if (!pd->device->ops.alloc_mr_integrity || - !pd->device->ops.map_mr_sg_pi) - return ERR_PTR(-EOPNOTSUPP); + !pd->device->ops.map_mr_sg_pi) { + mr = ERR_PTR(-EOPNOTSUPP); + goto out; + } - if (!max_num_meta_sg) - return ERR_PTR(-EINVAL); + if (!max_num_meta_sg) { + mr = ERR_PTR(-EINVAL); + goto out; + } sig_attrs = kzalloc(sizeof(struct ib_sig_attrs), GFP_KERNEL); - if (!sig_attrs) - return ERR_PTR(-ENOMEM); + if (!sig_attrs) { + mr = ERR_PTR(-ENOMEM); + goto out; + } mr = pd->device->ops.alloc_mr_integrity(pd, max_num_data_sg, max_num_meta_sg); if (IS_ERR(mr)) { kfree(sig_attrs); - return mr; + goto out; } mr->device = pd->device; @@ -2097,6 +2158,8 @@ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, mr->type = IB_MR_TYPE_INTEGRITY; mr->sig_attrs = sig_attrs; +out: + trace_mr_integ_alloc(pd, max_num_data_sg, max_num_meta_sg, mr); return mr; } EXPORT_SYMBOL(ib_alloc_mr_integrity); @@ -2259,6 +2322,7 @@ int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) if (ret) return ret; } + mutex_destroy(&xrcd->tgt_qp_mutex); return xrcd->device->ops.dealloc_xrcd(xrcd, udata); } @@ -2457,6 +2521,16 @@ int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, } EXPORT_SYMBOL(ib_set_vf_guid); +int ib_get_vf_guid(struct ib_device *device, int vf, u8 port, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid) +{ + if (!device->ops.get_vf_guid) + return -EOPNOTSUPP; + + return device->ops.get_vf_guid(device, vf, port, node_guid, port_guid); +} +EXPORT_SYMBOL(ib_get_vf_guid); /** * ib_map_mr_sg_pi() - Map the dma mapped SG lists for PI (protection * information) and set an appropriate memory region for registration. @@ -2730,6 +2804,7 @@ void ib_drain_sq(struct ib_qp *qp) qp->device->ops.drain_sq(qp); else __ib_drain_sq(qp); + trace_cq_drain_complete(qp->send_cq); } EXPORT_SYMBOL(ib_drain_sq); @@ -2758,6 +2833,7 @@ void ib_drain_rq(struct ib_qp *qp) qp->device->ops.drain_rq(qp); else __ib_drain_rq(qp); + trace_cq_drain_complete(qp->recv_cq); } EXPORT_SYMBOL(ib_drain_rq); |
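
The async-event rework above moves the struct ib_event_handler from the uverbs file into each async event FD: ib_uverbs_init_async_event_file() registers it and uverbs_async_event_destroy_uobj() unregisters it. A minimal sketch of that registration pattern follows, assuming only the ib_event_handler API already visible in this diff; the my_ctx, my_handler, my_ctx_create and my_ctx_destroy names are illustrative and are not part of the series.

#include <linux/printk.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>

/*
 * Illustrative only: the bare ib_event_handler pattern relied on by
 * ib_uverbs_init_async_event_file() / uverbs_async_event_destroy_uobj().
 */
struct my_ctx {
	struct ib_event_handler handler;
};

static void my_handler(struct ib_event_handler *handler,
		       struct ib_event *event)
{
	/*
	 * container_of(handler, struct my_ctx, handler) recovers the
	 * embedding object, as ib_uverbs_event_handler() does above.
	 */
	pr_info("IB async event %d, port %u\n",
		event->event, event->element.port_num);
}

static struct my_ctx *my_ctx_create(struct ib_device *ib_dev)
{
	struct my_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);

	if (!ctx)
		return NULL;

	/* Fills in handler->device and handler->handler. */
	INIT_IB_EVENT_HANDLER(&ctx->handler, ib_dev, my_handler);
	ib_register_event_handler(&ctx->handler);
	return ctx;
}

static void my_ctx_destroy(struct my_ctx *ctx)
{
	/* Unregister before the memory embedding the handler is freed. */
	ib_unregister_event_handler(&ctx->handler);
	kfree(ctx);
}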