diff options
Diffstat (limited to 'drivers/infiniband/core/cma.c')
-rw-r--r-- | drivers/infiniband/core/cma.c | 209 |
1 files changed, 144 insertions, 65 deletions
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 19f1730a4f24..72f032160c4b 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1,36 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. - * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. + * Copyright (c) 1999-2019, Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include <linux/completion.h> @@ -63,6 +36,7 @@ #include "core_priv.h" #include "cma_priv.h" +#include "cma_trace.h" MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("Generic RDMA CM Agent"); @@ -904,6 +878,7 @@ struct rdma_cm_id *__rdma_create_id(struct net *net, id_priv->id.route.addr.dev_addr.net = get_net(net); id_priv->seq_num &= 0x00ffffff; + trace_cm_id_create(id_priv); return &id_priv->id; } EXPORT_SYMBOL(__rdma_create_id); @@ -955,27 +930,34 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (id->device != pd->device) - return -EINVAL; + if (id->device != pd->device) { + ret = -EINVAL; + goto out_err; + } qp_init_attr->port_num = id->port_num; qp = ib_create_qp(pd, qp_init_attr); - if (IS_ERR(qp)) - return PTR_ERR(qp); + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); + goto out_err; + } if (id->qp_type == IB_QPT_UD) ret = cma_init_ud_qp(id_priv, qp); else ret = cma_init_conn_qp(id_priv, qp); if (ret) - goto err; + goto out_destroy; id->qp = qp; id_priv->qp_num = qp->qp_num; id_priv->srq = (qp->srq != NULL); + trace_cm_qp_create(id_priv, pd, qp_init_attr, 0); return 0; -err: +out_destroy: ib_destroy_qp(qp); +out_err: + trace_cm_qp_create(id_priv, pd, qp_init_attr, ret); return ret; } EXPORT_SYMBOL(rdma_create_qp); @@ -985,6 +967,7 @@ void rdma_destroy_qp(struct rdma_cm_id *id) struct rdma_id_private *id_priv; id_priv = container_of(id, struct rdma_id_private, id); + trace_cm_qp_destroy(id_priv); mutex_lock(&id_priv->qp_mutex); ib_destroy_qp(id_priv->id.qp); id_priv->id.qp = NULL; @@ -1838,6 +1821,7 @@ void rdma_destroy_id(struct rdma_cm_id *id) enum rdma_cm_state state; id_priv = container_of(id, struct rdma_id_private, id); + trace_cm_id_destroy(id_priv); state = cma_exch(id_priv, RDMA_CM_DESTROYING); cma_cancel_operation(id_priv, state); @@ -1890,6 +1874,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv) if (ret) goto reject; + trace_cm_send_rtu(id_priv); ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0); if (ret) goto reject; @@ -1898,6 +1883,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv) reject: pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply. status %d\n", ret); cma_modify_qp_err(id_priv); + trace_cm_send_rej(id_priv); ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, NULL, 0); return ret; @@ -1917,6 +1903,17 @@ static void cma_set_rep_event_data(struct rdma_cm_event *event, event->param.conn.qp_num = rep_data->remote_qpn; } +static int cma_cm_event_handler(struct rdma_id_private *id_priv, + struct rdma_cm_event *event) +{ + int ret; + + trace_cm_event_handler(id_priv, event); + ret = id_priv->id.event_handler(&id_priv->id, event); + trace_cm_event_done(id_priv, event, ret); + return ret; +} + static int cma_ib_handler(struct ib_cm_id *cm_id, const struct ib_cm_event *ib_event) { @@ -1939,8 +1936,10 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, break; case IB_CM_REP_RECEIVED: if (cma_comp(id_priv, RDMA_CM_CONNECT) && - (id_priv->id.qp_type != IB_QPT_UD)) + (id_priv->id.qp_type != IB_QPT_UD)) { + trace_cm_send_mra(id_priv); ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); + } if (id_priv->id.qp) { event.status = cma_rep_recv(id_priv); event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR : @@ -1985,7 +1984,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, goto out; } - ret = id_priv->id.event_handler(&id_priv->id, &event); + ret = cma_cm_event_handler(id_priv, &event); if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.ib = NULL; @@ -2146,6 +2145,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id, if (IS_ERR(listen_id)) return PTR_ERR(listen_id); + trace_cm_req_handler(listen_id, ib_event->event); if (!cma_ib_check_req_qp_type(&listen_id->id, ib_event)) { ret = -EINVAL; goto net_dev_put; @@ -2188,7 +2188,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id, * until we're done accessing it. */ atomic_inc(&conn_id->refcount); - ret = conn_id->id.event_handler(&conn_id->id, &event); + ret = cma_cm_event_handler(conn_id, &event); if (ret) goto err3; /* @@ -2197,8 +2197,10 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id, */ mutex_lock(&lock); if (cma_comp(conn_id, RDMA_CM_CONNECT) && - (conn_id->id.qp_type != IB_QPT_UD)) + (conn_id->id.qp_type != IB_QPT_UD)) { + trace_cm_send_mra(cm_id->context); ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); + } mutex_unlock(&lock); mutex_unlock(&conn_id->handler_mutex); mutex_unlock(&listen_id->handler_mutex); @@ -2313,7 +2315,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) event.status = iw_event->status; event.param.conn.private_data = iw_event->private_data; event.param.conn.private_data_len = iw_event->private_data_len; - ret = id_priv->id.event_handler(&id_priv->id, &event); + ret = cma_cm_event_handler(id_priv, &event); if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.iw = NULL; @@ -2390,15 +2392,16 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, * until we're done accessing it. */ atomic_inc(&conn_id->refcount); - ret = conn_id->id.event_handler(&conn_id->id, &event); + ret = cma_cm_event_handler(conn_id, &event); if (ret) { /* User wants to destroy the CM ID */ conn_id->cm_id.iw = NULL; cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); + mutex_unlock(&listen_id->handler_mutex); cma_deref_id(conn_id); rdma_destroy_id(&conn_id->id); - goto out; + return ret; } mutex_unlock(&conn_id->handler_mutex); @@ -2461,6 +2464,7 @@ static int cma_listen_handler(struct rdma_cm_id *id, id->context = id_priv->id.context; id->event_handler = id_priv->id.event_handler; + trace_cm_event_handler(id_priv, event); return id_priv->id.event_handler(id, event); } @@ -2530,7 +2534,9 @@ EXPORT_SYMBOL(rdma_set_service_type); * This function should be called before rdma_connect() on active side, * and on passive side before rdma_accept(). It is applicable to primary * path only. The timeout will affect the local side of the QP, it is not - * negotiated with remote side and zero disables the timer. + * negotiated with remote side and zero disables the timer. In case it is + * set before rdma_resolve_route, the value will also be used to determine + * PacketLifeTime for RoCE. * * Return: 0 for success */ @@ -2635,7 +2641,7 @@ static void cma_work_handler(struct work_struct *_work) if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) goto out; - if (id_priv->id.event_handler(&id_priv->id, &work->event)) { + if (cma_cm_event_handler(id_priv, &work->event)) { cma_exch(id_priv, RDMA_CM_DESTROYING); destroy = 1; } @@ -2658,7 +2664,7 @@ static void cma_ndev_work_handler(struct work_struct *_work) id_priv->state == RDMA_CM_DEVICE_REMOVAL) goto out; - if (id_priv->id.event_handler(&id_priv->id, &work->event)) { + if (cma_cm_event_handler(id_priv, &work->event)) { cma_exch(id_priv, RDMA_CM_DESTROYING); destroy = 1; } @@ -2827,22 +2833,65 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv) return 0; } -static int iboe_tos_to_sl(struct net_device *ndev, int tos) +static int get_vlan_ndev_tc(struct net_device *vlan_ndev, int prio) { - int prio; struct net_device *dev; - prio = rt_tos2priority(tos); - dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev; + dev = vlan_dev_real_dev(vlan_ndev); if (dev->num_tc) return netdev_get_prio_tc_map(dev, prio); -#if IS_ENABLED(CONFIG_VLAN_8021Q) + return (vlan_dev_get_egress_qos_mask(vlan_ndev, prio) & + VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; +} + +struct iboe_prio_tc_map { + int input_prio; + int output_tc; + bool found; +}; + +static int get_lower_vlan_dev_tc(struct net_device *dev, void *data) +{ + struct iboe_prio_tc_map *map = data; + + if (is_vlan_dev(dev)) + map->output_tc = get_vlan_ndev_tc(dev, map->input_prio); + else if (dev->num_tc) + map->output_tc = netdev_get_prio_tc_map(dev, map->input_prio); + else + map->output_tc = 0; + /* We are interested only in first level VLAN device, so always + * return 1 to stop iterating over next level devices. + */ + map->found = true; + return 1; +} + +static int iboe_tos_to_sl(struct net_device *ndev, int tos) +{ + struct iboe_prio_tc_map prio_tc_map = {}; + int prio = rt_tos2priority(tos); + + /* If VLAN device, get it directly from the VLAN netdev */ if (is_vlan_dev(ndev)) - return (vlan_dev_get_egress_qos_mask(ndev, prio) & - VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; -#endif - return 0; + return get_vlan_ndev_tc(ndev, prio); + + prio_tc_map.input_prio = prio; + rcu_read_lock(); + netdev_walk_all_lower_dev_rcu(ndev, + get_lower_vlan_dev_tc, + &prio_tc_map); + rcu_read_unlock(); + /* If map is found from lower device, use it; Otherwise + * continue with the current netdevice to get priority to tc map. + */ + if (prio_tc_map.found) + return prio_tc_map.output_tc; + else if (ndev->num_tc) + return netdev_get_prio_tc_map(ndev, prio); + else + return 0; } static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) @@ -2896,7 +2945,16 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) route->path_rec->rate = iboe_get_rate(ndev); dev_put(ndev); route->path_rec->packet_life_time_selector = IB_SA_EQ; - route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME; + /* In case ACK timeout is set, use this value to calculate + * PacketLifeTime. As per IBTA 12.7.34, + * local ACK timeout = (2 * PacketLifeTime + Local CA’s ACK delay). + * Assuming a negligible local ACK delay, we can use + * PacketLifeTime = local ACK timeout/2 + * as a reasonable approximation for RoCE networks. + */ + route->path_rec->packet_life_time = id_priv->timeout_set ? + id_priv->timeout - 1 : CMA_IBOE_PACKET_LIFETIME; + if (!route->path_rec->mtu) { ret = -EINVAL; goto err2; @@ -3046,7 +3104,7 @@ static void addr_handler(int status, struct sockaddr *src_addr, if (status) pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n", status); - } else { + } else if (status) { pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status); } @@ -3061,7 +3119,7 @@ static void addr_handler(int status, struct sockaddr *src_addr, } else event.event = RDMA_CM_EVENT_ADDR_RESOLVED; - if (id_priv->id.event_handler(&id_priv->id, &event)) { + if (cma_cm_event_handler(id_priv, &event)) { cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); @@ -3090,6 +3148,7 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv) rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); + atomic_inc(&id_priv->refcount); cma_init_resolve_addr_work(work, id_priv); queue_work(cma_wq, &work->work); return 0; @@ -3116,6 +3175,7 @@ static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *) &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr)); + atomic_inc(&id_priv->refcount); cma_init_resolve_addr_work(work, id_priv); queue_work(cma_wq, &work->work); return 0; @@ -3708,7 +3768,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, goto out; } - ret = id_priv->id.event_handler(&id_priv->id, &event); + ret = cma_cm_event_handler(id_priv, &event); rdma_destroy_ah_attr(&event.param.ud.ah_attr); if (ret) { @@ -3772,6 +3832,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); req.max_cm_retries = CMA_MAX_CM_RETRIES; + trace_cm_send_sidr_req(id_priv); ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req); if (ret) { ib_destroy_cm_id(id_priv->cm_id.ib); @@ -3845,6 +3906,7 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, req.max_cm_retries = CMA_MAX_CM_RETRIES; req.srq = id_priv->srq ? 1 : 0; + trace_cm_send_req(id_priv); ret = ib_send_cm_req(id_priv->cm_id.ib, &req); out: if (ret && !IS_ERR(id)) { @@ -3958,6 +4020,7 @@ static int cma_accept_ib(struct rdma_id_private *id_priv, rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); rep.srq = id_priv->srq ? 1 : 0; + trace_cm_send_rep(id_priv); ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); out: return ret; @@ -4007,6 +4070,7 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv, rep.private_data = private_data; rep.private_data_len = private_data_len; + trace_cm_send_sidr_rep(id_priv); return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); } @@ -4092,13 +4156,15 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data, return -EINVAL; if (rdma_cap_ib_cm(id->device, id->port_num)) { - if (id->qp_type == IB_QPT_UD) + if (id->qp_type == IB_QPT_UD) { ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0, private_data, private_data_len); - else + } else { + trace_cm_send_rej(id_priv); ret = ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, private_data, private_data_len); + } } else if (rdma_cap_iw_cm(id->device, id->port_num)) { ret = iw_cm_reject(id_priv->cm_id.iw, private_data, private_data_len); @@ -4123,8 +4189,13 @@ int rdma_disconnect(struct rdma_cm_id *id) if (ret) goto out; /* Initiate or respond to a disconnect. */ - if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) - ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); + trace_cm_disconnect(id_priv); + if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) { + if (!ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0)) + trace_cm_sent_drep(id_priv); + } else { + trace_cm_sent_dreq(id_priv); + } } else if (rdma_cap_iw_cm(id->device, id->port_num)) { ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); } else @@ -4190,7 +4261,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) } else event.event = RDMA_CM_EVENT_MULTICAST_ERROR; - ret = id_priv->id.event_handler(&id_priv->id, &event); + ret = cma_cm_event_handler(id_priv, &event); rdma_destroy_ah_attr(&event.param.ud.ah_attr); if (ret) { @@ -4595,6 +4666,7 @@ static void cma_add_one(struct ib_device *device) cma_listen_on_dev(id_priv, cma_dev); mutex_unlock(&lock); + trace_cm_add_one(device); return; free_gid_type: @@ -4625,7 +4697,7 @@ static int cma_remove_id_dev(struct rdma_id_private *id_priv) goto out; event.event = RDMA_CM_EVENT_DEVICE_REMOVAL; - ret = id_priv->id.event_handler(&id_priv->id, &event); + ret = cma_cm_event_handler(id_priv, &event); out: mutex_unlock(&id_priv->handler_mutex); return ret; @@ -4663,6 +4735,8 @@ static void cma_remove_one(struct ib_device *device, void *client_data) { struct cma_device *cma_dev = client_data; + trace_cm_remove_one(device); + if (!cma_dev) return; @@ -4724,13 +4798,18 @@ static int __init cma_init(void) if (ret) goto err; - cma_configfs_init(); + ret = cma_configfs_init(); + if (ret) + goto err_ib; return 0; +err_ib: + ib_unregister_client(&cma_client); err: unregister_netdevice_notifier(&cma_nb); ib_sa_unregister_client(&sa_client); + unregister_pernet_subsys(&cma_pernet_operations); err_wq: destroy_workqueue(cma_wq); return ret; |