summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/core/cma.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/core/cma.c')
-rw-r--r--drivers/infiniband/core/cma.c209
1 files changed, 144 insertions, 65 deletions
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 19f1730a4f24..72f032160c4b 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1,36 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2005 Voltaire Inc. All rights reserved.
* Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
- * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
+ * Copyright (c) 1999-2019, Mellanox Technologies, Inc. All rights reserved.
* Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/completion.h>
@@ -63,6 +36,7 @@
#include "core_priv.h"
#include "cma_priv.h"
+#include "cma_trace.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
@@ -904,6 +878,7 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
id_priv->id.route.addr.dev_addr.net = get_net(net);
id_priv->seq_num &= 0x00ffffff;
+ trace_cm_id_create(id_priv);
return &id_priv->id;
}
EXPORT_SYMBOL(__rdma_create_id);
@@ -955,27 +930,34 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (id->device != pd->device)
- return -EINVAL;
+ if (id->device != pd->device) {
+ ret = -EINVAL;
+ goto out_err;
+ }
qp_init_attr->port_num = id->port_num;
qp = ib_create_qp(pd, qp_init_attr);
- if (IS_ERR(qp))
- return PTR_ERR(qp);
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
+ goto out_err;
+ }
if (id->qp_type == IB_QPT_UD)
ret = cma_init_ud_qp(id_priv, qp);
else
ret = cma_init_conn_qp(id_priv, qp);
if (ret)
- goto err;
+ goto out_destroy;
id->qp = qp;
id_priv->qp_num = qp->qp_num;
id_priv->srq = (qp->srq != NULL);
+ trace_cm_qp_create(id_priv, pd, qp_init_attr, 0);
return 0;
-err:
+out_destroy:
ib_destroy_qp(qp);
+out_err:
+ trace_cm_qp_create(id_priv, pd, qp_init_attr, ret);
return ret;
}
EXPORT_SYMBOL(rdma_create_qp);
@@ -985,6 +967,7 @@ void rdma_destroy_qp(struct rdma_cm_id *id)
struct rdma_id_private *id_priv;
id_priv = container_of(id, struct rdma_id_private, id);
+ trace_cm_qp_destroy(id_priv);
mutex_lock(&id_priv->qp_mutex);
ib_destroy_qp(id_priv->id.qp);
id_priv->id.qp = NULL;
@@ -1838,6 +1821,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
enum rdma_cm_state state;
id_priv = container_of(id, struct rdma_id_private, id);
+ trace_cm_id_destroy(id_priv);
state = cma_exch(id_priv, RDMA_CM_DESTROYING);
cma_cancel_operation(id_priv, state);
@@ -1890,6 +1874,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
if (ret)
goto reject;
+ trace_cm_send_rtu(id_priv);
ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
if (ret)
goto reject;
@@ -1898,6 +1883,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
reject:
pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply. status %d\n", ret);
cma_modify_qp_err(id_priv);
+ trace_cm_send_rej(id_priv);
ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
NULL, 0, NULL, 0);
return ret;
@@ -1917,6 +1903,17 @@ static void cma_set_rep_event_data(struct rdma_cm_event *event,
event->param.conn.qp_num = rep_data->remote_qpn;
}
+static int cma_cm_event_handler(struct rdma_id_private *id_priv,
+ struct rdma_cm_event *event)
+{
+ int ret;
+
+ trace_cm_event_handler(id_priv, event);
+ ret = id_priv->id.event_handler(&id_priv->id, event);
+ trace_cm_event_done(id_priv, event, ret);
+ return ret;
+}
+
static int cma_ib_handler(struct ib_cm_id *cm_id,
const struct ib_cm_event *ib_event)
{
@@ -1939,8 +1936,10 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
break;
case IB_CM_REP_RECEIVED:
if (cma_comp(id_priv, RDMA_CM_CONNECT) &&
- (id_priv->id.qp_type != IB_QPT_UD))
+ (id_priv->id.qp_type != IB_QPT_UD)) {
+ trace_cm_send_mra(id_priv);
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
+ }
if (id_priv->id.qp) {
event.status = cma_rep_recv(id_priv);
event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
@@ -1985,7 +1984,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
goto out;
}
- ret = id_priv->id.event_handler(&id_priv->id, &event);
+ ret = cma_cm_event_handler(id_priv, &event);
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.ib = NULL;
@@ -2146,6 +2145,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
if (IS_ERR(listen_id))
return PTR_ERR(listen_id);
+ trace_cm_req_handler(listen_id, ib_event->event);
if (!cma_ib_check_req_qp_type(&listen_id->id, ib_event)) {
ret = -EINVAL;
goto net_dev_put;
@@ -2188,7 +2188,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
* until we're done accessing it.
*/
atomic_inc(&conn_id->refcount);
- ret = conn_id->id.event_handler(&conn_id->id, &event);
+ ret = cma_cm_event_handler(conn_id, &event);
if (ret)
goto err3;
/*
@@ -2197,8 +2197,10 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
*/
mutex_lock(&lock);
if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
- (conn_id->id.qp_type != IB_QPT_UD))
+ (conn_id->id.qp_type != IB_QPT_UD)) {
+ trace_cm_send_mra(cm_id->context);
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
+ }
mutex_unlock(&lock);
mutex_unlock(&conn_id->handler_mutex);
mutex_unlock(&listen_id->handler_mutex);
@@ -2313,7 +2315,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
event.status = iw_event->status;
event.param.conn.private_data = iw_event->private_data;
event.param.conn.private_data_len = iw_event->private_data_len;
- ret = id_priv->id.event_handler(&id_priv->id, &event);
+ ret = cma_cm_event_handler(id_priv, &event);
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.iw = NULL;
@@ -2390,15 +2392,16 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
* until we're done accessing it.
*/
atomic_inc(&conn_id->refcount);
- ret = conn_id->id.event_handler(&conn_id->id, &event);
+ ret = cma_cm_event_handler(conn_id, &event);
if (ret) {
/* User wants to destroy the CM ID */
conn_id->cm_id.iw = NULL;
cma_exch(conn_id, RDMA_CM_DESTROYING);
mutex_unlock(&conn_id->handler_mutex);
+ mutex_unlock(&listen_id->handler_mutex);
cma_deref_id(conn_id);
rdma_destroy_id(&conn_id->id);
- goto out;
+ return ret;
}
mutex_unlock(&conn_id->handler_mutex);
@@ -2461,6 +2464,7 @@ static int cma_listen_handler(struct rdma_cm_id *id,
id->context = id_priv->id.context;
id->event_handler = id_priv->id.event_handler;
+ trace_cm_event_handler(id_priv, event);
return id_priv->id.event_handler(id, event);
}
@@ -2530,7 +2534,9 @@ EXPORT_SYMBOL(rdma_set_service_type);
* This function should be called before rdma_connect() on active side,
* and on passive side before rdma_accept(). It is applicable to primary
* path only. The timeout will affect the local side of the QP, it is not
- * negotiated with remote side and zero disables the timer.
+ * negotiated with remote side and zero disables the timer. In case it is
+ * set before rdma_resolve_route, the value will also be used to determine
+ * PacketLifeTime for RoCE.
*
* Return: 0 for success
*/
@@ -2635,7 +2641,7 @@ static void cma_work_handler(struct work_struct *_work)
if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
goto out;
- if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
+ if (cma_cm_event_handler(id_priv, &work->event)) {
cma_exch(id_priv, RDMA_CM_DESTROYING);
destroy = 1;
}
@@ -2658,7 +2664,7 @@ static void cma_ndev_work_handler(struct work_struct *_work)
id_priv->state == RDMA_CM_DEVICE_REMOVAL)
goto out;
- if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
+ if (cma_cm_event_handler(id_priv, &work->event)) {
cma_exch(id_priv, RDMA_CM_DESTROYING);
destroy = 1;
}
@@ -2827,22 +2833,65 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv)
return 0;
}
-static int iboe_tos_to_sl(struct net_device *ndev, int tos)
+static int get_vlan_ndev_tc(struct net_device *vlan_ndev, int prio)
{
- int prio;
struct net_device *dev;
- prio = rt_tos2priority(tos);
- dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev;
+ dev = vlan_dev_real_dev(vlan_ndev);
if (dev->num_tc)
return netdev_get_prio_tc_map(dev, prio);
-#if IS_ENABLED(CONFIG_VLAN_8021Q)
+ return (vlan_dev_get_egress_qos_mask(vlan_ndev, prio) &
+ VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+}
+
+struct iboe_prio_tc_map {
+ int input_prio;
+ int output_tc;
+ bool found;
+};
+
+static int get_lower_vlan_dev_tc(struct net_device *dev, void *data)
+{
+ struct iboe_prio_tc_map *map = data;
+
+ if (is_vlan_dev(dev))
+ map->output_tc = get_vlan_ndev_tc(dev, map->input_prio);
+ else if (dev->num_tc)
+ map->output_tc = netdev_get_prio_tc_map(dev, map->input_prio);
+ else
+ map->output_tc = 0;
+ /* We are interested only in first level VLAN device, so always
+ * return 1 to stop iterating over next level devices.
+ */
+ map->found = true;
+ return 1;
+}
+
+static int iboe_tos_to_sl(struct net_device *ndev, int tos)
+{
+ struct iboe_prio_tc_map prio_tc_map = {};
+ int prio = rt_tos2priority(tos);
+
+ /* If VLAN device, get it directly from the VLAN netdev */
if (is_vlan_dev(ndev))
- return (vlan_dev_get_egress_qos_mask(ndev, prio) &
- VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
-#endif
- return 0;
+ return get_vlan_ndev_tc(ndev, prio);
+
+ prio_tc_map.input_prio = prio;
+ rcu_read_lock();
+ netdev_walk_all_lower_dev_rcu(ndev,
+ get_lower_vlan_dev_tc,
+ &prio_tc_map);
+ rcu_read_unlock();
+ /* If map is found from lower device, use it; Otherwise
+ * continue with the current netdevice to get priority to tc map.
+ */
+ if (prio_tc_map.found)
+ return prio_tc_map.output_tc;
+ else if (ndev->num_tc)
+ return netdev_get_prio_tc_map(ndev, prio);
+ else
+ return 0;
}
static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
@@ -2896,7 +2945,16 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
route->path_rec->rate = iboe_get_rate(ndev);
dev_put(ndev);
route->path_rec->packet_life_time_selector = IB_SA_EQ;
- route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
+ /* In case ACK timeout is set, use this value to calculate
+ * PacketLifeTime. As per IBTA 12.7.34,
+ * local ACK timeout = (2 * PacketLifeTime + Local CA’s ACK delay).
+ * Assuming a negligible local ACK delay, we can use
+ * PacketLifeTime = local ACK timeout/2
+ * as a reasonable approximation for RoCE networks.
+ */
+ route->path_rec->packet_life_time = id_priv->timeout_set ?
+ id_priv->timeout - 1 : CMA_IBOE_PACKET_LIFETIME;
+
if (!route->path_rec->mtu) {
ret = -EINVAL;
goto err2;
@@ -3046,7 +3104,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
if (status)
pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n",
status);
- } else {
+ } else if (status) {
pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status);
}
@@ -3061,7 +3119,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
} else
event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
- if (id_priv->id.event_handler(&id_priv->id, &event)) {
+ if (cma_cm_event_handler(id_priv, &event)) {
cma_exch(id_priv, RDMA_CM_DESTROYING);
mutex_unlock(&id_priv->handler_mutex);
rdma_destroy_id(&id_priv->id);
@@ -3090,6 +3148,7 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
+ atomic_inc(&id_priv->refcount);
cma_init_resolve_addr_work(work, id_priv);
queue_work(cma_wq, &work->work);
return 0;
@@ -3116,6 +3175,7 @@ static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
&(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
+ atomic_inc(&id_priv->refcount);
cma_init_resolve_addr_work(work, id_priv);
queue_work(cma_wq, &work->work);
return 0;
@@ -3708,7 +3768,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
goto out;
}
- ret = id_priv->id.event_handler(&id_priv->id, &event);
+ ret = cma_cm_event_handler(id_priv, &event);
rdma_destroy_ah_attr(&event.param.ud.ah_attr);
if (ret) {
@@ -3772,6 +3832,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
req.max_cm_retries = CMA_MAX_CM_RETRIES;
+ trace_cm_send_sidr_req(id_priv);
ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
if (ret) {
ib_destroy_cm_id(id_priv->cm_id.ib);
@@ -3845,6 +3906,7 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
req.max_cm_retries = CMA_MAX_CM_RETRIES;
req.srq = id_priv->srq ? 1 : 0;
+ trace_cm_send_req(id_priv);
ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
out:
if (ret && !IS_ERR(id)) {
@@ -3958,6 +4020,7 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
rep.srq = id_priv->srq ? 1 : 0;
+ trace_cm_send_rep(id_priv);
ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
out:
return ret;
@@ -4007,6 +4070,7 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
rep.private_data = private_data;
rep.private_data_len = private_data_len;
+ trace_cm_send_sidr_rep(id_priv);
return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
}
@@ -4092,13 +4156,15 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
return -EINVAL;
if (rdma_cap_ib_cm(id->device, id->port_num)) {
- if (id->qp_type == IB_QPT_UD)
+ if (id->qp_type == IB_QPT_UD) {
ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0,
private_data, private_data_len);
- else
+ } else {
+ trace_cm_send_rej(id_priv);
ret = ib_send_cm_rej(id_priv->cm_id.ib,
IB_CM_REJ_CONSUMER_DEFINED, NULL,
0, private_data, private_data_len);
+ }
} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
ret = iw_cm_reject(id_priv->cm_id.iw,
private_data, private_data_len);
@@ -4123,8 +4189,13 @@ int rdma_disconnect(struct rdma_cm_id *id)
if (ret)
goto out;
/* Initiate or respond to a disconnect. */
- if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
- ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
+ trace_cm_disconnect(id_priv);
+ if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) {
+ if (!ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0))
+ trace_cm_sent_drep(id_priv);
+ } else {
+ trace_cm_sent_dreq(id_priv);
+ }
} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
} else
@@ -4190,7 +4261,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
} else
event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
- ret = id_priv->id.event_handler(&id_priv->id, &event);
+ ret = cma_cm_event_handler(id_priv, &event);
rdma_destroy_ah_attr(&event.param.ud.ah_attr);
if (ret) {
@@ -4595,6 +4666,7 @@ static void cma_add_one(struct ib_device *device)
cma_listen_on_dev(id_priv, cma_dev);
mutex_unlock(&lock);
+ trace_cm_add_one(device);
return;
free_gid_type:
@@ -4625,7 +4697,7 @@ static int cma_remove_id_dev(struct rdma_id_private *id_priv)
goto out;
event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
- ret = id_priv->id.event_handler(&id_priv->id, &event);
+ ret = cma_cm_event_handler(id_priv, &event);
out:
mutex_unlock(&id_priv->handler_mutex);
return ret;
@@ -4663,6 +4735,8 @@ static void cma_remove_one(struct ib_device *device, void *client_data)
{
struct cma_device *cma_dev = client_data;
+ trace_cm_remove_one(device);
+
if (!cma_dev)
return;
@@ -4724,13 +4798,18 @@ static int __init cma_init(void)
if (ret)
goto err;
- cma_configfs_init();
+ ret = cma_configfs_init();
+ if (ret)
+ goto err_ib;
return 0;
+err_ib:
+ ib_unregister_client(&cma_client);
err:
unregister_netdevice_notifier(&cma_nb);
ib_sa_unregister_client(&sa_client);
+ unregister_pernet_subsys(&cma_pernet_operations);
err_wq:
destroy_workqueue(cma_wq);
return ret;
OpenPOWER on IntegriCloud