summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/core
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--drivers/infiniband/core/addr.c13
-rw-r--r--drivers/infiniband/core/cm.c126
-rw-r--r--drivers/infiniband/core/cma.c77
-rw-r--r--drivers/infiniband/core/iwcm.c2
-rw-r--r--drivers/infiniband/core/mad.c4
-rw-r--r--drivers/infiniband/core/multicast.c2
-rw-r--r--drivers/infiniband/core/sa_query.c2
-rw-r--r--drivers/infiniband/core/sysfs.c2
-rw-r--r--drivers/infiniband/core/ucma.c3
-rw-r--r--drivers/infiniband/core/umem.c8
-rw-r--r--drivers/infiniband/core/umem_odp.c7
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c121
-rw-r--r--drivers/infiniband/core/uverbs_main.c7
-rw-r--r--drivers/infiniband/core/verbs.c77
14 files changed, 323 insertions, 128 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 1374541a4528..0f58f46dbad7 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -699,13 +699,16 @@ EXPORT_SYMBOL(rdma_addr_cancel);
struct resolve_cb_context {
struct rdma_dev_addr *addr;
struct completion comp;
+ int status;
};
static void resolve_cb(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context)
{
- memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct
- rdma_dev_addr));
+ if (!status)
+ memcpy(((struct resolve_cb_context *)context)->addr,
+ addr, sizeof(struct rdma_dev_addr));
+ ((struct resolve_cb_context *)context)->status = status;
complete(&((struct resolve_cb_context *)context)->comp);
}
@@ -743,6 +746,10 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
wait_for_completion(&ctx.comp);
+ ret = ctx.status;
+ if (ret)
+ return ret;
+
memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
if (!dev)
@@ -800,7 +807,7 @@ static struct notifier_block nb = {
int addr_init(void)
{
- addr_wq = create_singlethread_workqueue("ib_addr");
+ addr_wq = alloc_workqueue("ib_addr", WQ_MEM_RECLAIM, 0);
if (!addr_wq)
return -ENOMEM;
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index c99525512b34..71c7c4c328ef 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -80,6 +80,8 @@ static struct ib_cm {
__be32 random_id_operand;
struct list_head timewait_list;
struct workqueue_struct *wq;
+ /* Sync on cm change port state */
+ spinlock_t state_lock;
} cm;
/* Counter indexes ordered by attribute ID */
@@ -161,6 +163,8 @@ struct cm_port {
struct ib_mad_agent *mad_agent;
struct kobject port_obj;
u8 port_num;
+ struct list_head cm_priv_prim_list;
+ struct list_head cm_priv_altr_list;
struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
};
@@ -241,6 +245,12 @@ struct cm_id_private {
u8 service_timeout;
u8 target_ack_delay;
+ struct list_head prim_list;
+ struct list_head altr_list;
+ /* Indicates that the send port mad is registered and av is set */
+ int prim_send_port_not_ready;
+ int altr_send_port_not_ready;
+
struct list_head work_list;
atomic_t work_count;
};
@@ -259,20 +269,47 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
struct ib_mad_agent *mad_agent;
struct ib_mad_send_buf *m;
struct ib_ah *ah;
+ struct cm_av *av;
+ unsigned long flags, flags2;
+ int ret = 0;
+ /* don't let the port to be released till the agent is down */
+ spin_lock_irqsave(&cm.state_lock, flags2);
+ spin_lock_irqsave(&cm.lock, flags);
+ if (!cm_id_priv->prim_send_port_not_ready)
+ av = &cm_id_priv->av;
+ else if (!cm_id_priv->altr_send_port_not_ready &&
+ (cm_id_priv->alt_av.port))
+ av = &cm_id_priv->alt_av;
+ else {
+ pr_info("%s: not valid CM id\n", __func__);
+ ret = -ENODEV;
+ spin_unlock_irqrestore(&cm.lock, flags);
+ goto out;
+ }
+ spin_unlock_irqrestore(&cm.lock, flags);
+ /* Make sure the port haven't released the mad yet */
mad_agent = cm_id_priv->av.port->mad_agent;
- ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr);
- if (IS_ERR(ah))
- return PTR_ERR(ah);
+ if (!mad_agent) {
+ pr_info("%s: not a valid MAD agent\n", __func__);
+ ret = -ENODEV;
+ goto out;
+ }
+ ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr);
+ if (IS_ERR(ah)) {
+ ret = PTR_ERR(ah);
+ goto out;
+ }
m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
- cm_id_priv->av.pkey_index,
+ av->pkey_index,
0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
GFP_ATOMIC,
IB_MGMT_BASE_VERSION);
if (IS_ERR(m)) {
ib_destroy_ah(ah);
- return PTR_ERR(m);
+ ret = PTR_ERR(m);
+ goto out;
}
/* Timeout set by caller if response is expected. */
@@ -282,7 +319,10 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
atomic_inc(&cm_id_priv->refcount);
m->context[0] = cm_id_priv;
*msg = m;
- return 0;
+
+out:
+ spin_unlock_irqrestore(&cm.state_lock, flags2);
+ return ret;
}
static int cm_alloc_response_msg(struct cm_port *port,
@@ -352,7 +392,8 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
grh, &av->ah_attr);
}
-static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
+static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av,
+ struct cm_id_private *cm_id_priv)
{
struct cm_device *cm_dev;
struct cm_port *port = NULL;
@@ -387,7 +428,17 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
&av->ah_attr);
av->timeout = path->packet_life_time + 1;
- return 0;
+ spin_lock_irqsave(&cm.lock, flags);
+ if (&cm_id_priv->av == av)
+ list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
+ else if (&cm_id_priv->alt_av == av)
+ list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
+ else
+ ret = -EINVAL;
+
+ spin_unlock_irqrestore(&cm.lock, flags);
+
+ return ret;
}
static int cm_alloc_id(struct cm_id_private *cm_id_priv)
@@ -677,6 +728,8 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
spin_lock_init(&cm_id_priv->lock);
init_completion(&cm_id_priv->comp);
INIT_LIST_HEAD(&cm_id_priv->work_list);
+ INIT_LIST_HEAD(&cm_id_priv->prim_list);
+ INIT_LIST_HEAD(&cm_id_priv->altr_list);
atomic_set(&cm_id_priv->work_count, -1);
atomic_set(&cm_id_priv->refcount, 1);
return &cm_id_priv->id;
@@ -892,6 +945,15 @@ retest:
break;
}
+ spin_lock_irq(&cm.lock);
+ if (!list_empty(&cm_id_priv->altr_list) &&
+ (!cm_id_priv->altr_send_port_not_ready))
+ list_del(&cm_id_priv->altr_list);
+ if (!list_empty(&cm_id_priv->prim_list) &&
+ (!cm_id_priv->prim_send_port_not_ready))
+ list_del(&cm_id_priv->prim_list);
+ spin_unlock_irq(&cm.lock);
+
cm_free_id(cm_id->local_id);
cm_deref_id(cm_id_priv);
wait_for_completion(&cm_id_priv->comp);
@@ -1192,12 +1254,13 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
goto out;
}
- ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av);
+ ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av,
+ cm_id_priv);
if (ret)
goto error1;
if (param->alternate_path) {
ret = cm_init_av_by_path(param->alternate_path,
- &cm_id_priv->alt_av);
+ &cm_id_priv->alt_av, cm_id_priv);
if (ret)
goto error1;
}
@@ -1653,7 +1716,8 @@ static int cm_req_handler(struct cm_work *work)
dev_put(gid_attr.ndev);
}
work->path[0].gid_type = gid_attr.gid_type;
- ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
+ ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
+ cm_id_priv);
}
if (ret) {
int err = ib_get_cached_gid(work->port->cm_dev->ib_device,
@@ -1672,7 +1736,8 @@ static int cm_req_handler(struct cm_work *work)
goto rejected;
}
if (req_msg->alt_local_lid) {
- ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
+ ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av,
+ cm_id_priv);
if (ret) {
ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
&work->path[0].sgid,
@@ -2727,7 +2792,8 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id,
goto out;
}
- ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av);
+ ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av,
+ cm_id_priv);
if (ret)
goto out;
cm_id_priv->alt_av.timeout =
@@ -2839,7 +2905,8 @@ static int cm_lap_handler(struct cm_work *work)
cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
work->mad_recv_wc->recv_buf.grh,
&cm_id_priv->av);
- cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av);
+ cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
+ cm_id_priv);
ret = atomic_inc_and_test(&cm_id_priv->work_count);
if (!ret)
list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -3031,7 +3098,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
return -EINVAL;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- ret = cm_init_av_by_path(param->path, &cm_id_priv->av);
+ ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv);
if (ret)
goto out;
@@ -3468,7 +3535,9 @@ out:
static int cm_migrate(struct ib_cm_id *cm_id)
{
struct cm_id_private *cm_id_priv;
+ struct cm_av tmp_av;
unsigned long flags;
+ int tmp_send_port_not_ready;
int ret = 0;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
@@ -3477,7 +3546,14 @@ static int cm_migrate(struct ib_cm_id *cm_id)
(cm_id->lap_state == IB_CM_LAP_UNINIT ||
cm_id->lap_state == IB_CM_LAP_IDLE)) {
cm_id->lap_state = IB_CM_LAP_IDLE;
+ /* Swap address vector */
+ tmp_av = cm_id_priv->av;
cm_id_priv->av = cm_id_priv->alt_av;
+ cm_id_priv->alt_av = tmp_av;
+ /* Swap port send ready state */
+ tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready;
+ cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready;
+ cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready;
} else
ret = -EINVAL;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -3888,6 +3964,9 @@ static void cm_add_one(struct ib_device *ib_device)
port->cm_dev = cm_dev;
port->port_num = i;
+ INIT_LIST_HEAD(&port->cm_priv_prim_list);
+ INIT_LIST_HEAD(&port->cm_priv_altr_list);
+
ret = cm_create_port_fs(port);
if (ret)
goto error1;
@@ -3945,6 +4024,8 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
{
struct cm_device *cm_dev = client_data;
struct cm_port *port;
+ struct cm_id_private *cm_id_priv;
+ struct ib_mad_agent *cur_mad_agent;
struct ib_port_modify port_modify = {
.clr_port_cap_mask = IB_PORT_CM_SUP
};
@@ -3968,15 +4049,27 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
port = cm_dev->port[i-1];
ib_modify_port(ib_device, port->port_num, 0, &port_modify);
+ /* Mark all the cm_id's as not valid */
+ spin_lock_irq(&cm.lock);
+ list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list)
+ cm_id_priv->altr_send_port_not_ready = 1;
+ list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list)
+ cm_id_priv->prim_send_port_not_ready = 1;
+ spin_unlock_irq(&cm.lock);
/*
* We flush the queue here after the going_down set, this
* verify that no new works will be queued in the recv handler,
* after that we can call the unregister_mad_agent
*/
flush_workqueue(cm.wq);
- ib_unregister_mad_agent(port->mad_agent);
+ spin_lock_irq(&cm.state_lock);
+ cur_mad_agent = port->mad_agent;
+ port->mad_agent = NULL;
+ spin_unlock_irq(&cm.state_lock);
+ ib_unregister_mad_agent(cur_mad_agent);
cm_remove_port_fs(port);
}
+
device_unregister(cm_dev->device);
kfree(cm_dev);
}
@@ -3989,6 +4082,7 @@ static int __init ib_cm_init(void)
INIT_LIST_HEAD(&cm.device_list);
rwlock_init(&cm.device_lock);
spin_lock_init(&cm.lock);
+ spin_lock_init(&cm.state_lock);
cm.listen_service_table = RB_ROOT;
cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
cm.remote_id_table = RB_ROOT;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 5f65a78b27c9..2a6fc47a1dfb 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1094,47 +1094,47 @@ static void cma_save_ib_info(struct sockaddr *src_addr,
}
}
-static void cma_save_ip4_info(struct sockaddr *src_addr,
- struct sockaddr *dst_addr,
+static void cma_save_ip4_info(struct sockaddr_in *src_addr,
+ struct sockaddr_in *dst_addr,
struct cma_hdr *hdr,
__be16 local_port)
{
- struct sockaddr_in *ip4;
-
if (src_addr) {
- ip4 = (struct sockaddr_in *)src_addr;
- ip4->sin_family = AF_INET;
- ip4->sin_addr.s_addr = hdr->dst_addr.ip4.addr;
- ip4->sin_port = local_port;
+ *src_addr = (struct sockaddr_in) {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = hdr->dst_addr.ip4.addr,
+ .sin_port = local_port,
+ };
}
if (dst_addr) {
- ip4 = (struct sockaddr_in *)dst_addr;
- ip4->sin_family = AF_INET;
- ip4->sin_addr.s_addr = hdr->src_addr.ip4.addr;
- ip4->sin_port = hdr->port;
+ *dst_addr = (struct sockaddr_in) {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = hdr->src_addr.ip4.addr,
+ .sin_port = hdr->port,
+ };
}
}
-static void cma_save_ip6_info(struct sockaddr *src_addr,
- struct sockaddr *dst_addr,
+static void cma_save_ip6_info(struct sockaddr_in6 *src_addr,
+ struct sockaddr_in6 *dst_addr,
struct cma_hdr *hdr,
__be16 local_port)
{
- struct sockaddr_in6 *ip6;
-
if (src_addr) {
- ip6 = (struct sockaddr_in6 *)src_addr;
- ip6->sin6_family = AF_INET6;
- ip6->sin6_addr = hdr->dst_addr.ip6;
- ip6->sin6_port = local_port;
+ *src_addr = (struct sockaddr_in6) {
+ .sin6_family = AF_INET6,
+ .sin6_addr = hdr->dst_addr.ip6,
+ .sin6_port = local_port,
+ };
}
if (dst_addr) {
- ip6 = (struct sockaddr_in6 *)dst_addr;
- ip6->sin6_family = AF_INET6;
- ip6->sin6_addr = hdr->src_addr.ip6;
- ip6->sin6_port = hdr->port;
+ *dst_addr = (struct sockaddr_in6) {
+ .sin6_family = AF_INET6,
+ .sin6_addr = hdr->src_addr.ip6,
+ .sin6_port = hdr->port,
+ };
}
}
@@ -1159,10 +1159,12 @@ static int cma_save_ip_info(struct sockaddr *src_addr,
switch (cma_get_ip_ver(hdr)) {
case 4:
- cma_save_ip4_info(src_addr, dst_addr, hdr, port);
+ cma_save_ip4_info((struct sockaddr_in *)src_addr,
+ (struct sockaddr_in *)dst_addr, hdr, port);
break;
case 6:
- cma_save_ip6_info(src_addr, dst_addr, hdr, port);
+ cma_save_ip6_info((struct sockaddr_in6 *)src_addr,
+ (struct sockaddr_in6 *)dst_addr, hdr, port);
break;
default:
return -EAFNOSUPPORT;
@@ -2436,6 +2438,18 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos)
return 0;
}
+static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type,
+ unsigned long supported_gids,
+ enum ib_gid_type default_gid)
+{
+ if ((network_type == RDMA_NETWORK_IPV4 ||
+ network_type == RDMA_NETWORK_IPV6) &&
+ test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids))
+ return IB_GID_TYPE_ROCE_UDP_ENCAP;
+
+ return default_gid;
+}
+
static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
{
struct rdma_route *route = &id_priv->id.route;
@@ -2461,6 +2475,8 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
route->num_paths = 1;
if (addr->dev_addr.bound_dev_if) {
+ unsigned long supported_gids;
+
ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
if (!ndev) {
ret = -ENODEV;
@@ -2484,7 +2500,12 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
route->path_rec->net = &init_net;
route->path_rec->ifindex = ndev->ifindex;
- route->path_rec->gid_type = id_priv->gid_type;
+ supported_gids = roce_gid_type_mask_support(id_priv->id.device,
+ id_priv->id.port_num);
+ route->path_rec->gid_type =
+ cma_route_gid_type(addr->dev_addr.network,
+ supported_gids,
+ id_priv->gid_type);
}
if (!ndev) {
ret = -ENODEV;
@@ -4369,7 +4390,7 @@ static int __init cma_init(void)
{
int ret;
- cma_wq = create_singlethread_workqueue("rdma_cm");
+ cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM);
if (!cma_wq)
return -ENOMEM;
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 357624f8b9d3..5495e22839a7 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -1160,7 +1160,7 @@ static int __init iw_cm_init(void)
if (ret)
pr_err("iw_cm: couldn't register netlink callbacks\n");
- iwcm_wq = create_singlethread_workqueue("iw_cm_wq");
+ iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM);
if (!iwcm_wq)
return -ENOMEM;
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 2d49228f28b2..40cbd6bdb73b 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -3160,7 +3160,7 @@ static int ib_mad_port_open(struct ib_device *device,
goto error3;
}
- port_priv->pd = ib_alloc_pd(device);
+ port_priv->pd = ib_alloc_pd(device, 0);
if (IS_ERR(port_priv->pd)) {
dev_err(&device->dev, "Couldn't create ib_mad PD\n");
ret = PTR_ERR(port_priv->pd);
@@ -3177,7 +3177,7 @@ static int ib_mad_port_open(struct ib_device *device,
goto error7;
snprintf(name, sizeof name, "ib_mad%d", port_num);
- port_priv->wq = create_singlethread_workqueue(name);
+ port_priv->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
if (!port_priv->wq) {
ret = -ENOMEM;
goto error8;
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 51c79b2fb0b8..e51b739f6ea3 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -873,7 +873,7 @@ int mcast_init(void)
{
int ret;
- mcast_wq = create_singlethread_workqueue("ib_mcast");
+ mcast_wq = alloc_ordered_workqueue("ib_mcast", WQ_MEM_RECLAIM);
if (!mcast_wq)
return -ENOMEM;
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index b9bf7aa055e7..81b742ca1639 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -2015,7 +2015,7 @@ int ib_sa_init(void)
goto err2;
}
- ib_nl_wq = create_singlethread_workqueue("ib_nl_sa_wq");
+ ib_nl_wq = alloc_ordered_workqueue("ib_nl_sa_wq", WQ_MEM_RECLAIM);
if (!ib_nl_wq) {
ret = -ENOMEM;
goto err3;
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 15defefecb4f..c1fb545e8d78 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -1193,7 +1193,7 @@ static ssize_t set_node_desc(struct device *device,
if (!dev->modify_device)
return -EIO;
- memcpy(desc.node_desc, buf, min_t(int, count, 64));
+ memcpy(desc.node_desc, buf, min_t(int, count, IB_DEVICE_NODE_DESC_MAX));
ret = ib_modify_device(dev, IB_DEVICE_MODIFY_NODE_DESC, &desc);
if (ret)
return ret;
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 2825ece91d3c..9520154f1d7c 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -1638,7 +1638,8 @@ static int ucma_open(struct inode *inode, struct file *filp)
if (!file)
return -ENOMEM;
- file->close_wq = create_singlethread_workqueue("ucma_close_id");
+ file->close_wq = alloc_ordered_workqueue("ucma_close_id",
+ WQ_MEM_RECLAIM);
if (!file->close_wq) {
kfree(file);
return -ENOMEM;
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index c68746ce6624..84b4eff90395 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -94,6 +94,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
unsigned long dma_attrs = 0;
struct scatterlist *sg, *sg_list_start;
int need_release = 0;
+ unsigned int gup_flags = FOLL_WRITE;
if (dmasync)
dma_attrs |= DMA_ATTR_WRITE_BARRIER;
@@ -174,7 +175,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
cur_base = addr & PAGE_MASK;
- if (npages == 0) {
+ if (npages == 0 || npages > UINT_MAX) {
ret = -EINVAL;
goto out;
}
@@ -183,6 +184,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (ret)
goto out;
+ if (!umem->writable)
+ gup_flags |= FOLL_FORCE;
+
need_release = 1;
sg_list_start = umem->sg_head.sgl;
@@ -190,7 +194,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
ret = get_user_pages(cur_base,
min_t(unsigned long, npages,
PAGE_SIZE / sizeof (struct page *)),
- 1, !umem->writable, page_list, vma_list);
+ gup_flags, page_list, vma_list);
if (ret < 0)
goto out;
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 75077a018675..1f0fe3217f23 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -527,6 +527,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
u64 off;
int j, k, ret = 0, start_idx, npages = 0;
u64 base_virt_addr;
+ unsigned int flags = 0;
if (access_mask == 0)
return -EINVAL;
@@ -556,6 +557,9 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
goto out_put_task;
}
+ if (access_mask & ODP_WRITE_ALLOWED_BIT)
+ flags |= FOLL_WRITE;
+
start_idx = (user_virt - ib_umem_start(umem)) >> PAGE_SHIFT;
k = start_idx;
@@ -574,8 +578,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
*/
npages = get_user_pages_remote(owning_process, owning_mm,
user_virt, gup_num_pages,
- access_mask & ODP_WRITE_ALLOWED_BIT,
- 0, local_page_list, NULL);
+ flags, local_page_list, NULL);
up_read(&owning_mm->mmap_sem);
if (npages < 0)
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index f6647318138d..cb3f515a2285 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -571,7 +571,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
pd->device = ib_dev;
pd->uobject = uobj;
- pd->local_mr = NULL;
+ pd->__internal_mr = NULL;
atomic_set(&pd->usecnt, 0);
uobj->object = pd;
@@ -3078,51 +3078,102 @@ out_put:
return ret ? ret : in_len;
}
+static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec)
+{
+ /* Returns user space filter size, includes padding */
+ return (spec->size - sizeof(struct ib_uverbs_flow_spec_hdr)) / 2;
+}
+
+static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size,
+ u16 ib_real_filter_sz)
+{
+ /*
+ * User space filter structures must be 64 bit aligned, otherwise this
+ * may pass, but we won't handle additional new attributes.
+ */
+
+ if (kern_filter_size > ib_real_filter_sz) {
+ if (memchr_inv(kern_spec_filter +
+ ib_real_filter_sz, 0,
+ kern_filter_size - ib_real_filter_sz))
+ return -EINVAL;
+ return ib_real_filter_sz;
+ }
+ return kern_filter_size;
+}
+
static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
union ib_flow_spec *ib_spec)
{
+ ssize_t actual_filter_sz;
+ ssize_t kern_filter_sz;
+ ssize_t ib_filter_sz;
+ void *kern_spec_mask;
+ void *kern_spec_val;
+
if (kern_spec->reserved)
return -EINVAL;
ib_spec->type = kern_spec->type;
+ kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr);
+ /* User flow spec size must be aligned to 4 bytes */
+ if (kern_filter_sz != ALIGN(kern_filter_sz, 4))
+ return -EINVAL;
+
+ kern_spec_val = (void *)kern_spec +
+ sizeof(struct ib_uverbs_flow_spec_hdr);
+ kern_spec_mask = kern_spec_val + kern_filter_sz;
+
switch (ib_spec->type) {
case IB_FLOW_SPEC_ETH:
- ib_spec->eth.size = sizeof(struct ib_flow_spec_eth);
- if (ib_spec->eth.size != kern_spec->eth.size)
+ ib_filter_sz = offsetof(struct ib_flow_eth_filter, real_sz);
+ actual_filter_sz = spec_filter_size(kern_spec_mask,
+ kern_filter_sz,
+ ib_filter_sz);
+ if (actual_filter_sz <= 0)
return -EINVAL;
- memcpy(&ib_spec->eth.val, &kern_spec->eth.val,
- sizeof(struct ib_flow_eth_filter));
- memcpy(&ib_spec->eth.mask, &kern_spec->eth.mask,
- sizeof(struct ib_flow_eth_filter));
+ ib_spec->size = sizeof(struct ib_flow_spec_eth);
+ memcpy(&ib_spec->eth.val, kern_spec_val, actual_filter_sz);
+ memcpy(&ib_spec->eth.mask, kern_spec_mask, actual_filter_sz);
break;
case IB_FLOW_SPEC_IPV4:
- ib_spec->ipv4.size = sizeof(struct ib_flow_spec_ipv4);
- if (ib_spec->ipv4.size != kern_spec->ipv4.size)
+ ib_filter_sz = offsetof(struct ib_flow_ipv4_filter, real_sz);
+ actual_filter_sz = spec_filter_size(kern_spec_mask,
+ kern_filter_sz,
+ ib_filter_sz);
+ if (actual_filter_sz <= 0)
return -EINVAL;
- memcpy(&ib_spec->ipv4.val, &kern_spec->ipv4.val,
- sizeof(struct ib_flow_ipv4_filter));
- memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask,
- sizeof(struct ib_flow_ipv4_filter));
+ ib_spec->size = sizeof(struct ib_flow_spec_ipv4);
+ memcpy(&ib_spec->ipv4.val, kern_spec_val, actual_filter_sz);
+ memcpy(&ib_spec->ipv4.mask, kern_spec_mask, actual_filter_sz);
break;
case IB_FLOW_SPEC_IPV6:
- ib_spec->ipv6.size = sizeof(struct ib_flow_spec_ipv6);
- if (ib_spec->ipv6.size != kern_spec->ipv6.size)
+ ib_filter_sz = offsetof(struct ib_flow_ipv6_filter, real_sz);
+ actual_filter_sz = spec_filter_size(kern_spec_mask,
+ kern_filter_sz,
+ ib_filter_sz);
+ if (actual_filter_sz <= 0)
+ return -EINVAL;
+ ib_spec->size = sizeof(struct ib_flow_spec_ipv6);
+ memcpy(&ib_spec->ipv6.val, kern_spec_val, actual_filter_sz);
+ memcpy(&ib_spec->ipv6.mask, kern_spec_mask, actual_filter_sz);
+
+ if ((ntohl(ib_spec->ipv6.mask.flow_label)) >= BIT(20) ||
+ (ntohl(ib_spec->ipv6.val.flow_label)) >= BIT(20))
return -EINVAL;
- memcpy(&ib_spec->ipv6.val, &kern_spec->ipv6.val,
- sizeof(struct ib_flow_ipv6_filter));
- memcpy(&ib_spec->ipv6.mask, &kern_spec->ipv6.mask,
- sizeof(struct ib_flow_ipv6_filter));
break;
case IB_FLOW_SPEC_TCP:
case IB_FLOW_SPEC_UDP:
- ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp);
- if (ib_spec->tcp_udp.size != kern_spec->tcp_udp.size)
+ ib_filter_sz = offsetof(struct ib_flow_tcp_udp_filter, real_sz);
+ actual_filter_sz = spec_filter_size(kern_spec_mask,
+ kern_filter_sz,
+ ib_filter_sz);
+ if (actual_filter_sz <= 0)
return -EINVAL;
- memcpy(&ib_spec->tcp_udp.val, &kern_spec->tcp_udp.val,
- sizeof(struct ib_flow_tcp_udp_filter));
- memcpy(&ib_spec->tcp_udp.mask, &kern_spec->tcp_udp.mask,
- sizeof(struct ib_flow_tcp_udp_filter));
+ ib_spec->size = sizeof(struct ib_flow_spec_tcp_udp);
+ memcpy(&ib_spec->tcp_udp.val, kern_spec_val, actual_filter_sz);
+ memcpy(&ib_spec->tcp_udp.mask, kern_spec_mask, actual_filter_sz);
break;
default:
return -EINVAL;
@@ -3654,7 +3705,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
goto err_uobj;
}
- flow_attr = kmalloc(sizeof(*flow_attr) + cmd.flow_attr.size, GFP_KERNEL);
+ flow_attr = kzalloc(sizeof(*flow_attr) + cmd.flow_attr.num_of_specs *
+ sizeof(union ib_flow_spec), GFP_KERNEL);
if (!flow_attr) {
err = -ENOMEM;
goto err_put;
@@ -4173,6 +4225,23 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
resp.device_cap_flags_ex = attr.device_cap_flags;
resp.response_length += sizeof(resp.device_cap_flags_ex);
+
+ if (ucore->outlen < resp.response_length + sizeof(resp.rss_caps))
+ goto end;
+
+ resp.rss_caps.supported_qpts = attr.rss_caps.supported_qpts;
+ resp.rss_caps.max_rwq_indirection_tables =
+ attr.rss_caps.max_rwq_indirection_tables;
+ resp.rss_caps.max_rwq_indirection_table_size =
+ attr.rss_caps.max_rwq_indirection_table_size;
+
+ resp.response_length += sizeof(resp.rss_caps);
+
+ if (ucore->outlen < resp.response_length + sizeof(resp.max_wq_type_rq))
+ goto end;
+
+ resp.max_wq_type_rq = attr.max_wq_type_rq;
+ resp.response_length += sizeof(resp.max_wq_type_rq);
end:
err = ib_copy_to_udata(ucore, &resp, resp.response_length);
return err;
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 0012fa58c105..44b1104eb168 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -262,12 +262,9 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
container_of(uobj, struct ib_uqp_object, uevent.uobject);
idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
- if (qp != qp->real_qp) {
- ib_close_qp(qp);
- } else {
+ if (qp == qp->real_qp)
ib_uverbs_detach_umcast(qp, uqp);
- ib_destroy_qp(qp);
- }
+ ib_destroy_qp(qp);
ib_uverbs_release_uevent(file, &uqp->uevent);
kfree(uqp);
}
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index f2b776efab3a..83687646da68 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -227,9 +227,11 @@ EXPORT_SYMBOL(rdma_port_get_link_layer);
* Every PD has a local_dma_lkey which can be used as the lkey value for local
* memory operations.
*/
-struct ib_pd *ib_alloc_pd(struct ib_device *device)
+struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
+ const char *caller)
{
struct ib_pd *pd;
+ int mr_access_flags = 0;
pd = device->alloc_pd(device, NULL, NULL);
if (IS_ERR(pd))
@@ -237,26 +239,46 @@ struct ib_pd *ib_alloc_pd(struct ib_device *device)
pd->device = device;
pd->uobject = NULL;
- pd->local_mr = NULL;
+ pd->__internal_mr = NULL;
atomic_set(&pd->usecnt, 0);
+ pd->flags = flags;
if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
pd->local_dma_lkey = device->local_dma_lkey;
- else {
+ else
+ mr_access_flags |= IB_ACCESS_LOCAL_WRITE;
+
+ if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
+ pr_warn("%s: enabling unsafe global rkey\n", caller);
+ mr_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE;
+ }
+
+ if (mr_access_flags) {
struct ib_mr *mr;
- mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
+ mr = pd->device->get_dma_mr(pd, mr_access_flags);
if (IS_ERR(mr)) {
ib_dealloc_pd(pd);
- return (struct ib_pd *)mr;
+ return ERR_CAST(mr);
}
- pd->local_mr = mr;
- pd->local_dma_lkey = pd->local_mr->lkey;
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->uobject = NULL;
+ mr->need_inval = false;
+
+ pd->__internal_mr = mr;
+
+ if (!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY))
+ pd->local_dma_lkey = pd->__internal_mr->lkey;
+
+ if (flags & IB_PD_UNSAFE_GLOBAL_RKEY)
+ pd->unsafe_global_rkey = pd->__internal_mr->rkey;
}
+
return pd;
}
-EXPORT_SYMBOL(ib_alloc_pd);
+EXPORT_SYMBOL(__ib_alloc_pd);
/**
* ib_dealloc_pd - Deallocates a protection domain.
@@ -270,10 +292,10 @@ void ib_dealloc_pd(struct ib_pd *pd)
{
int ret;
- if (pd->local_mr) {
- ret = ib_dereg_mr(pd->local_mr);
+ if (pd->__internal_mr) {
+ ret = pd->device->dereg_mr(pd->__internal_mr);
WARN_ON(ret);
- pd->local_mr = NULL;
+ pd->__internal_mr = NULL;
}
/* uverbs manipulates usecnt with proper locking, while the kabi
@@ -821,7 +843,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
if (ret) {
pr_err("failed to init MR pool ret= %d\n", ret);
ib_destroy_qp(qp);
- qp = ERR_PTR(ret);
+ return ERR_PTR(ret);
}
}
@@ -1391,29 +1413,6 @@ EXPORT_SYMBOL(ib_resize_cq);
/* Memory regions */
-struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
-{
- struct ib_mr *mr;
- int err;
-
- err = ib_check_mr_access(mr_access_flags);
- if (err)
- return ERR_PTR(err);
-
- mr = pd->device->get_dma_mr(pd, mr_access_flags);
-
- if (!IS_ERR(mr)) {
- mr->device = pd->device;
- mr->pd = pd;
- mr->uobject = NULL;
- atomic_inc(&pd->usecnt);
- mr->need_inval = false;
- }
-
- return mr;
-}
-EXPORT_SYMBOL(ib_get_dma_mr);
-
int ib_dereg_mr(struct ib_mr *mr)
{
struct ib_pd *pd = mr->pd;
@@ -1812,13 +1811,13 @@ EXPORT_SYMBOL(ib_set_vf_guid);
*
* Constraints:
* - The first sg element is allowed to have an offset.
- * - Each sg element must be aligned to page_size (or physically
- * contiguous to the previous element). In case an sg element has a
- * non contiguous offset, the mapping prefix will not include it.
+ * - Each sg element must either be aligned to page_size or virtually
+ * contiguous to the previous element. In case an sg element has a
+ * non-contiguous offset, the mapping prefix will not include it.
* - The last sg element is allowed to have length less than page_size.
* - If sg_nents total byte length exceeds the mr max_num_sge * page_size
* then only max_num_sg entries will be mapped.
- * - If the MR was allocated with type IB_MR_TYPE_SG_GAPS_REG, non of these
+ * - If the MR was allocated with type IB_MR_TYPE_SG_GAPS, none of these
* constraints holds and the page_size argument is ignored.
*
* Returns the number of sg elements that were mapped to the memory region.
OpenPOWER on IntegriCloud