diff options
Diffstat (limited to 'net/rds')
-rw-r--r-- | net/rds/connection.c | 11 | ||||
-rw-r--r-- | net/rds/ib_cm.c | 52 | ||||
-rw-r--r-- | net/rds/ib_fmr.c | 38 | ||||
-rw-r--r-- | net/rds/ib_mr.h | 2 | ||||
-rw-r--r-- | net/rds/rds.h | 6 | ||||
-rw-r--r-- | net/rds/recv.c | 4 | ||||
-rw-r--r-- | net/rds/tcp.c | 43 | ||||
-rw-r--r-- | net/rds/tcp.h | 2 | ||||
-rw-r--r-- | net/rds/tcp_listen.c | 11 | ||||
-rw-r--r-- | net/rds/tcp_send.c | 8 | ||||
-rw-r--r-- | net/rds/threads.c | 2 |
11 files changed, 109 insertions, 70 deletions
diff --git a/net/rds/connection.c b/net/rds/connection.c index 0e04dcceb1d4..6a5ebdea7d2e 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -333,11 +333,19 @@ void rds_conn_shutdown(struct rds_conn_path *cp) rds_conn_path_reset(cp); if (!rds_conn_path_transition(cp, RDS_CONN_DISCONNECTING, + RDS_CONN_DOWN) && + !rds_conn_path_transition(cp, RDS_CONN_ERROR, RDS_CONN_DOWN)) { /* This can happen - eg when we're in the middle of tearing * down the connection, and someone unloads the rds module. - * Quite reproduceable with loopback connections. + * Quite reproducible with loopback connections. * Mostly harmless. + * + * Note that this also happens with rds-tcp because + * we could have triggered rds_conn_path_drop in irq + * mode from rds_tcp_state change on the receipt of + * a FIN, thus we need to recheck for RDS_CONN_ERROR + * here. */ rds_conn_path_error(cp, "%s: failed to transition " "to state DOWN, current state " @@ -429,6 +437,7 @@ void rds_conn_destroy(struct rds_connection *conn) */ rds_cong_remove_conn(conn); + put_net(conn->c_net); kmem_cache_free(rds_conn_slab, conn); spin_lock_irqsave(&rds_conn_lock, flags); diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index ce3775abc6e7..80fb6f63e768 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -442,7 +442,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) ic->i_send_cq = NULL; ibdev_put_vector(rds_ibdev, ic->i_scq_vector); rdsdebug("ib_create_cq send failed: %d\n", ret); - goto out; + goto rds_ibdev_out; } ic->i_rcq_vector = ibdev_get_unused_vector(rds_ibdev); @@ -456,19 +456,19 @@ static int rds_ib_setup_qp(struct rds_connection *conn) ic->i_recv_cq = NULL; ibdev_put_vector(rds_ibdev, ic->i_rcq_vector); rdsdebug("ib_create_cq recv failed: %d\n", ret); - goto out; + goto send_cq_out; } ret = ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP); if (ret) { rdsdebug("ib_req_notify_cq send failed: %d\n", ret); - goto out; + goto recv_cq_out; } ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); if (ret) { rdsdebug("ib_req_notify_cq recv failed: %d\n", ret); - goto out; + goto recv_cq_out; } /* XXX negotiate max send/recv with remote? */ @@ -494,7 +494,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr); if (ret) { rdsdebug("rdma_create_qp failed: %d\n", ret); - goto out; + goto recv_cq_out; } ic->i_send_hdrs = ib_dma_alloc_coherent(dev, @@ -504,7 +504,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) if (!ic->i_send_hdrs) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent send failed\n"); - goto out; + goto qp_out; } ic->i_recv_hdrs = ib_dma_alloc_coherent(dev, @@ -514,7 +514,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) if (!ic->i_recv_hdrs) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent recv failed\n"); - goto out; + goto send_hdrs_dma_out; } ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), @@ -522,7 +522,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) if (!ic->i_ack) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent ack failed\n"); - goto out; + goto recv_hdrs_dma_out; } ic->i_sends = vzalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work), @@ -530,7 +530,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) if (!ic->i_sends) { ret = -ENOMEM; rdsdebug("send allocation failed\n"); - goto out; + goto ack_dma_out; } ic->i_recvs = vzalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work), @@ -538,7 +538,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) if (!ic->i_recvs) { ret = -ENOMEM; rdsdebug("recv allocation failed\n"); - goto out; + goto sends_out; } rds_ib_recv_init_ack(ic); @@ -546,8 +546,33 @@ static int rds_ib_setup_qp(struct rds_connection *conn) rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd, ic->i_send_cq, ic->i_recv_cq); -out: + return ret; + +sends_out: + vfree(ic->i_sends); +ack_dma_out: + ib_dma_free_coherent(dev, sizeof(struct rds_header), + ic->i_ack, ic->i_ack_dma); +recv_hdrs_dma_out: + ib_dma_free_coherent(dev, ic->i_recv_ring.w_nr * + sizeof(struct rds_header), + ic->i_recv_hdrs, ic->i_recv_hdrs_dma); +send_hdrs_dma_out: + ib_dma_free_coherent(dev, ic->i_send_ring.w_nr * + sizeof(struct rds_header), + ic->i_send_hdrs, ic->i_send_hdrs_dma); +qp_out: + rdma_destroy_qp(ic->i_cm_id); +recv_cq_out: + if (!ib_destroy_cq(ic->i_recv_cq)) + ic->i_recv_cq = NULL; +send_cq_out: + if (!ib_destroy_cq(ic->i_send_cq)) + ic->i_send_cq = NULL; +rds_ibdev_out: + rds_ib_remove_conn(rds_ibdev, conn); rds_ib_dev_put(rds_ibdev); + return ret; } @@ -677,9 +702,8 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, event->param.conn.initiator_depth); /* rdma_accept() calls rdma_reject() internally if it fails */ - err = rdma_accept(cm_id, &conn_param); - if (err) - rds_ib_conn_error(conn, "rdma_accept failed (%d)\n", err); + if (rdma_accept(cm_id, &conn_param)) + rds_ib_conn_error(conn, "rdma_accept failed\n"); out: if (conn) diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c index 4fe8f4fec4ee..86ef907067bb 100644 --- a/net/rds/ib_fmr.c +++ b/net/rds/ib_fmr.c @@ -78,17 +78,15 @@ struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages) return ibmr; out_no_cigar: - if (ibmr) { - if (fmr->fmr) - ib_dealloc_fmr(fmr->fmr); - kfree(ibmr); - } + kfree(ibmr); atomic_dec(&pool->item_count); + return ERR_PTR(err); } -int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr, - struct scatterlist *sg, unsigned int nents) +static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, + struct rds_ib_mr *ibmr, struct scatterlist *sg, + unsigned int nents) { struct ib_device *dev = rds_ibdev->dev; struct rds_ib_fmr *fmr = &ibmr->u.fmr; @@ -114,29 +112,39 @@ int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr, u64 dma_addr = ib_sg_dma_address(dev, &scat[i]); if (dma_addr & ~PAGE_MASK) { - if (i > 0) + if (i > 0) { + ib_dma_unmap_sg(dev, sg, nents, + DMA_BIDIRECTIONAL); return -EINVAL; - else + } else { ++page_cnt; + } } if ((dma_addr + dma_len) & ~PAGE_MASK) { - if (i < sg_dma_len - 1) + if (i < sg_dma_len - 1) { + ib_dma_unmap_sg(dev, sg, nents, + DMA_BIDIRECTIONAL); return -EINVAL; - else + } else { ++page_cnt; + } } len += dma_len; } page_cnt += len >> PAGE_SHIFT; - if (page_cnt > ibmr->pool->fmr_attr.max_pages) + if (page_cnt > ibmr->pool->fmr_attr.max_pages) { + ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL); return -EINVAL; + } dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC, rdsibdev_to_node(rds_ibdev)); - if (!dma_pages) + if (!dma_pages) { + ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL); return -ENOMEM; + } page_cnt = 0; for (i = 0; i < sg_dma_len; ++i) { @@ -149,8 +157,10 @@ int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr, } ret = ib_map_phys_fmr(fmr->fmr, dma_pages, page_cnt, io_addr); - if (ret) + if (ret) { + ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL); goto out; + } /* Success - we successfully remapped the MR, so we can * safely tear down the old mapping. diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h index 5d6e98a79a5e..0ea4ab017a8c 100644 --- a/net/rds/ib_mr.h +++ b/net/rds/ib_mr.h @@ -125,8 +125,6 @@ void rds_ib_mr_exit(void); void __rds_ib_teardown_mr(struct rds_ib_mr *); void rds_ib_teardown_mr(struct rds_ib_mr *); struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *, int); -int rds_ib_map_fmr(struct rds_ib_device *, struct rds_ib_mr *, - struct scatterlist *, unsigned int); struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *); int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *, int, struct rds_ib_mr **); struct rds_ib_mr *rds_ib_reg_fmr(struct rds_ib_device *, struct scatterlist *, diff --git a/net/rds/rds.h b/net/rds/rds.h index 39518ef7af4d..82d38ccf5e8b 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -147,7 +147,7 @@ struct rds_connection { /* Protocol version */ unsigned int c_version; - possible_net_t c_net; + struct net *c_net; struct list_head c_map_item; unsigned long c_map_queued; @@ -162,13 +162,13 @@ struct rds_connection { static inline struct net *rds_conn_net(struct rds_connection *conn) { - return read_pnet(&conn->c_net); + return conn->c_net; } static inline void rds_conn_net_set(struct rds_connection *conn, struct net *net) { - write_pnet(&conn->c_net, net); + conn->c_net = get_net(net); } #define RDS_FLAG_CONG_BITMAP 0x01 diff --git a/net/rds/recv.c b/net/rds/recv.c index 8b7e7b7f2c2d..c70c32cb05f5 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -594,7 +594,6 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, goto out; while (1) { - struct iov_iter save; /* If there are pending notifications, do those - and nothing else */ if (!list_empty(&rs->rs_notify_queue)) { ret = rds_notify_queue_get(rs, msg); @@ -630,7 +629,6 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, rdsdebug("copying inc %p from %pI4:%u to user\n", inc, &inc->i_conn->c_faddr, ntohs(inc->i_hdr.h_sport)); - save = msg->msg_iter; ret = inc->i_conn->c_trans->inc_copy_to_user(inc, &msg->msg_iter); if (ret < 0) break; @@ -644,7 +642,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, rds_inc_put(inc); inc = NULL; rds_stats_inc(s_recv_deliver_raced); - msg->msg_iter = save; + iov_iter_revert(&msg->msg_iter, ret); continue; } diff --git a/net/rds/tcp.c b/net/rds/tcp.c index a973d3b4dff0..431404dbdad1 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -84,13 +84,10 @@ static struct ctl_table rds_tcp_sysctl_table[] = { /* doing it this way avoids calling tcp_sk() */ void rds_tcp_nonagle(struct socket *sock) { - mm_segment_t oldfs = get_fs(); int val = 1; - set_fs(KERNEL_DS); - sock->ops->setsockopt(sock, SOL_TCP, TCP_NODELAY, (char __user *)&val, + kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (void *)&val, sizeof(val)); - set_fs(oldfs); } u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc) @@ -484,9 +481,10 @@ static void __net_exit rds_tcp_exit_net(struct net *net) * we do need to clean up the listen socket here. */ if (rtn->rds_tcp_listen_sock) { - rds_tcp_listen_stop(rtn->rds_tcp_listen_sock); + struct socket *lsock = rtn->rds_tcp_listen_sock; + rtn->rds_tcp_listen_sock = NULL; - flush_work(&rtn->rds_tcp_accept_w); + rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); } } @@ -523,13 +521,13 @@ static void rds_tcp_kill_sock(struct net *net) struct rds_tcp_connection *tc, *_tc; LIST_HEAD(tmp_list); struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + struct socket *lsock = rtn->rds_tcp_listen_sock; - rds_tcp_listen_stop(rtn->rds_tcp_listen_sock); rtn->rds_tcp_listen_sock = NULL; - flush_work(&rtn->rds_tcp_accept_w); + rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { - struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); + struct net *c_net = tc->t_cpath->cp_conn->c_net; if (net != c_net || !tc->t_sock) continue; @@ -546,8 +544,12 @@ static void rds_tcp_kill_sock(struct net *net) void *rds_tcp_listen_sock_def_readable(struct net *net) { struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + struct socket *lsock = rtn->rds_tcp_listen_sock; + + if (!lsock) + return NULL; - return rtn->rds_tcp_listen_sock->sk->sk_user_data; + return lsock->sk->sk_user_data; } static int rds_tcp_dev_event(struct notifier_block *this, @@ -584,7 +586,7 @@ static void rds_tcp_sysctl_reset(struct net *net) spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { - struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); + struct net *c_net = tc->t_cpath->cp_conn->c_net; if (net != c_net || !tc->t_sock) continue; @@ -638,19 +640,19 @@ static int rds_tcp_init(void) goto out; } - ret = register_netdevice_notifier(&rds_tcp_dev_notifier); - if (ret) { - pr_warn("could not register rds_tcp_dev_notifier\n"); + ret = rds_tcp_recv_init(); + if (ret) goto out_slab; - } ret = register_pernet_subsys(&rds_tcp_net_ops); if (ret) - goto out_notifier; + goto out_recv; - ret = rds_tcp_recv_init(); - if (ret) + ret = register_netdevice_notifier(&rds_tcp_dev_notifier); + if (ret) { + pr_warn("could not register rds_tcp_dev_notifier\n"); goto out_pernet; + } rds_trans_register(&rds_tcp_transport); @@ -660,9 +662,8 @@ static int rds_tcp_init(void) out_pernet: unregister_pernet_subsys(&rds_tcp_net_ops); -out_notifier: - if (unregister_netdevice_notifier(&rds_tcp_dev_notifier)) - pr_warn("could not unregister rds_tcp_dev_notifier\n"); +out_recv: + rds_tcp_recv_exit(); out_slab: kmem_cache_destroy(rds_tcp_conn_slab); out: diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 9a1cc8906576..56ea6620fcf9 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -66,7 +66,7 @@ void rds_tcp_state_change(struct sock *sk); /* tcp_listen.c */ struct socket *rds_tcp_listen_init(struct net *); -void rds_tcp_listen_stop(struct socket *); +void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor); void rds_tcp_listen_data_ready(struct sock *sk); int rds_tcp_accept_one(struct socket *sock); int rds_tcp_keepalive(struct socket *sock); diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 67d0929c7d3d..507678853e6c 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -133,7 +133,7 @@ int rds_tcp_accept_one(struct socket *sock) new_sock->type = sock->type; new_sock->ops = sock->ops; - ret = sock->ops->accept(sock, new_sock, O_NONBLOCK); + ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, true); if (ret < 0) goto out; @@ -223,6 +223,9 @@ void rds_tcp_listen_data_ready(struct sock *sk) * before it has been accepted and the accepter has set up their * data_ready.. we only want to queue listen work for our listening * socket + * + * (*ready)() may be null if we are racing with netns delete, and + * the listen socket is being torn down. */ if (sk->sk_state == TCP_LISTEN) rds_tcp_accept_work(sk); @@ -231,7 +234,8 @@ void rds_tcp_listen_data_ready(struct sock *sk) out: read_unlock_bh(&sk->sk_callback_lock); - ready(sk); + if (ready) + ready(sk); } struct socket *rds_tcp_listen_init(struct net *net) @@ -271,7 +275,7 @@ out: return NULL; } -void rds_tcp_listen_stop(struct socket *sock) +void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor) { struct sock *sk; @@ -292,5 +296,6 @@ void rds_tcp_listen_stop(struct socket *sock) /* wait for accepts to stop and close the socket */ flush_workqueue(rds_wq); + flush_work(acceptor); sock_release(sock); } diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index dcf4742083ea..52d11d7725c8 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -40,13 +40,7 @@ static void rds_tcp_cork(struct socket *sock, int val) { - mm_segment_t oldfs; - - oldfs = get_fs(); - set_fs(KERNEL_DS); - sock->ops->setsockopt(sock, SOL_TCP, TCP_CORK, (char __user *)&val, - sizeof(val)); - set_fs(oldfs); + kernel_setsockopt(sock, SOL_TCP, TCP_CORK, (void *)&val, sizeof(val)); } void rds_tcp_xmit_path_prepare(struct rds_conn_path *cp) diff --git a/net/rds/threads.c b/net/rds/threads.c index e36e333a0aa0..3e447d056d09 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -156,7 +156,7 @@ void rds_connect_worker(struct work_struct *work) struct rds_connection *conn = cp->cp_conn; int ret; - if (cp->cp_index > 1 && cp->cp_conn->c_laddr > cp->cp_conn->c_faddr) + if (cp->cp_index > 0 && cp->cp_conn->c_laddr > cp->cp_conn->c_faddr) return; clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING); |