summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2013-10-31 16:14:36 +1100
committerTrond Myklebust <Trond.Myklebust@netapp.com>2013-10-31 09:14:50 -0400
commit93dc41bdc5c853916610576c6b48a1704959c70d (patch)
tree893c971fc8538c8872938c56a442be0abe7db437 /net
parent09c3e54635c85b3da44d3bc156619c1f1af3bb43 (diff)
downloadtalos-obmc-linux-93dc41bdc5c853916610576c6b48a1704959c70d.tar.gz
talos-obmc-linux-93dc41bdc5c853916610576c6b48a1704959c70d.zip
SUNRPC: close a rare race in xs_tcp_setup_socket.
We have one report of a crash in xs_tcp_setup_socket. The call path to the crash is: xs_tcp_setup_socket -> inet_stream_connect -> lock_sock_nested. The 'sock' passed to that last function is NULL. The only way I can see this happening is a concurrent call to xs_close: xs_close -> xs_reset_transport -> sock_release -> inet_release inet_release sets: sock->sk = NULL; inet_stream_connect calls lock_sock(sock->sk); which gets NULL. All calls to xs_close are protected by XPRT_LOCKED as are most activations of the workqueue which runs xs_tcp_setup_socket. The exception is xs_tcp_schedule_linger_timeout. So presumably the timeout queued by the later fires exactly when some other code runs xs_close(). To protect against this we can move the cancel_delayed_work_sync() call from xs_destory() to xs_close(). As xs_close is never called from the worker scheduled on ->connect_worker, this can never deadlock. Signed-off-by: NeilBrown <neilb@suse.de> [Trond: Make it safe to call cancel_delayed_work_sync() on AF_LOCAL sockets] Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Diffstat (limited to 'net')
-rw-r--r--net/sunrpc/xprtsock.c13
1 files changed, 9 insertions, 4 deletions
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 9deed17fd3e4..a4709bbf8e5e 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -835,6 +835,8 @@ static void xs_close(struct rpc_xprt *xprt)
dprintk("RPC: xs_close xprt %p\n", xprt);
+ cancel_delayed_work_sync(&transport->connect_worker);
+
xs_reset_transport(transport);
xprt->reestablish_timeout = 0;
@@ -869,12 +871,8 @@ static void xs_local_destroy(struct rpc_xprt *xprt)
*/
static void xs_destroy(struct rpc_xprt *xprt)
{
- struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
-
dprintk("RPC: xs_destroy xprt %p\n", xprt);
- cancel_delayed_work_sync(&transport->connect_worker);
-
xs_local_destroy(xprt);
}
@@ -1817,6 +1815,10 @@ static inline void xs_reclassify_socket(int family, struct socket *sock)
}
#endif
+static void xs_dummy_setup_socket(struct work_struct *work)
+{
+}
+
static struct socket *xs_create_sock(struct rpc_xprt *xprt,
struct sock_xprt *transport, int family, int type, int protocol)
{
@@ -2668,6 +2670,9 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
xprt->ops = &xs_local_ops;
xprt->timeout = &xs_local_default_timeout;
+ INIT_DELAYED_WORK(&transport->connect_worker,
+ xs_dummy_setup_socket);
+
switch (sun->sun_family) {
case AF_LOCAL:
if (sun->sun_path[0] != '/') {
OpenPOWER on IntegriCloud